Welcome, guest | Sign In | My Account | Store | Cart
def _should_include_path(path, includes, excludes):
    """Return True iff the given path should be included.

    Only the basename of the path is matched (using `fnmatch`) against
    the given patterns.

        "path" is the file or dir path to test.
        "includes" is a sequence of glob patterns. If it is non-empty the
            basename must match at least one of them, otherwise the path
            is rejected. None or empty means "no include filtering".
        "excludes" is a sequence of glob patterns. The path is rejected
            if its basename matches any of them. None is treated the
            same as an empty sequence.

    Each decision is reported with `log.debug(...)` if a usable global
    named "log" exists; if not, the logging is silently skipped.
    """
    from os.path import basename
    from fnmatch import fnmatch

    def debug(*args):
        # Best-effort logging: this function has to keep working in a
        # module that does not define a "log" global.
        try:
            log.debug(*args)
        except (NameError, AttributeError):
            pass

    base = basename(path)
    if includes:
        for include in includes:
            if fnmatch(base, include):
                debug("include `%s' (matches `%s')", path, include)
                break
        else:
            # The loop ran to completion: no include pattern matched.
            debug("exclude `%s' (matches no includes)", path)
            return False
    # `or ()` lets callers pass excludes=None, mirroring how a false
    # `includes` is already tolerated above.
    for exclude in excludes or ():
        if fnmatch(base, exclude):
            debug("exclude `%s' (matches `%s')", path, exclude)
            return False
    return True

def _walk(top, topdown=True, onerror=None, follow_symlinks=False):
    """A version of `os.walk()` with a couple differences regarding symlinks.

    1. follow_symlinks=False (the default): A symlink to a dir is
       returned as a *non*-dir. In `os.walk()`, a symlink to a dir is
       returned in the *dirs* list, but it is not recursed into.
    2. follow_symlinks=True: A symlink to a dir is returned in the
       *dirs* list (as with `os.walk()`) but it *is conditionally*
       recursed into (unlike `os.walk()`).

       A symlinked dir is only recursed into if it is to a deeper dir
       within the same tree. This is my understanding of how `find -L
       DIR` works.

    Yields `(top, dirs, nondirs)` tuples, parent before children if
    "topdown" is true, children before parent otherwise. With
    topdown=True the caller may modify the yielded "dirs" list in place
    to control which subdirs are visited.

    "onerror", if given, is called with the OSError raised when a
    directory cannot be listed; that directory is then skipped.

    TODO: put as a separate recipe
    """
    import os
    from os.path import join, isdir, islink, abspath

    # We may not have read permission for top, in which case we can't
    # get a list of the files the directory contains.  os.path.walk
    # always suppressed the exception then, rather than blow up for a
    # minor reason when (say) a thousand readable directories are still
    # left to visit.  That logic is copied here.
    try:
        names = os.listdir(top)
    except OSError as err:
        if onerror is not None:
            onerror(err)
        return

    dirs, nondirs = [], []
    for name in names:
        path = join(top, name)
        # When not following symlinks, a symlink is always reported as a
        # non-dir, even if it points at a directory.
        if isdir(path) and (follow_symlinks or not islink(path)):
            dirs.append(name)
        else:
            nondirs.append(name)

    if topdown:
        yield top, dirs, nondirs
    # Iterate the very same list object that was yielded above so that
    # in-place pruning by the caller takes effect.
    for name in dirs:
        path = join(top, name)
        if follow_symlinks and islink(path):
            # Only walk this path if it links deeper in the same tree.
            top_abs = abspath(top)
            link_abs = abspath(join(top, os.readlink(path)))
            if not link_abs.startswith(top_abs + os.sep):
                continue
        for x in _walk(path, topdown, onerror, follow_symlinks=follow_symlinks):
            yield x
    if not topdown:
        yield top, dirs, nondirs

# Sentinel default for "on_error": distinguishes "argument not given" from
# an explicit None (which means "do nothing"). It is compared by identity.
_NOT_SPECIFIED = ("NOT", "SPECIFIED")


def _paths_from_path_patterns(path_patterns, files=True, dirs="never",
                              recursive=True, includes=None, excludes=None,
                              skip_dupe_dirs=False,
                              follow_symlinks=False,
                              on_error=_NOT_SPECIFIED):
    """_paths_from_path_patterns([<path-patterns>, ...]) -> file paths

    Generate a list of paths (files and/or dirs) represented by the given path
    patterns.

        "path_patterns" is a list of paths optionally using the '*', '?' and
            '[seq]' glob patterns.
        "files" is boolean (default True) indicating if file paths
            should be yielded
        "dirs" is string indicating under what conditions dirs are
            yielded. It must be one of:
              never             (default) never yield dirs
              always            yield all dirs matching given patterns
              if-not-recursive  only yield dirs for invocations when
                                recursive=False
            See use cases below for more details.
        "recursive" is boolean (default True) indicating if paths should
            be recursively yielded under given dirs.
        "includes" is a list of file patterns to include in recursive
            searches.
        "excludes" is a list of file and dir patterns to exclude.
            (Note: This is slightly different than GNU grep's --exclude
            option which only excludes *files*.  I.e. you cannot exclude
            a ".svn" dir.)
        "skip_dupe_dirs" can be set True to watch for and skip
            descending into a dir that has already been yielded. Note
            that symlinks are only dereferenced for this check when
            follow_symlinks is true.
        "follow_symlinks" is a boolean indicating whether to follow
            symlinks (default False). To guard against infinite loops
            with circular dir symlinks, only dir symlinks to *deeper*
            dirs are followed.
        "on_error" is an error callback called when a given path pattern
            matches nothing:
                on_error(PATH_PATTERN)
            If not specified, the default is look for a "log" global and
            call:
                log.error("`%s': No such file or directory")
            Specify None to do nothing.

    This is a generator, so the argument check on "path_patterns" (an
    assert that it is not a plain string) only runs once iteration
    starts.

    Typically this is useful for a command-line tool that takes a list
    of paths as arguments. (For Unix-heads: the shell on Windows does
    NOT expand glob chars, that is left to the app.)

    Use case #1: like `grep -r`
      {files=True, dirs='never', recursive=(if '-r' in opts)}
        script FILE     # yield FILE, else call on_error(FILE)
        script DIR      # yield nothing
        script PATH*    # yield all files matching PATH*; if none,
                        # call on_error(PATH*) callback
        script -r DIR   # yield files (not dirs) recursively under DIR
        script -r PATH* # yield files matching PATH* and files recursively
                        # under dirs matching PATH*; if none, call
                        # on_error(PATH*) callback

    Use case #2: like `file -r` (if it had a recursive option)
      {files=True, dirs='if-not-recursive', recursive=(if '-r' in opts)}
        script FILE     # yield FILE, else call on_error(FILE)
        script DIR      # yield DIR, else call on_error(DIR)
        script PATH*    # yield all files and dirs matching PATH*; if none,
                        # call on_error(PATH*) callback
        script -r DIR   # yield files (not dirs) recursively under DIR
        script -r PATH* # yield files matching PATH* and files recursively
                        # under dirs matching PATH*; if none, call
                        # on_error(PATH*) callback

    Use case #3: kind of like `find .`
      {files=True, dirs='always', recursive=(if '-r' in opts)}
        script FILE     # yield FILE, else call on_error(FILE)
        script DIR      # yield DIR, else call on_error(DIR)
        script PATH*    # yield all files and dirs matching PATH*; if none,
                        # call on_error(PATH*) callback
        script -r DIR   # yield files and dirs recursively under DIR
                        # (including DIR)
        script -r PATH* # yield files and dirs matching PATH* and recursively
                        # under dirs; if none, call on_error(PATH*)
                        # callback

    TODO: perf improvements (profile, stat just once)
    """
    from os.path import exists, isdir, join, normpath, abspath, \
                        lexists, islink, realpath
    from glob import glob

    # `basestring` only exists on Python 2; fall back to `str` so the
    # sanity check below does not die with a NameError on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    assert not isinstance(path_patterns, string_types), \
        "'path_patterns' must be a sequence, not a string: %r" % path_patterns
    if includes is None: includes = []
    if excludes is None: excludes = []
    GLOB_CHARS = '*?['

    # Canonical paths of dirs already handled; only consulted when
    # skip_dupe_dirs is true.
    searched_dirs = set()

    def already_searched(dir_path):
        # Return True if dir_path was seen before; otherwise remember it
        # and return False.
        canon_path = normpath(abspath(dir_path))
        if follow_symlinks:
            canon_path = realpath(canon_path)
        if canon_path in searched_dirs:
            return True
        searched_dirs.add(canon_path)
        return False

    for path_pattern in path_patterns:
        # Determine the set of paths matching this path_pattern.
        if any(glob_char in path_pattern for glob_char in GLOB_CHARS):
            paths = glob(path_pattern)
        elif follow_symlinks:
            paths = [path_pattern] if exists(path_pattern) else []
        else:
            # lexists() so that a broken symlink still counts as present.
            paths = [path_pattern] if lexists(path_pattern) else []
        if not paths:
            if on_error is None:
                pass
            elif on_error is _NOT_SPECIFIED:
                # Best effort: there may be no usable "log" global.
                try:
                    log.error("`%s': No such file or directory", path_pattern)
                except (NameError, AttributeError):
                    pass
            else:
                on_error(path_pattern)

        for path in paths:
            if (follow_symlinks or not islink(path)) and isdir(path):
                if skip_dupe_dirs and already_searched(path):
                    continue

                # 'includes' SHOULD affect whether a dir is yielded.
                if (dirs == "always"
                    or (dirs == "if-not-recursive" and not recursive)
                   ) and _should_include_path(path, includes, excludes):
                    yield path

                # However, if recursive, 'includes' should NOT affect
                # whether a dir is recursed into. Otherwise you could
                # not:
                #   script -r --include="*.py" DIR
                if recursive and _should_include_path(path, [], excludes):
                    for dirpath, dirnames, filenames in _walk(path,
                            follow_symlinks=follow_symlinks):
                        dirnames_to_keep = []
                        for dirname in dirnames:
                            d = join(dirpath, dirname)
                            if skip_dupe_dirs and already_searched(d):
                                continue
                            if dirs == "always" \
                               and _should_include_path(d, includes, excludes):
                                yield d
                            if _should_include_path(d, [], excludes):
                                dirnames_to_keep.append(dirname)
                        # Prune in place: _walk() only descends into the
                        # names left in the list it yielded.
                        dirnames[:] = dirnames_to_keep
                        if files:
                            for filename in sorted(filenames):
                                f = join(dirpath, filename)
                                if _should_include_path(f, includes, excludes):
                                    yield f

            elif files and _should_include_path(path, includes, excludes):
                yield path

Diff to Previous Revision

--- revision 2 2010-05-18 17:06:45
+++ revision 3 2010-06-18 03:30:26
@@ -92,7 +92,7 @@
 
 _NOT_SPECIFIED = ("NOT", "SPECIFIED")
 def _paths_from_path_patterns(path_patterns, files=True, dirs="never",
-                              recursive=True, includes=[], excludes=[],
+                              recursive=True, includes=None, excludes=None,
                               skip_dupe_dirs=False,
                               follow_symlinks=False,
                               on_error=_NOT_SPECIFIED):
@@ -181,6 +181,8 @@
 
     assert not isinstance(path_patterns, basestring), \
         "'path_patterns' must be a sequence, not a string: %r" % path_patterns
+    if includes is None: includes = []
+    if excludes is None: excludes = []
     GLOB_CHARS = '*?['
 
     if skip_dupe_dirs:

History