Welcome, guest | Sign In | My Account | Store | Cart
class LocalFTP(object):
    """
    Class adding recursive nlst() behavior to ftplib.FTP instance. The
    ftplib.FTP instance is available through the connection attribute, and
    is exposed through __getattr__.

    The behavior added by this class (recursive directory listing) is most
    appropriate for ftp connections on a local network over a fast connection, 
    or for small directories on remote ftp servers.

    The class relies on an externally defined callable, which can parse the
    lines returned by the ftplib.FTP.dir() method. This callable should be 
    bound to the 'dirparser' attribute on this object. The callable 'dirparser' 
    attribute can be initialized by passing it in to the constructor using the
    keyword argument 'dirparser', or by attaching the callable to the
    'dirparser' attribute after instantiation. 

    The callable should return parsed results as a dict. This class makes some
    assumptions about the contents of the dict returned by the user-defined 
    dirparser callable:

    -- the key 'trycwds' holds a list of booleans 

    -- the key 'names' holds a list of filenames in the dir() listing.
    
    -- The two lists should be the same length. A True value in the list
       referred to by the 'trycwds' key indicates the corresponding value
       in the list referred to by the 'names' key is a directory.

    -- The key names are based on fields in the ftpparse structure, from the   
       ftpparse module/C library.     
   
    -- Other keys can be included in the dict, but they are not used by the 
       rnlst() method.
      
    -- The callable should return an empty dict() if there is nothing to return
       from the dir listing.
       
    This module provides two parsers which seem to work ok, but it should
    be easy to create others if these don't work for some reason:

    -- parse_windows parses the dir listing from Windows ftp servers.
    -- parse_unix parses the dir listing from UNIX ftp servers.
    
    """
    
    def __init__(self, host='', user='', passwd='', acct='', 
                 dirparser=None):
        self.connection = ftplib.FTP(host, user, passwd, acct)
        self.remotepathsep = '/'
        self.dirparser = dirparser
        

    def __getattr__(self, name):
        """
        Delegate most requests to the underlying FTP object. 
        """

        return getattr(self.connection, name)


    def _dir(self,path):
        """
        Call dir() on path, and use callback to accumulate
        returned lines. Return list of lines.
        """

        dirlist = []
        try:
            self.connection.dir(path, dirlist.append)
        except ftplib.error_perm:
            warnings.warn('Access denied for path %s'%path)
        return dirlist


    def parsedir(self, path=''):
        """
        Method to parse the lines returned by the ftplib.FTP.dir(),
        when called on supplied path. Uses callable dirparser
        attribute. 
        """
        
        if self.dirparser is None:
            msg = ('Must set dirparser attribute to a callable '
                   'before calling this method')
            raise TypeError(msg)

        dirlines = self._dir(path)
        dirdict = self.dirparser(dirlines)
        return dirdict
        
        
    def _cleanpath(self, path):
        """
        Clean up path - remove repeated and trailing separators. 
        """
        
        slashes = self.remotepathsep*2
        while slashes in path:
            path = path.replace(slashes,self.remotepathsep)
            
        if path.endswith(self.remotepathsep):
            path = path[:-1]
            
        return path
        
        
    def _rnlst(self, path, filelist):
        """
        Recursively accumulate filelist starting at
        path, on the server accessed through this object's
        ftp connection.
        """
        
        path = self._cleanpath(path)
        dirdict = self.parsedir(path)
        
        trycwds = dirdict.get('trycwds', [])
        names = dirdict.get('names', [])
        
        for trycwd, name in zip(trycwds, names):           
            if trycwd: # name is a directory
                self._rnlst(self.remotepathsep.join([path, name]), filelist)
            else: 
                filelist.append(self.remotepathsep.join([path, name]))
                
        return filelist

                
    def rnlst(self, path=''):
        """
        Recursive nlst(). Return a list of filenames under path.
        """
      
        filelist = []
        return self._rnlst(path,filelist)
        

# Naive ftplib.FTP.dir() parsing functions, which may or may not work. (These
# happen to work for servers I connect to.) Create your own functions (perhaps
# using ftpparse) for more robust solutions.
       
def parse_windows(dirlines):
    """
    Parse the lines returned by ftplib.FTP.dir(), when called
    on a Windows ftp server. May not work for all servers, but it
    works for the ones I need to connect to.
    """

    typemap = {'<DIR>': True}
    
    if not dirlines:
        return dict()
    
    maxlen = max(len(line) for line in dirlines)
    columns = [slice(0, 9), slice(9, 17), slice(17, 29), slice(29, 38), 
               slice(38, maxlen+1)]

    fields = 'dates times trycwds sizes names'.split()

    values = []
    for line in dirlines:
        vals = [line[slc].strip() for slc in columns]
        vals[2] = typemap.get(vals[2], False)
        values.append(vals)
        
    lists = zip(*values)
    
    assert len(lists) == len(fields)

    return dict(zip(fields, lists))


def parse_unix(dirlines,startindex=1):
    """
    Parse the lines returned by ftplib.FTP.dir(), when called
    on a UNIX ftp server. May not work for all servers, but it
    works for the ones I need to connect to.
    """

    dirlines = dirlines[startindex:]
    if not dirlines:
        return dict()
   
    pattern = re.compile('(.)(.*?)\s+(.*?)\s+(.*?)\s+(.*?)\s+'
                         '(.*?)\s+(.*?\s+.*?\s+.*?)\s+(.*)')

    fields = 'trycwds tryretrs inodes users groups sizes dates names'.split()

    getmatches = lambda s:pattern.search(s)
    matches = filter(getmatches, dirlines)

    getfields = lambda s:pattern.findall(s)[0]
    lists = zip(*map(getfields, matches))
    
    # change the '-','d','l' values to booleans, where names referring
    # to directories get True, and others get False.
    lists[0] = ['d' == s for s in lists[0]]
    
    assert len(lists) == len(fields)
    
    return dict(zip(fields, lists))
    

History

  • revision 5 (17 years ago)
  • previous revisions are not available