# Welcome, guest | Sign In | My Account | Store | Cart
#!/usr/bin/python
import fnmatch, ftplib, optparse, os, stat, string, sys, time

class FtpWalker:
    """Thin wrapper around one ``ftplib.FTP`` session.

    The connection is opened and logged in by the constructor and kept in
    ``self.ftp``; the other methods change directory, list the current
    directory and download single files through it.
    """

    def __init__( self, site, user, passwd ):
        """Connect to ``site`` and log in as ``user`` / ``passwd``."""
        self.ftp = ftplib.FTP( site, user, passwd )

    def cd( self, path ):
        """Change the remote working directory to ``path``.

        Returns True on success and False when the FTP layer reports an
        error, so callers can simply skip directories they cannot enter.
        """
        try:
            self.ftp.cwd( path )
        except ftplib.all_errors:
            # Only FTP/connection problems mean "cannot enter"; anything
            # else (e.g. KeyboardInterrupt) is allowed to propagate.
            return False
        return True

    def pwd( self ):
        """Return the remote working directory as reported by the server."""
        return self.ftp.pwd()

    def get( self, fileinfo, binary=True, callback=None ):
        """Download the remote file described by ``fileinfo``.

        fileinfo -- object with ``name``, ``longname``, ``date`` and ``mode``
                    attributes (as built by extract_info)
        binary   -- transfer with retrbinary when true, retrlines otherwise
        callback -- optional sink for the received data; when omitted the
                    data is written to the local file ``fileinfo.longname``

        Nothing is transferred when alreadydownloaded() answers
        DOWNLOAD_NONE.  A partially downloaded binary file is resumed from
        the current size of the local file.
        """
        status = alreadydownloaded( fileinfo )
        if status == DOWNLOAD_NONE:
            return
        filename = fileinfo.longname
        # Stays None when the caller supplied its own callback: in that case
        # there is no local file for this method to close or stamp.
        localfile = None
        if not callback:
            localfile = createfile( fileinfo, binary, status == DOWNLOAD_PARTIAL )
            if binary:
                callback = localfile.write
            else:
                # retrlines() hands over each line with its terminator
                # stripped, so one has to be written back.
                def callback( line ):
                    localfile.write( line + "\n" )
        try:
            getstr = "RETR %s" % fileinfo.name
            if binary:
                if status == DOWNLOAD_PARTIAL:
                    self.ftp.retrbinary( getstr, callback, rest=os.path.getsize( filename ) )
                else:
                    self.ftp.retrbinary( getstr, callback )
            else:
                self.ftp.retrlines( getstr, callback )
        finally:
            if localfile is not None:
                localfile.close()
                # Done even after a failed transfer: a local file carrying
                # the remote date but a different size is what
                # alreadydownloaded() classifies as DOWNLOAD_PARTIAL.
                os.utime( filename, (fileinfo.date, fileinfo.date) )
                os.chmod( filename, fileinfo.mode )

    def ls( self, cwd ):
        """List the current remote directory.

        ``cwd`` is the path recorded in each returned entry.  Returns a list
        with one extract_info() result per listing line; a ``total N``
        summary line, which some servers send first, is skipped because it
        is not a file entry.
        """
        lines = []
        self.ftp.retrlines( "LIST", lines.append )
        return [ extract_info( cwd, line ) for line in lines
                 if not line.startswith( "total " ) ]

# Verdicts produced by alreadydownloaded() and acted on by FtpWalker.get().
DOWNLOAD_FULL = 0     # fetch the whole file
DOWNLOAD_PARTIAL = 1  # resume an interrupted transfer
DOWNLOAD_NONE = 2     # local copy is complete, nothing to fetch

def alreadydownloaded( fileinfo ):
    """Decide how much of the remote file still has to be fetched.

    Compares the local file ``fileinfo.longname`` with the remote date and
    size and returns one of the DOWNLOAD_* verdicts:

    * no local file                     -> DOWNLOAD_FULL
    * same date (to the second), same size -> DOWNLOAD_NONE
    * same date, different size         -> DOWNLOAD_PARTIAL
    * different date                    -> the local file is renamed to a
      versioned backup name and DOWNLOAD_FULL is returned
    """
    local_path = fileinfo.longname
    if not os.path.isfile( local_path ):
        return DOWNLOAD_FULL

    local_date, remote_date = os.path.getmtime( local_path ), fileinfo.date
    local_size, remote_size = os.path.getsize( local_path ), fileinfo.size

    if round( local_date ) != round( remote_date ):
        # The local copy belongs to another version: move it out of the way.
        backup_name = mknewversion( fileinfo.path, fileinfo.name )
        os.rename( fileinfo.longname, backup_name )
        return DOWNLOAD_FULL

    if local_size == remote_size:
        return DOWNLOAD_NONE
    return DOWNLOAD_PARTIAL

def mknewversion( path, filename ):
    """Return the first unused backup name for ``filename`` inside ``path``.

    Candidates have the form ``<path>/.<filename>.NNN`` with NNN counting up
    from 001; the first one that does not exist yet is returned.
    """
    number = 1
    while True:
        candidate = os.path.join( path, ".%s.%03d" % (filename, number) )
        if not os.path.exists( candidate ):
            return candidate
        number += 1
 
def iff( test_, then_, else_ ):
    """Functional if: return ``then_`` when ``test_`` is true, else ``else_``.

    Both alternatives are ordinary arguments and therefore already evaluated
    by the time this runs, so pass plain values rather than expensive calls.
    """
    if not test_:
        return else_
    return then_

def createfile( fileinfo, binary, append ):
    """Open the local destination file for a download and return it.

    fileinfo -- object providing ``path`` (local directory) and ``longname``
                (full local file name)
    binary   -- open in binary mode when true, text mode otherwise
    append   -- when true and the file already exists, open it for appending
                so an interrupted download can be continued; otherwise the
                file is created or truncated

    The directory ``fileinfo.path`` is created if missing.  A file that is
    to be appended to is made owner-writable first.
    """
    fname = fileinfo.longname
    # An empty path means "the current directory" (entries from the site
    # root are recorded with path ''); os.makedirs('') would raise.
    if fileinfo.path and not os.path.isdir( fileinfo.path ):
        os.makedirs( fileinfo.path )
    if append and os.path.isfile( fname ):
        # Append must replace write in the mode string: tacking 'a' onto
        # 'wb' still asks for a truncating open (or is rejected outright).
        mode = 'a'
        perm = os.stat( fname )[stat.ST_MODE]
        if not perm & stat.S_IWUSR:
            os.chmod( fname, perm | stat.S_IWUSR )
    else:
        mode = 'w'
    if binary:
        mode += 'b'
    return open( fname, mode )

# strptime/strftime layouts for listing dates.
curr_year_fmt = '%b %d %H:%M'     # year-less form: month, day, time of day
prev_year_fmt = '%b %d  %Y'       # dated form: month, day, year
unified_fmt = '%Y-%m-%d-%H:%M'    # fully specified form

def updatetuple( t, i, x ):
    """Return a copy of sequence ``t`` as a tuple with field ``i`` replaced by ``x``."""
    fields = list( t )
    head = tuple( fields[:i] )
    tail = tuple( fields[i+1:] )
    return head + (x,) + tail

def parsePrevYear( date ):
    """Parse a listing date that carries its year (layout ``prev_year_fmt``)."""
    return time.strptime( date, prev_year_fmt )
def parseCurrYear( date ):
    """Parse a year-less listing date (layout ``curr_year_fmt``).

    strptime() has no year to read, so the year is filled in afterwards.
    Returns a 9-field time tuple suitable for time.mktime().
    """
    yearless = time.strptime( date, curr_year_fmt )
    currentYear = time.gmtime()[0]
    parsed = updatetuple( yearless, 0, currentYear )
    # Listings drop the year only for recent files.  If stamping the current
    # year puts the file more than a day into the future (e.g. a December
    # file listed in January), it really belongs to the previous year.  The
    # one-day slack absorbs clock and timezone differences to the server.
    if time.mktime( parsed ) > time.time() + 24 * 60 * 60:
        parsed = updatetuple( yearless, 0, currentYear - 1 )
    return parsed

def dateParser( date ):
    """Pick the parser for ``date``: a ':' means a time of day, i.e. the year-less form."""
    if ':' in date:
        return parseCurrYear
    return parsePrevYear
def parseDate( date ):
    """Convert a listing date string to a timestamp (via time.mktime)."""
    parse = dateParser( date )
    return time.mktime( parse( date ) )

def displayDate( date ):
    """Format timestamp ``date`` the way a listing shows it.

    Dates in the current (UTC) year use ``curr_year_fmt``; all others use
    ``prev_year_fmt``.
    """
    stamp = time.gmtime( date )
    today = time.gmtime()
    if stamp[0] == today[0]:
        layout = curr_year_fmt
    else:
        layout = prev_year_fmt
    return time.strftime( layout, stamp )

# Permission bits of one rwx triplet ...
R_MSK = 4
W_MSK = 2
X_MSK = 1
Z_MSK = 0
# ... and the characters that stand for them in a mode string.
R_STR = 'r'
W_STR = 'w'
X_STR = 'x'
Z_STR = '-'

def str2mode( str ):
    """Convert one three-character permission triplet (e.g. 'rw-') to its bits.

    Returns the OR of R_MSK / W_MSK / X_MSK for the permissions present.
    Only the read/write/execute bits are produced; set-id and sticky bits
    themselves are not represented in the result.
    """
    mode = Z_MSK
    if str[0] == R_STR:
        mode |= R_MSK
    if str[1] == W_STR:
        mode |= W_MSK
    # In ls-style mode strings a lowercase 's' or 't' in the execute slot
    # means "execute permission plus set-id/sticky"; the uppercase forms
    # mean the special bit without execute permission.
    if str[2] in ( X_STR, 's', 't' ):
        mode |= X_MSK
    return mode

def mode2str( mode ):
    """Render the three permission bits of ``mode`` as a triplet such as 'r-x'."""
    chars = []
    for mask, letter in ( (R_MSK, R_STR), (W_MSK, W_STR), (X_MSK, X_STR) ):
        if mode & mask:
            chars.append( letter )
        else:
            chars.append( Z_STR )
    return ''.join( chars )

def str2fullmode( str ):
    """Convert a nine-character permission string (user, group, other) to mode bits."""
    mode = 0
    for start in ( 0, 3, 6 ):
        # Each triplet contributes three bits, most significant (user) first.
        mode = mode << 3 | str2mode( str[start:start + 3] )
    return mode

def fullmode2str( mode ):
    """Render the low nine bits of ``mode`` as user, group and other triplets."""
    triplets = [ mode2str( mode >> shift & 0x7 ) for shift in ( 6, 3, 0 ) ]
    return ''.join( triplets )

def str2perm( str ):
    """Split a listing mode field into ``(isdir, islink, mode)``.

    The first character tells directories ('d') and links ('l') apart; the
    remainder is converted by str2fullmode().
    """
    kind = str[0]
    return kind == 'd', kind == 'l', str2fullmode( str[1:] )

def perm2str( isdir, islink, mode ):
    """Build a listing mode field: type character followed by the nine permission characters."""
    if isdir:
        kind = 'd'
    elif islink:
        kind = 'l'
    else:
        kind = '-'
    return kind + fullmode2str( mode )

def extract_info( cwd, line ):
    """Turn one line of a directory listing into a FileInfo.

    cwd  -- path recorded as the entry's directory
    line -- listing line of the form
            ``<mode> <links> <owner> <group> <size> <date, 12 chars> <name>``

    The first five whitespace-separated fields are split off; of the
    remainder, the first 12 characters are the date and everything after
    the following separator character is the name.
    """
    fullmode, links, owner, group, size, rest = line.split( None, 5 )
    isdir, islink, mode = str2perm( fullmode )
    dateStr, name = rest[:12], rest[13:]
    date = parseDate( dateStr )
    # FileInfo takes the raw listing line as its final argument; leaving it
    # out made every call fail with a TypeError.
    return FileInfo( cwd, name, fullmode, isdir, islink, mode, int( links ),
                     owner, group, int( size ), dateStr, date, line )

class FileInfo:
    """Plain record describing one entry of a remote directory listing."""

    def __init__( self, path, name, modeStr, isdir, islink, mode, links, owner, group, size, dateStr, date, line ):
        """Store the given fields and derive ``longname`` and ``age``."""
        # location
        self.path = path
        self.name = name
        # entry type
        self.isdir = isdir
        self.islink = islink
        # permissions and ownership
        self.modeStr = modeStr
        self.mode = mode
        self.owner = owner
        self.group = group
        # remaining listing columns
        self.links = links
        self.size = size
        self.dateStr = dateStr
        self.date = date
        # derived values
        self.longname = os.path.join( path, name )
        self.age = now - self.date  # relative to the module-level ``now``
        self.line = line

def dropslashes( str ):
    """Return ``str`` without its leading '/' characters."""
    return str.lstrip( '/' )

def excluded( exclude_patterns, dir ):
    """Return True when ``dir`` matches at least one of ``exclude_patterns``."""
    matched = False
    for candidate in exclude_patterns:
        if pattern( candidate, dir ):
            matched = True
            break
    return matched

def listSiteGen( walker, dir, opts ):
    """Generate the entries found below remote directory ``dir``, depth first.

    Directories matching ``opts.exclude`` and directories that cannot be
    entered are skipped.  Sub-directories are walked recursively and only
    their contents are yielded; every other entry is yielded as it is.  The
    walker is sent back to the directory it started in once the walk of
    ``dir`` is finished.
    """
    origin = walker.pwd()
    entered = not excluded( opts.exclude, dir ) and walker.cd( dir )
    if entered:
        listing_path = dropslashes( os.path.join( origin, dir ) )
        for entry in walker.ls( listing_path ):
            if not entry.isdir:
                yield entry
                continue
            for nested in listSiteGen( walker, entry.name, opts ):
                yield nested
    walker.cd( origin )

def ftpfind( walker, dir, opts ):
    """Print, and unless ``opts.test`` is set download, every matching file below ``dir``.

    A file matches when the filter ``opts.expr`` accepts it; the text
    printed for it comes from ``opts.printer``.
    """
    for found in listSiteGen( walker, dir, opts ):
        if not opts.expr( found ):
            continue
        print( "%s" % opts.printer( found ) )
        if opts.test:
            continue
        walker.get( found )
def date( d, f=None ):
    """Convert date string ``d`` to a timestamp.

    With a strptime layout ``f`` the string is parsed using that layout;
    without one it is handed to parseDate().
    """
    if not f:
        return parseDate( d )
    return time.mktime( time.strptime( d, f ) )

def pattern( p, v ):
    """Return whether value ``v`` matches the shell-style wildcard pattern ``p``."""
    return fnmatch.fnmatch( v, p )
# Size units, in bytes.
kilobyte = 1024
megabyte = kilobyte * kilobyte
gigabyte = kilobyte * megabyte
terabyte = kilobyte * gigabyte

# Time units, in seconds.
second = 1
minute = 60*second
hour = 60*minute
day = 24*hour
week = 7*day
year = 52*week  # i.e. 364 days
 
def expr_cb( option, opt_str, value, parser ):
    """optparse callback: compile the option text into the file filter ``expr``.

    The text becomes the body of ``lambda file: ...`` and is passed to
    eval(), so it is executed as Python code.
    """
    source = "lambda file: " + value
    parser.values.expr = eval( source )
def print_cb( option, opt_str, value, parser ):
    """optparse callback: compile the option text into the summary ``printer``.

    The text becomes the body of ``lambda file: ...`` and is passed to
    eval(), so it is executed as Python code.
    """
    source = "lambda file: " + value
    parser.values.printer = eval( source )

# Reference instant from which FileInfo.age is measured (age = now - date);
# daystart_cb() rebases it to 00:00 of the current day.
# NOTE(review): the UTC time tuple is fed to mktime(), the same conversion
# parseDate() applies to listing dates -- confirm this is the intended
# timezone handling.
now = time.mktime( time.gmtime() ) # used by age filter

def daystart_cb( option, opt_str, value, parser ):
    """optparse callback: rebase the module-level ``now`` to today at 00:00:00."""
    global now
    today = time.gmtime()
    # Keep the date and the trailing wday/yday/isdst fields, zero the clock.
    midnight = tuple( today[:3] ) + ( 0, 0, 0 ) + tuple( today[6:9] )
    now = time.mktime( midnight )

def def_printer( file ):
    """Default printer: show the entry's raw listing line."""
    return file.line

def def_expr( file ):
    """Default filter: accept every file."""
    return True

def parse_command_line():
    """Define the command-line options and parse the process arguments.

    Returns the ``(options, args)`` pair produced by optparse.
    """
    parser = optparse.OptionParser()
    parser.set_defaults(
        user="anonymous",
        password="ftpfind@sf.net",
        expr=def_expr,
        test=False,
        exclude=[],
        printer=def_printer,
    )
    parser.add_option(
        "-e", "--expr",
        action="callback",
        callback=expr_cb,
        type="string",
        help="use the python expression, lambda file: <EXPR>, as a filter (must return boolean)",
        metavar="EXPR",
    )
    parser.add_option(
        "-p", "--password",
        help="specify the password to use",
        metavar="PASSWD",
    )
    parser.add_option(
        "--print",
        action="callback",
        callback=print_cb,
        type="string",
        help="use the printer, lambda file: <EXPR>, to print file summary (must return string)",
        metavar="EXPR",
    )
    parser.add_option(
        "-s", "--daystart",
        action="callback",
        callback=daystart_cb,
        help="calculate ages from today @ 00:00",
    )
    parser.add_option(
        "-t", "--test",
        action="store_true",
        help="print filename but do not perform file transfer",
    )
    parser.add_option(
        "-u", "--user",
        help="specify the username to use",
        metavar="USER",
    )
    parser.add_option(
        "-x", "--exclude",
        action="append",
        help="do not traverse this directory",
        metavar="DIR",
    )
    return parser.parse_args()

if __name__ == '__main__':
    # Script entry point: [options] SITE [DIR ...]
    opts, args = parse_command_line()
    if not args:
        # A missing SITE used to surface as an IndexError traceback.
        sys.stderr.write( "usage: %s [options] SITE [DIR ...]\n" % sys.argv[0] )
        sys.exit(2)
    site, dirs = args[0], args[1:]
    if len( dirs ) == 0: dirs = ['/']
    try:
        walker = FtpWalker( site, opts.user, opts.password )
    except ftplib.all_errors:
        # Only FTP/connection failures are reported here; the underlying
        # reason is shown too, since a refused connection or unknown host
        # ends up in this branch as well as a rejected login.
        print( "Couldn't authenticate '%s' with password '%s' on %s" % (opts.user, opts.password, site) )
        sys.stderr.write( "%s\n" % (sys.exc_info()[1],) )
        sys.exit(3)
    else:
        for dir in dirs:
            ftpfind( walker, dir, opts )

# History