On several occasions I wanted to peruse an FTP site for a specific rpm within a certain age range and with a particular pattern to its filename and none of the tools available gave me that functionality. This recipe gives a find-like tool to the world of FTP. Great for cron jobs that download new RPMs that fit some tricky condition (e.g. less than 1 meg, less than a week old, ends in x86_64.tar.gz etc).
#!/usr/bin/python
import fnmatch, ftplib, optparse, os, stat, string, sys, time
class FtpWalker:
    """Small wrapper around an ``ftplib.FTP`` session.

    Offers the handful of operations the site walker needs: changing
    directory, reporting the current directory, listing it and downloading
    one listed file.
    """

    def __init__( self, site, user, passwd ):
        """Connect to *site* and log in as *user* / *passwd*."""
        self.ftp = ftplib.FTP( site, user, passwd )

    def cd( self, path ):
        """Change the remote directory to *path*.

        Returns True on success and False when the server or the connection
        reports an error (any of ``ftplib.all_errors``).
        """
        try:
            self.ftp.cwd( path )
        except ftplib.all_errors:
            return False
        return True

    def pwd( self ):
        """Return the remote working directory as reported by the server."""
        return self.ftp.pwd()

    def get( self, fileinfo, binary=True, callback=None ):
        """Download the file described by *fileinfo*.

        Nothing happens when alreadydownloaded() reports DOWNLOAD_NONE.
        Without a *callback* the data is written to ``fileinfo.longname``
        and the local file receives the remote date and mode afterwards
        (also when the transfer fails, so a later run can resume it).
        With a *callback* every chunk (binary) or line (text) is handed to
        it and no local file is created.
        """
        status = alreadydownloaded( fileinfo )
        if status == DOWNLOAD_NONE:
            return
        filename = fileinfo.longname
        # Must exist even when the caller supplies a callback, otherwise the
        # finally clause below fails with a NameError.
        localfile = None
        if not callback:
            # Only binary transfers can be resumed (REST); a text transfer
            # always restarts, so it must not append to the partial file.
            resume = binary and status == DOWNLOAD_PARTIAL
            localfile = createfile( fileinfo, binary, resume )
            if binary:
                callback = localfile.write
            else:
                # retrlines() strips the line terminator from every line.
                def callback( text ):
                    localfile.write( text + "\n" )
        try:
            getstr = "RETR %s" % fileinfo.name
            if binary:
                if status == DOWNLOAD_PARTIAL:
                    self.ftp.retrbinary( getstr, callback, rest=os.path.getsize( filename ) )
                else:
                    self.ftp.retrbinary( getstr, callback )
            else:
                self.ftp.retrlines( getstr, callback )
        finally:
            if localfile:
                localfile.close()
                os.utime( filename, (fileinfo.date, fileinfo.date) )
                os.chmod( filename, fileinfo.mode )

    def ls( self, cwd ):
        """Return a list of FileInfo objects for the current remote directory.

        *cwd* is the local path recorded in every FileInfo.  Blank lines and
        the "total N" summary line some servers send are skipped because
        they do not describe a file.
        """
        lines = []
        self.ftp.retrlines( "LIST", lines.append )
        return [ extract_info( cwd, entry ) for entry in lines
                 if entry.strip() and not entry.startswith( "total " ) ]
# Download decisions returned by alreadydownloaded().
DOWNLOAD_FULL, DOWNLOAD_PARTIAL, DOWNLOAD_NONE = 0, 1, 2

def alreadydownloaded( fileinfo ):
    """Decide how much of *fileinfo* still has to be fetched.

    Compares the local file ``fileinfo.longname`` with the remote date and
    size.  Returns DOWNLOAD_NONE when both match, DOWNLOAD_PARTIAL when only
    the date matches, and DOWNLOAD_FULL otherwise.  A local file with a
    different date is first renamed to a hidden versioned name.
    """
    local = fileinfo.longname
    if not os.path.isfile( local ):
        return DOWNLOAD_FULL # no file, download

    local_date = os.path.getmtime( local )
    local_size = os.path.getsize( local )

    if round( local_date ) != round( fileinfo.date ):
        # old version, move it out of the way and fetch the new one
        os.rename( fileinfo.longname, mknewversion( fileinfo.path, fileinfo.name ) )
        return DOWNLOAD_FULL

    if local_size == fileinfo.size:
        return DOWNLOAD_NONE # already downloaded
    return DOWNLOAD_PARTIAL # partially downloaded
def mknewversion( path, filename ):
    """Return the first unused backup name for *filename* inside *path*.

    Backup names look like ``.<filename>.001``, ``.<filename>.002`` and so
    on; the lowest number that does not exist yet is chosen.
    """
    version = 1
    while True:
        candidate = os.path.join( path, ".%s.%03d" % (filename, version) )
        if not os.path.exists( candidate ):
            return candidate
        version += 1
def iff( test_, then_, else_ ):
    """Return *then_* when *test_* is true, otherwise *else_*.

    Both alternatives are ordinary arguments, so they are always evaluated
    before the call; pass plain values only.
    """
    if not test_:
        return else_
    return then_
def createfile( fileinfo, binary, append ):
    """Open the local file for *fileinfo* and return the file object.

    Missing parent directories are created.  With *append* true and the
    file already present, it is opened for appending (so a partial download
    can be continued) and made user-writable first if necessary; otherwise
    it is created or truncated.  *binary* selects binary or text mode.
    """
    fname = fileinfo.longname
    # The path is empty for files in the site root; makedirs('') would fail.
    if fileinfo.path and not os.path.isdir( fileinfo.path ):
        os.makedirs( fileinfo.path )
    if append and os.path.isfile( fname ):
        # 'a' replaces 'w' here: a combined "wa" mode is not valid.
        mode = 'a'
        perm = os.stat( fname )[stat.ST_MODE]
        if not perm & stat.S_IWUSR:
            os.chmod( fname, perm | stat.S_IWUSR )
    else:
        mode = 'w'
    if binary:
        mode += 'b'
    return open( fname, mode )
# Listing date layouts: entries with a clock time carry no year, entries
# with a year carry no clock time.
curr_year_fmt, prev_year_fmt, unified_fmt = '%b %d %H:%M', '%b %d %Y', '%Y-%m-%d-%H:%M'

def updatetuple( t, i, x ): # insert x into the ith field of tuple, t
    """Return a copy of tuple *t* with field *i* replaced by *x*."""
    l = list( t )
    return tuple( l[:i] + [x] + l[i+1:] )

def parsePrevYear( date ):
    """Parse a ``Mon DD YYYY`` listing date into a time structure."""
    return time.strptime( date, prev_year_fmt )

def parseCurrYear( date ):
    """Parse a ``Mon DD HH:MM`` listing date into a time structure.

    The listing omits the year, so it is inferred: the most recent year
    (next, current or previous) for which the date exists and is not more
    than a day in the future.  Parsing together with a candidate year keeps
    "Feb 29" valid, and the future check keeps last December's files from
    being dated eleven months ahead when listed in January.

    Raises ValueError when *date* does not match the expected layout.
    """
    this_year = time.gmtime()[0]
    # One day of slack for clock and time-zone differences with the server.
    latest_plausible = time.time() + 24 * 60 * 60
    fallback = None
    for year in ( this_year + 1, this_year, this_year - 1 ):
        try:
            parsed = time.strptime( "%d %s" % (year, date), "%Y " + curr_year_fmt )
        except ValueError:
            # malformed input, or a day that does not exist in this year
            continue
        if time.mktime( parsed ) <= latest_plausible:
            return parsed
        fallback = parsed
    if fallback is None:
        raise ValueError( "time data %r does not match format %r" % (date, curr_year_fmt) )
    # Every valid candidate lies in the future; return the oldest one.
    return fallback

def dateParser( date ):
    """Return the parser matching *date*: a ':' marks the year-less layout."""
    if ':' in date:
        return parseCurrYear
    return parsePrevYear

def parseDate( date ):
    """Convert a listing date string into seconds since the epoch (local time)."""
    return time.mktime( dateParser( date )( date ) )

def displayDate( date ):
    """Format timestamp *date* the way a listing would show it.

    Dates in the current year use the ``Mon DD HH:MM`` layout, others the
    ``Mon DD YYYY`` layout.  Local time is used so that the result is the
    inverse of parseDate(), which builds its timestamps with mktime().
    """
    date_struct = time.localtime( date )
    if date_struct[0] == time.localtime()[0]:
        year_fmt = curr_year_fmt
    else:
        year_fmt = prev_year_fmt
    return time.strftime( year_fmt, date_struct )
# Bit masks and listing characters for one rwx permission triplet.
R_MSK, W_MSK, X_MSK, Z_MSK = 4, 2, 1, 0
R_STR, W_STR, X_STR, Z_STR = 'r', 'w', 'x', '-'

def str2mode( str ):
    """Convert one three-character triplet such as ``r-x`` to its 0-7 value.

    A lowercase ``s`` or ``t`` in the last position means "executable plus
    a setuid/setgid/sticky bit" and therefore counts as executable; the
    special bit itself is not carried into the result.
    """
    mode = Z_MSK
    if str[0] == R_STR:
        mode |= R_MSK
    if str[1] == W_STR:
        mode |= W_MSK
    if str[2] in ( X_STR, 's', 't' ):
        mode |= X_MSK
    return mode

def mode2str( mode ):
    """Convert a 0-7 permission value to its three-character triplet."""
    chars = []
    for mask, char in ( (R_MSK, R_STR), (W_MSK, W_STR), (X_MSK, X_STR) ):
        if mode & mask:
            chars.append( char )
        else:
            chars.append( Z_STR )
    return ''.join( chars )

def str2fullmode( str ):
    """Convert nine permission characters (user, group, other) to a mode number."""
    u, g, o = str[0:3], str[3:6], str[6:9]
    return str2mode( u ) << 6 | str2mode( g ) << 3 | str2mode( o )

def fullmode2str( mode ):
    """Convert a mode number to its nine permission characters."""
    u, g, o = mode >> 6 & 0x7, mode >> 3 & 0x7, mode & 0x7
    return mode2str( u ) + mode2str( g ) + mode2str( o )

def str2perm( str ):
    """Split a ten-character listing mode into (isdir, islink, mode)."""
    return str[0] == 'd', str[0] == 'l', str2fullmode( str[1:] )

def perm2str( isdir, islink, mode ):
    """Build the ten-character listing mode from (isdir, islink, mode)."""
    if isdir:
        kind = 'd'
    elif islink:
        kind = 'l'
    else:
        kind = '-'
    return kind + fullmode2str( mode )
def extract_info( cwd, line ):
    """Build a FileInfo from one line of a Unix-style ``LIST`` reply.

    *cwd* is stored as the entry's path.  The line is expected to hold the
    mode, link count, owner, group and size fields, followed by a
    twelve-character date and the name.
    """
    fullmode, links, owner, group, size, rest = line.split( None, 5 )
    isdir, islink, mode = str2perm( fullmode )
    dateStr, name = rest[:12], rest[13:]
    if islink and ' -> ' in name:
        # Symbolic links are listed as "name -> target"; keep the name only.
        name = name.split( ' -> ', 1 )[0]
    date = parseDate( dateStr )
    # The raw line is the constructor's last argument; leaving it out made
    # every call fail with a TypeError.
    return FileInfo( cwd, name, fullmode, isdir, islink, mode, int( links ),
                     owner, group, int( size ), dateStr, date, line )
class FileInfo:
    """Plain record describing one entry of a remote directory listing."""

    def __init__( self, path, name, modeStr, isdir, islink, mode, links, owner, group, size, dateStr, date, line ):
        """Store the listing fields and derive ``longname`` and ``age``."""
        self.path = path
        self.name = name
        self.isdir = isdir
        self.islink = islink
        self.modeStr = modeStr
        self.mode = mode
        self.owner = owner
        self.group = group
        self.links = links
        self.size = size
        self.dateStr = dateStr
        self.date = date
        # path and name joined with the local path separator
        self.longname = os.path.join( path, name )
        # seconds between the module-level reference time `now` and the date
        self.age = now - date
        # the listing line this record was built from
        self.line = line
def dropslashes( str ):
    """Return *str* without its leading '/' characters."""
    return str.lstrip( '/' )
def excluded( exclude_patterns, dir ):
    """Return True when *dir* matches any shell-style pattern in *exclude_patterns*."""
    return any( fnmatch.fnmatch( dir, exclude_pattern )
                for exclude_pattern in exclude_patterns )
def listSiteGen( walker, dir, opts ):
    """Yield a FileInfo for every non-directory entry below *dir*.

    Directories matching ``opts.exclude`` and directories that cannot be
    entered are skipped.  While an entry is being yielded the walker's
    remote directory is the one containing it; the directory that was
    current on entry is restored once *dir* has been traversed.
    """
    path = walker.pwd()
    if excluded( opts.exclude, dir ) or not walker.cd( dir ):
        return
    for info in walker.ls( dropslashes( os.path.join( path, dir ) ) ):
        if not info.isdir:
            yield info
        elif info.name not in ( '.', '..' ):
            # Some servers list the "." and ".." entries; descending into
            # them would never terminate.
            for rec_info in listSiteGen( walker, info.name, opts ):
                yield rec_info
    walker.cd( path )
def ftpfind( walker, dir, opts ):
    """Print, and unless ``opts.test`` is set download, every matching file below *dir*.

    A file matches when ``opts.expr`` returns a true value for it; the text
    printed for it comes from ``opts.printer``.
    """
    for fileinfo in listSiteGen( walker, dir, opts ):
        if not opts.expr( fileinfo ):
            continue
        print( "%s" % opts.printer( fileinfo ) )
        if opts.test:
            continue
        walker.get( fileinfo )
def date( d, f=None ):
    """Convert date string *d* to seconds since the epoch.

    With a format *f* the string is parsed with ``time.strptime``; without
    one it is treated as a listing date and handed to parseDate().
    """
    if not f:
        return parseDate( d )
    return time.mktime( time.strptime( d, f ) )
def pattern( p, v ):
    """Return True when value *v* matches the shell-style pattern *p*."""
    matched = fnmatch.fnmatch( v, p )
    return matched
# Size units in bytes, for use in filter expressions.
kilobyte = 1024
megabyte = kilobyte * kilobyte
gigabyte = kilobyte * megabyte
terabyte = kilobyte * gigabyte

# Time units in seconds, for use in filter expressions.
# Note that a year is counted as 52 weeks (364 days).
second = 1
minute = 60*second
hour = 60*minute
day = 24*hour
week = 7*day
year = 52*week
def expr_cb( option, opt_str, value, parser ):
    """optparse callback: compile *value* into the filter stored as ``parser.values.expr``."""
    # NOTE: eval() executes whatever expression was given on the command
    # line; never pass text from an untrusted source to this option.
    source = "lambda file: " + value
    parser.values.expr = eval( source )
def print_cb( option, opt_str, value, parser ):
    """optparse callback: compile *value* into the formatter stored as ``parser.values.printer``."""
    # NOTE: eval() executes whatever expression was given on the command
    # line; never pass text from an untrusted source to this option.
    source = "lambda file: " + value
    parser.values.printer = eval( source )
# Reference time that file ages are measured from (used by age filter).
now = time.mktime( time.gmtime() )

def daystart_cb( option, opt_str, value, parser ):
    """optparse callback: move the reference time `now` back to today at 00:00."""
    global now
    today = time.gmtime()
    # keep the date and the trailing weekday/yearday/dst fields, zero the clock
    midnight = tuple( today[:3] ) + ( 0, 0, 0 ) + tuple( today[6:9] )
    now = time.mktime( midnight )
def def_printer( file ):
    """Default printer: show the listing line of *file*."""
    return file.line

def def_expr( file ):
    """Default filter: accept every file."""
    return True

def parse_command_line():
    """Parse the command line and return optparse's ``(options, args)`` pair."""
    parser = optparse.OptionParser()
    parser.set_defaults( user="anonymous", password="ftpfind@sf.net", expr=def_expr, test=False, exclude=[], printer=def_printer )
    # (flags, keyword settings) in the order they appear in the help text
    option_table = [
        ( ("-e", "--expr"), dict( action="callback", callback=expr_cb, type="string", help="use the python expression, lambda file: <EXPR>, as a filter (must return boolean)", metavar="EXPR" ) ),
        ( ("-p", "--password"), dict( help="specify the password to use", metavar="PASSWD" ) ),
        ( ("--print",), dict( action="callback", callback=print_cb, type="string", help="use the printer, lambda file: <EXPR>, to print file summary (must return string)", metavar="EXPR" ) ),
        ( ("-s", "--daystart"), dict( action="callback", callback=daystart_cb, help="calculate ages from today @ 00:00" ) ),
        ( ("-t", "--test"), dict( action="store_true", help="print filename but do not perform file transfer" ) ),
        ( ("-u", "--user"), dict( help="specify the username to use", metavar="USER" ) ),
        ( ("-x", "--exclude"), dict( action="append", help="do not traverse this directory", metavar="DIR" ) ),
    ]
    for flags, settings in option_table:
        parser.add_option( *flags, **settings )
    return parser.parse_args()
if __name__ == '__main__':
    # Usage: ftpfind.py [options] SITE [DIR ...]
    opts, args = parse_command_line()
    if not args:
        # Without this check a missing SITE ends in an IndexError traceback.
        sys.stderr.write( "usage: %s [options] SITE [DIR ...]\n" % sys.argv[0] )
        sys.exit(2)
    site, dirs = args[0], args[1:]
    if not dirs:
        dirs = ['/']
    try:
        walker = FtpWalker( site, opts.user, opts.password )
    except ftplib.all_errors as err:
        # Report the server's reason; the password is deliberately not
        # echoed, since this output often ends up in cron mail or logs.
        sys.stderr.write( "Couldn't log in to %s as '%s': %s\n" % (site, opts.user, err) )
        sys.exit(3)
    for dir in dirs:
        ftpfind( walker, dir, opts )
|
There are already a few FTP recipes in the cookbook, why another one? Well http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302592 is a little on the complicated side and you have to write an XML config file to use it - not my cup of tea; http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/275594 and http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/327141 are simple and straightforward but limited in their functionality. ftpmirror.py in the python distribution is closer to the mark but it didn't have the command-line flexibility I was after. What I needed was something that could be parameterized with various conditions like the Unix 'find' command so that I could, for instance, download the newest version of the foo*.x86_64.tar.gz provided it was less than two megabytes and less than a week old from ftp.bar.edu, like so:
ftpfind.py --expr='pattern( "foo*x86_64.tar.gz", file.name ) and file.age < week and file.size < 2*megabyte' ftp.bar.edu
There are already a few FTP recipes in the cookbook, why another one? Well http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/302592 is a little on the complicated side and you have to write an XML config file to use it - not my cup of tea; http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/275594 and http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/327141 are simple and straightforward but limited in their functionality. ftpmirror.py in the python distribution is closer to the mark but it didn't have the command-line flexibility I was after. What I needed was something that could be parameterized with various conditions like the Unix 'find' command so that I could, for instance, download the newest version of the foo*.x86_64.tar.gz provided it was less than two megabytes and less than a week old from ftp.bar.edu, like so:
ftpfind.py --expr='pattern( "foo*x86_64.tar.gz", file.name ) and file.age < week and file.size < 2*megabyte' ftp.bar.edu
previous doc prematurely truncated.
doc part 3.
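Typo in extract_info: the parameter, line, is missing from the call to the FileInfo constructor. It should have read: return FileInfo( cwd, name, fullmode, isdir, islink, mode, int( links ), owner, group, int( size ), dateStr, date, line )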