Welcome, guest | Sign In | My Account | Store | Cart

This program is an advanced directory synchronization and update tool. It can be used to update content between two directories, synchronize them, or just report the difference in content between them. It uses the syntax of the 'diff' program in printing the difference.

Python, 501 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
""" pyrobocopy.py -

    Version: 1.0
    
    Report the difference in content
    of two directories, synchronize or
    update a directory from another, taking
    into account time-stamps of files etc.

    By Anand B Pillai 

    (This program is inspired by the windows
    'Robocopy' program.)

    Mod  Nov 11 Rewrote to use the filecmp module.
"""

import os, stat
import time
import shutil
import filecmp

def usage():
    """ Return the command line help text of the program.

    The returned string contains exactly one '%s' placeholder,
    which the caller is expected to fill in with the program name. """

    help_text = """
Pyrobocopy: Command line directory diff, synchronization, update & copy

Author: Anand Pillai

Usage: %s <sourcedir> <targetdir> Options

Main Options:\n
\t-d --diff         - Only report difference between sourcedir and targetdir
\t-s, --synchronize - Synchronize content between sourcedir and targetdir
\t-u, --update      - Update existing content between sourcedir and targetdir

Additional Options:\n
\t-p, --purge       - Purge files when synchronizing (does not purge by default).
\t-f, --force       - Force copying of files, by trying to change file permissions.
\t-n, --nodirection - Update files in source directory from target
\t                    directory (only updates target from source by default).
\t-c, --create      - Create target directory if it does not exist (By default,
\t                    target directory should exist.)
\t-m, --modtime     - Only compare file's modification times for an update (By default,
\t                    compares source file's creation time also).
"""
    return help_text



class PyRobocopier:
    """ An advanced directory synchronization, update
    and file copying class.

    Typical use: create an instance, hand the command line (without
    the program name) to parse_args(), call do_work() and finally
    report().

    The code only uses syntax that is valid on both Python 2.6+ and
    Python 3 (print is always called with one pre-formatted string). """

    prog_name = "pyrobocopy.py"

    def __init__(self):
        """ Set up default options and zeroed statistics """

        # source (dir1) and target (dir2) directories
        self.__dir1 = ''
        self.__dir2 = ''
        # filecmp.dircmp of the two top level directories
        self.__dcmp = None

        # behaviour flags, adjusted by the command line options
        # and by the selected main action
        self.__copyfiles = True
        self.__forcecopy = False
        # 0: source -> target, 1: target -> source, 2: both directions
        self.__copydirection = 0
        self.__updatefiles = True
        self.__creatdirs = True
        self.__purge = False
        self.__maketarget = False
        self.__modtimeonly = False
        # bound method (synchronize, update or dirdiff) run by do_work
        self.__mainfunc = None

        # stat vars
        self.__numdirs = 0
        self.__numfiles = 0
        self.__numdelfiles = 0
        self.__numdeldirs = 0
        self.__numnewdirs = 0
        self.__numupdates = 0
        self.__starttime = 0.0
        self.__endtime = 0.0

        # failure stat vars
        self.__numcopyfld = 0
        self.__numupdsfld = 0
        self.__numdirsfld = 0
        self.__numdelffld = 0
        self.__numdeldfld = 0

    def parse_args(self, arguments):
        """ Parse arguments

        'arguments' is the command line without the program name.
        Options may be given before or after the two directories.
        An option unknown to getopt is printed and None is returned;
        invalid directory arguments end the program via sys.exit. """

        import getopt

        # All options are plain flags: none of them takes a value, so
        # the long names must not carry a trailing '='.
        shortargs = "dsupfncm"
        longargs = ["diff", "synchronize", "update", "purge", "force",
                    "nodirection", "create", "modtime"]

        try:
            optlist, args = getopt.getopt( arguments, shortargs, longargs )
        except getopt.GetoptError as e:
            print(e)
            return None

        # getopt stops at the first non-option argument; whatever follows
        # (directories and trailing options) is sorted out by __setargs.
        allargs = [opt for opt, _ in optlist]
        allargs.extend( args )
        self.__setargs( allargs )

    def __setargs(self, argslist):
        """ Sets internal variables using arguments

        Every entry that is not a known option is taken as the source
        directory, then as the target directory; further entries are
        ignored. Exits the program when the arguments are unusable. """

        # Imported here so that the class also works when this file is
        # imported as a module instead of being run as a script.
        import sys

        for option in argslist:
            opt = option.lower()
            if opt in ('-s', '--synchronize'):
                self.__mainfunc = self.synchronize
            elif opt in ('-u', '--update'):
                self.__mainfunc = self.update
            elif opt in ('-d', '--diff'):
                self.__mainfunc = self.dirdiff
            elif opt in ('-p', '--purge'):
                self.__purge = True
            elif opt in ('-n', '--nodirection'):
                self.__copydirection = 2
            elif opt in ('-f', '--force'):
                self.__forcecopy = True
            elif opt in ('-c', '--create'):
                self.__maketarget = True
            elif opt in ('-m', '--modtime'):
                self.__modtimeonly = True
            elif self.__dir1 == '':
                self.__dir1 = option
            elif self.__dir2 == '':
                self.__dir2 = option

        if self.__dir1 == '' or self.__dir2 == '':
            sys.exit("Argument Error: Directory arguments not given!")
        if not os.path.isdir(self.__dir1):
            sys.exit("Argument Error: Source directory does not exist!")
        if not self.__maketarget and not os.path.isdir(self.__dir2):
            sys.exit("Argument Error: Target directory %s does not exist! (Try the -c option)." % self.__dir2)
        if self.__mainfunc is None:
            sys.exit("Argument Error: Specify an action (Diff, Synchronize or Update) ")

        # dircmp is lazy: the directories are only listed on first use,
        # i.e. after do_work had the chance to create the target.
        self.__dcmp = filecmp.dircmp(self.__dir1, self.__dir2)

    def do_work(self):
        """ Do work

        Creates the target directory if requested and runs the action
        selected on the command line. Returns None in every case. """

        if self.__mainfunc is None:
            # parse_args was not called or could not parse the options
            print('Nothing to do: no action (Diff, Synchronize or Update) was selected.')
            return None

        self.__starttime = time.time()
        try:
            if not os.path.isdir(self.__dir2):
                if self.__maketarget:
                    print('Creating directory %s' % self.__dir2)
                    try:
                        os.makedirs(self.__dir2)
                    except Exception as e:
                        print(e)
                        return None

            # All right!
            self.__mainfunc()
        finally:
            # always record the end time so that report() never
            # shows a negative duration
            self.__endtime = time.time()

    def __dowork(self, dir1, dir2, copyfunc = None, updatefunc = None):
        """ Private attribute for doing work

        Walks dir1 (source) and dir2 (target) in parallel. Entries only
        in the target are purged if purging is on, regular files only
        in the source are handed to copyfunc, directories only in the
        source are copied as a whole if directory creation is on,
        common regular files are handed to updatefunc and common
        directories are processed recursively. """

        print('Source directory:  %s :' % dir1)

        self.__numdirs += 1
        # local comparison object: the recursion below must not replace
        # the comparison of the top level directories
        dcmp = filecmp.dircmp(dir1, dir2)

        # Files & directories only in target directory
        if self.__purge:
            for f2 in dcmp.right_only:
                fullf2 = os.path.join(dir2, f2)
                print('Deleting  %s' % fullf2)
                try:
                    if os.path.isfile(fullf2):
                        try:
                            os.remove(fullf2)
                            self.__numdelfiles += 1
                        except OSError as e:
                            print(e)
                            self.__numdelffld += 1
                    elif os.path.isdir(fullf2):
                        # Best effort removal: errors are ignored by
                        # rmtree, so check afterwards what happened.
                        shutil.rmtree( fullf2, True )
                        if os.path.exists(fullf2):
                            self.__numdeldfld += 1
                        else:
                            self.__numdeldirs += 1
                except Exception as e:
                    print(e)
                    continue

        # Files & directories only in source directory
        for f1 in dcmp.left_only:
            try:
                st = os.stat(os.path.join(dir1, f1))
            except os.error:
                continue

            if stat.S_ISREG(st.st_mode):
                if copyfunc:
                    copyfunc(f1, dir1, dir2)
            elif stat.S_ISDIR(st.st_mode) and self.__creatdirs:
                fulld1 = os.path.join(dir1, f1)
                fulld2 = os.path.join(dir2, f1)
                try:
                    # Copy tree
                    print('Copying tree %s' % fulld2)
                    shutil.copytree(fulld1, fulld2)
                    self.__numnewdirs += 1
                    print('Done.')
                except (IOError, OSError, shutil.Error) as e:
                    # copytree reports plain OS errors as well as the
                    # collected shutil.Error
                    print(e)
                    self.__numdirsfld += 1

        # common files/directories
        for f1 in dcmp.common:
            try:
                st = os.stat(os.path.join(dir1, f1))
            except os.error:
                continue

            if stat.S_ISREG(st.st_mode):
                if updatefunc:
                    updatefunc(f1, dir1, dir2)
            elif stat.S_ISDIR(st.st_mode):
                fulld1 = os.path.join(dir1, f1)
                fulld2 = os.path.join(dir2, f1)
                # Call tail recursive
                self.__dowork(fulld1, fulld2, copyfunc, updatefunc)

    def __copyinto(self, filename, fromdir, todir):
        """ Private helper which copies fromdir/filename into todir,
        creating todir first if it does not exist """

        if not os.path.exists(todir):
            if self.__forcecopy:
                os.chmod(os.path.dirname(todir), 0o777)
            try:
                os.makedirs(todir)
            except OSError as e:
                print(e)
                self.__numdirsfld += 1
                # Without the directory, shutil.copy would create a
                # *file* called like the directory: give up instead.
                self.__numcopyfld += 1
                return

        if self.__forcecopy:
            os.chmod(todir, 0o777)

        sourcefile = os.path.join(fromdir, filename)
        try:
            shutil.copy(sourcefile, todir)
            self.__numfiles += 1
        except (IOError, OSError) as e:
            print(e)
            self.__numcopyfld += 1

    def __copy(self, filename, dir1, dir2):
        """ Private function for copying a file

        Does nothing unless file copying is switched on. Errors are
        printed and counted, never raised. """

        # NOTE: dir1 is source & dir2 is target
        if not self.__copyfiles:
            return

        print('Copying file %s %s %s' % (filename, dir1, dir2))
        try:
            if self.__copydirection in (0, 2):
                # source to target
                self.__copyinto(filename, dir1, dir2)
            elif self.__copydirection == 1:
                # target to source
                self.__copyinto(filename, dir2, dir1)
        except Exception as e:
            print('Error copying  file %s %s' % (filename, e))

    def __cmptimestamps(self, filest1, filest2):
        """ Compare time stamps of two files and return True
        if file1 (source) is more recent than file2 (target).

        filest1 and filest2 are os.stat results. Unless only
        modification times are to be compared, st_ctime of file1 is
        checked against the modification time of file2 as well. """

        if filest1.st_mtime > filest2.st_mtime:
            return True
        return (not self.__modtimeonly) and (filest1.st_ctime > filest2.st_mtime)

    def __overwrite(self, newer, older):
        """ Private helper which copies file 'newer' over file 'older'.
        Returns 0 on success and -1 on failure """

        print('Updating file  %s' % older)
        try:
            if self.__forcecopy:
                os.chmod(older, 0o666)

            try:
                shutil.copy(newer, older)
                self.__numupdates += 1
                return 0
            except (IOError, OSError) as e:
                print(e)
                self.__numupdsfld += 1
                return -1

        except Exception as e:
            print(e)
            return -1

    def __update(self, filename, dir1, dir2):
        """ Private function for updating a file based on
        last time stamp of modification

        Returns 0 if a file was updated and -1 otherwise. """

        print('Updating file %s' % filename)

        # NOTE: dir1 is source & dir2 is target
        if not self.__updatefiles:
            return -1

        file1 = os.path.join(dir1, filename)
        file2 = os.path.join(dir2, filename)

        try:
            st1 = os.stat(file1)
            st2 = os.stat(file2)
        except os.error:
            return -1

        # Update will update in both directions depending
        # on the timestamp of the file & copy-direction.
        direction = self.__copydirection

        # Update file if file's modification time is older than
        # source file's modification time, or creation time. Sometimes
        # it so happens that a file's creation time is newer than it's
        # modification time! (Seen this on windows)
        if direction in (0, 2) and self.__cmptimestamps( st1, st2 ):
            return self.__overwrite(file1, file2) # source to target

        if direction in (1, 2) and self.__cmptimestamps( st2, st1 ):
            if direction == 2:
                # A file copied to the target by an earlier run has a
                # newer time stamp than its source. Without this check
                # identical files would be copied back and forth on
                # every two-way run.
                try:
                    if filecmp.cmp(file1, file2, shallow=False):
                        return -1
                except (IOError, OSError) as e:
                    print(e)
                    return -1
            return self.__overwrite(file2, file1) # target to source

        return -1

    def __dirdiffandcopy(self, dir1, dir2):
        """ Private function which does directory diff & copy """
        self.__dowork(dir1, dir2, self.__copy)

    def __dirdiffandupdate(self, dir1, dir2):
        """ Private function which does directory diff & update  """
        self.__dowork(dir1, dir2, None, self.__update)

    def __dirdiffcopyandupdate(self, dir1, dir2):
        """ Private function which does directory diff, copy and update (synchro) """
        self.__dowork(dir1, dir2, self.__copy, self.__update)

    def __dirdiff(self):
        """ Private function which only does directory diff.

        Only the two top level directories are compared. """

        if self.__dcmp.left_only:
            print('Only in %s' % self.__dir1)
            for x in self.__dcmp.left_only:
                print('>> %s' % x)

        if self.__dcmp.right_only:
            print('Only in %s' % self.__dir2)
            for x in self.__dcmp.right_only:
                print('<< %s' % x)

        if self.__dcmp.common:
            print('Common to %s  and  %s' % (self.__dir1, self.__dir2))
            print('')
            for x in self.__dcmp.common:
                print('-- %s' % x)
        else:
            print('No common files or sub-directories!')

    def synchronize(self):
        """ Synchronize will try to synchronize two directories w.r.t
        each other's contents, copying files if necessary from source
        to target, and creating directories if necessary. If purging
        was requested, files and directories in target (dir2) that are
        not present in the source (dir1) will be deleted. Synchronization
        is done in the direction of source to target """

        self.__copyfiles = True
        self.__updatefiles = True
        self.__creatdirs = True
        self.__copydirection = 0

        print('Synchronizing directory %s with %s \n' % (self.__dir2, self.__dir1))
        self.__dirdiffcopyandupdate(self.__dir1, self.__dir2)

    def update(self):
        """ Update will try to update the target directory
        w.r.t source directory. Only files that are common
        to both directories will be updated, no new files
        or directories are created """

        self.__copyfiles = False
        self.__updatefiles = True
        self.__purge = False
        self.__creatdirs = False

        print('Updating directory %s from %s \n' % (self.__dir2, self.__dir1))
        self.__dirdiffandupdate(self.__dir1, self.__dir2)

    def dirdiff(self):
        """ Only report difference in content between two
        directories """

        self.__copyfiles = False
        self.__updatefiles = False
        self.__purge = False
        self.__creatdirs = False

        print('Difference of directory  %s from %s \n' % (self.__dir2, self.__dir1))
        self.__dirdiff()

    def report(self):
        """ Print report of work at the end """

        # Two decimals are plenty; formatting (instead of cutting the
        # string) also copes with very short and very long run times.
        tt = '%.2f' % (self.__endtime - self.__starttime)

        print('\nPython robocopier finished in %s seconds.' % tt)
        print('%s directories parsed, %s files copied.' % (self.__numdirs, self.__numfiles))
        if self.__numdelfiles:
            print('%s files were purged.' % self.__numdelfiles)
        if self.__numdeldirs:
            print('%s directories were purged.' % self.__numdeldirs)
        if self.__numnewdirs:
            print('%s directories were created.' % self.__numnewdirs)
        if self.__numupdates:
            print('%s files were updated by timestamp.' % self.__numupdates)

        # Failure stats
        print('\n')
        if self.__numcopyfld:
            print('%s files could not be copied.' % self.__numcopyfld)
        if self.__numdirsfld:
            print('%s directories could not be created.' % self.__numdirsfld)
        if self.__numupdsfld:
            print('%s files could not be updated.' % self.__numupdsfld)
        if self.__numdeldfld:
            print('%s directories could not be purged.' % self.__numdeldfld)
        if self.__numdelffld:
            print('%s files could not be purged.' % self.__numdelffld)
        
if __name__=="__main__":
    import sys

    # Everything after the program name is handed to the copier;
    # without any argument all we can do is show the usage text.
    cli_args = sys.argv[1:]
    if not cli_args:
        sys.exit( usage() % PyRobocopier.prog_name )

    copier = PyRobocopier()
    copier.parse_args(cli_args)
    copier.do_work()

    # print report at the end
    copier.report()

This program is inspired by the Windows "robocopy" program. I have been using the "robocopy" program for performing command line directory synchronization or update. I felt that a python solution is apt for this job and ended up writing pyrobocopy.

-Anand

21 comments

anthony tarlano 18 years, 11 months ago  # | flag

Missing the script interpreter declaration. Note to user..

If you copy the script and make it executable (i.e. chmod +x pyrobocopy.py) without adding a script interpreter declaration (#!/usr/bin/env python) at the top of the file, the script will not work.

Here is the error output

$ pyrobocopy.py
/usr/local/bin/pyrobocopy.py: line 15:  pyrobocopy.py -

    Version: 0.1

    Report the difference in content
    of two directories, synchronize or
    update a directory from another, taking
    into account time-stamps of files etc.

    By Anand B Pillai

    (This program is inspired by the windows
    'Robocopy' program.)

: command not found
/usr/local/bin/pyrobocopy.py: line 20: syntax error near unexpected token `('
/usr/local/bin/pyrobocopy.py: line 20: `def usage():'
Anand (author) 18 years, 11 months ago  # | flag
I normally work on a Windows 2000 box
where '.py' and '.pyw' files are associated
with python.

Anand (author) 18 years, 11 months ago  # | flag
I normally work on a Windows 2000 box
where '.py' and '.pyw' files are associated
with python.

Jim Carroll 18 years, 11 months ago  # | flag

Very useful! Nice script! I'll use this instead of rsync for simpler stuff!

I think I see a bug. should all references be to dir2 in this code?

if not os.path.exists(dir2):
     if self.__forcecopy:
         os.chmod(os.path.dirname(dir2), 0777)
     os.makedirs(dir1)

Also, what effect does the chmod have on Windows?

I was very happy to see that on windows if I added my tools/python directory to my path, I could just type robocopy.py at a command line anywhere to use this! Very nice!

Anand (author) 18 years, 11 months ago  # | flag

My original comments got screwed up somehow by ASPN ;-).

On Windows, if you associate the ".py" extension with python, you

don't need to worry about running the program. Just type

'robocopy.py' at the command prompt, that's all.

On unix/linux you can add a line like the following at the

top of the file.

#!/usr/bin/python

The pointed out line is not a bug. The target directory is 'dir2'

so the chmod tries to change its permissions. On windows this does

not have any effect AFAIK.

anthony tarlano 18 years, 11 months ago  # | flag

Just a thought.. I like the implementation, however I think the command structure would be more concise using the following format.

$ pyrobocopy.py [OPTIONS] sourcedir targetdir
Anand (author) 18 years, 11 months ago  # | flag

Cmd structure modification. Command structure now works both ways.

$pyrobocopy.py [OPTIONS] <sourcedir> <targetdir>

as well as,

$pyrobocopy.py <sourcedir> <targetdir> [OPTIONS]

-Anand

Anand (author) 18 years, 10 months ago  # | flag

Fixed an error. Fixed an error in copying sub-directories that

did not exist in target directory.

The script was skipping some sub-directories earlier.

-Anand

Mark Carter 18 years, 10 months ago  # | flag

Dirssync at Sourceforge. You may also be interested in dirssync project at Sourceforge.

Description:

The goal is to synchronize 2 directories. This application will synchronize all the files and all the sub-directories, and prompt the user to confirm all the moves. It's written in python and with wxPython.

Link:

http://sourceforge.net/projects/dirssync/

Mark Carter 18 years, 10 months ago  # | flag

dfp project. You might also be interested in the dfp project.

Description:

A frequent software pattern is processing files that have changed:

  • an incremental backup system, which will add new and changed files to an archive or transfer them to another computer

  • a website update with the latest changes

  • automatic processing, like compiling changed source code

  • software integrity check: detect changed files and raise an alert

dfp is a suite of components which permit to detect changed files and to process them.

Link:

http://www.homepages.lu/pu/dfp.html

Rick Zantow 16 years, 9 months ago  # | flag

How about multiple backup versions? I found the PyRobocopier very useful, but my favorite client (my wife) wanted to have multiple versions backed up. I added a __backver method and some supporting code. The idea is that if a file has already been backed up, the old version is copied to a generation-numbered file; if that generation already exists, it is itself copied to a new backup and so on. A 'maxvers' variable determines the oldest permitted generation (I use 3 in my code, which allows the current file and three past generations all to be stored in the backup directory). Not everyone would want this feature, so I also added a -v command-line switch that must be specified if versions are to be generated.

The versioned files are readily identifiable in the directory; they are named with this format: '%s%d_%s' % (self.backstr, vernum, original_name) ... so that 'abc.jpg' is backed up under the name '_bak1_abc.jpg', etc. The highest 'vernum' digit is the oldest backup. When the limit is reached in a directory, no new files are created, but the generation-shuffling still takes place.

If anyone else happens to be interested, here is a WinXP fc between the original (labeled pyRobocopierPy_orig.txt) and the altered version (labeled PYROBOCOPIER.PY):

Comparing files pyRobocopierPy_orig.txt and PYROBOCOPIER.PY

***** pyRobocopierPy_orig.txt
   36:  Additional Options:\n
   37:  \t-p, --purge       - Purge files when synchronizing (does not purge by default).
***** PYROBOCOPIER.PY
   41:  Additional Options:\n
   42:  \t-v, --versions    - Create backup versions in same directories
   43:  \t-p, --purge       - Purge files when synchronizing (does not purge by default).
*****

***** pyRobocopierPy_orig.txt
   60:
   61:          self.__copyfiles = True
***** PYROBOCOPIER.PY
   66:
   67:          # don't do version backups unless specified
   68:          self.__versions = False
   69:          # depth of version backups
   70:          self.maxvers = 3
   71:          # backup copy flag string
   72:          self.backstr = '_bak'
   73:
   74:          self.__copyfiles = True
*****

***** pyRobocopierPy_orig.txt
   92:
   93:          shortargs = "supncm"
   94:          longargs = ["synchronize=", "update=", "purge=", "nodirection=", "create=", "modtime="]
   95:
***** PYROBOCOPIER.PY
  105:
  106:          shortargs = "supncmv"
  107:          longargs = ["synchronize=", "update=", "purge=", "nodirection=", "create=", "modtime=","versions="]
  108:
*****

(comment continued...)

Rick Zantow 16 years, 9 months ago  # | flag

(...continued from previous comment)

***** pyRobocopierPy_orig.txt
  128:                  self.__modtimeonly = True
  129:              else:
***** PYROBOCOPIER.PY
  141:                  self.__modtimeonly = True
  142:              elif option.lower() in ('-v', '--versions'):
  143:                  self.__versions = True
  144:              else:
*****

***** pyRobocopierPy_orig.txt
  216:                          print 'Copying tree', fulld2
  217:                          shutil.copytree(fulld1, fulld2)
***** PYROBOCOPIER.PY
  231:                          print 'Copying tree', fulld2
  232:                          if self.__versions:
  233:                              self.__backver(dir2,f1)
  234:                          shutil.copytree(fulld1, fulld2)
*****

***** pyRobocopierPy_orig.txt
  244:                  self.__dowork(fulld1, fulld2, copyfunc, updatefunc)
  245:
  246:
***** PYROBOCOPIER.PY
  261:                  self.__dowork(fulld1, fulld2, copyfunc, updatefunc)
  262:
  263:      # Addendum by RZ 20051221
  264:      # The idea here is to make backup versions of existing files, to a
  265:      # depth specified by self.maxvers. The backup version names will be
  266:      # generated by prepending a string (self.backstr) and an int, which
  267:      # increases with each level, separated by underscores. As written,
  268:      # this means _bak1_xxx.xxx, _bak2_..., etc. The __backver method is
  269:      # invoked for copies and updates if __versions == True.
  270:
  271:      def __backver(self,dir,fname):
  272:          tfname = os.path.join(dir, fname)
  273:          # if the file already exists, make backup version(s)
  274:          if os.path.isfile(tfname):
  275:              vnames = [tfname]
  276:              for v in range(self.maxvers):
  277:                  tvname = os.path.join(dir,'%s%d_%s' % (self.backstr,v+1,fname))
  278:                  try:
  279:                      if not os.path.isfile(tvname):
  280:                          xname = vnames.pop()
  281:                          while xname:
  282:                              shutil.copy(xname,tvname)
  283:                              print xname,tvname
  284:                              tvname,xname = xname,vnames.pop()
  285:                      else:
  286:                          vnames.append(tvname)
  287:                  except IndexError:
  288:                      break
  289:              else:
  290:                  for v in range(self.maxvers,0,-1):
  291:                      toname = vnames[v]
  292:                      fromname = vnames[v-1]
  293:                      shutil.copy(fromname,toname)
  294:
*****

(comment continued...)

Rick Zantow 16 years, 9 months ago  # | flag

(...continued from previous comment)

***** pyRobocopierPy_orig.txt
  269:                      sourcefile = os.path.join(dir1, filename)
  270:                      try:
***** PYROBOCOPIER.PY
  317:                      sourcefile = os.path.join(dir1, filename)
  318:                      if self.__versions:
  319:                          self.__backver(dir2,filename)
  320:
  321:                      try:
*****

***** pyRobocopierPy_orig.txt
  293:                      sourcefile = os.path.join(dir2, filename)
  294:
***** PYROBOCOPIER.PY
  344:                      sourcefile = os.path.join(dir2, filename)
  345:                      if self.__versions:
  346:                          self.__backver(dir1,filename)
  347:
*****

***** pyRobocopierPy_orig.txt
  307:          if file1 (source) is more recent than file2 (target) """
  308:
***** PYROBOCOPIER.PY
  360:          if file1 (source) is more recent than file2 (target) """
  361:          print filest1.st_mtime, filest2.st_mtime    # dz
  362:
*****

***** pyRobocopierPy_orig.txt
  345:                          try:
  346:                              shutil.copy(file1, file2)
***** PYROBOCOPIER.PY
  399:                          try:
  400:                              if self.__versions:
  401:                                  self.__backver(dir2,filename)
  402:                              shutil.copy(file1, file2)
*****

***** pyRobocopierPy_orig.txt
  370:                          try:
  371:                              shutil.copy(file2, file1)
***** PYROBOCOPIER.PY
  426:                          try:
  427:                              if self.__versions:
  428:                                  self.__backver(dir1,filename)
  429:                              shutil.copy(file2, file1)
*****
Chris Falck 12 years, 6 months ago  # | flag

Great script but does not handle long path/filenames on windows. For example:

Z:\Process Control Transfer\PCxWrapper\PCxWrapper release2 (SPOL R1)\Releases\9-02-2010\PCxWrapperRelease2SPOL.metadata.plugins\org.eclipse.tomcat\wsexplorer\WEB-INF\classes\com\ibm\etools\webservice\explorer\engine\transformer\

With a file name "CurrentNodeSelectionTransformer.class"

I realise this is a windows limitation, but programs like robocopy and xxcopy do get round this issue, although I have no idea how ;-)

Anybody got any thoughts?

Chris Falck 12 years, 6 months ago  # | flag

Sorry above path did not display properly

Z:\Process Control Transfer\PCxWrapper\PCxWrapper release2 (SPOL R1)\Releases\9-02-2010\PCxWrapperRelease2SPOL\.metadata\.plugins\org.eclipse.tomcat\wsexplorer\WEB-INF\classes\com\ibm\etools\webservice\explorer\engine\transformer

Matt Wilson 11 years, 10 months ago  # | flag

Great script - one suggestion though: when using shutil.copy(), the files which are copied into targetdir will have the current date/time in their attributes, not the sourcedir file date/time. To fix this, simply change instances of shutil.copy() to shutil.copy2() - which does copy file attributes by calling shutil.copystat().

One good development would be to put this in a multithreaded setup - this should speed it up somewhat for very large folders (e.g. I want to synchronise ~500 Gb).

sahin 11 years, 5 months ago  # | flag

thanks

Michael Kantor 11 years, 4 months ago  # | flag

Very nice! I noticed that when symlinks are encountered, it copies the target file instead of recreating the link. I think this modification to __copy will help:

replacing: shutil.copy(sourcefile, dir2)

with:

    if os.path.islink(sourcefile):
        os.symlink(os.readlink(sourcefile), os.path.join(dir2, filename))
    else:
        shutil.copy(sourcefile, dir2)

and similarly for the 'target to source' block

Joshua Lansford 5 years, 6 months ago  # | flag

I made a couple of fixes and changes I thought might help others. When updating individual files, the modification times were not copied; this has been changed. Two more arguments have been added to the program: --overwritechanges (update target files even if they were modified later, but are not the same) and --fft (assume time stamps are the same if they are within 2 seconds). The --fft option is needed if the source is on a differently formatted file system, as the time stamp resolution may be different.

--- a/pyrobocopy.py 
+++ b/pyrobocopy.py 
@@ -43,6 +43,8 @@ Additional Options:\n
 \t                    target directory should exist.)
 \t-m, --modtime     - Only compare file's modification times for an update (By default,
 \t                    compares source file's creation time also).
+\t--overwritechanges - Update target files even if modified later
+\t    --fft         - Assume time stamps are the same if within 2 seconds
 """                   


@@ -68,6 +70,8 @@ class PyRobocopier:
         self.__maketarget =False
         self.__modtimeonly =False
         self.__mainfunc = None
+        self.__overwritechanges = False
+        self.__fft = False

         # stat vars
         self.__numdirs =0
@@ -127,6 +131,10 @@ class PyRobocopier:
                 self.__maketarget = True
             elif option.lower() in ('-m', '--modtime'):
                 self.__modtimeonly = True                            
+            elif option.lower() in ('--overwritechanges' ):
+                self.__overwritechanges = True
+            elif option.lower() in ('--fft' ):
+                self.__fft = True
             else:
                 if self.__dir1=='':
                     self.__dir1 = option
@@ -269,6 +277,7 @@ class PyRobocopier:
                     sourcefile = os.path.join(dir1, filename)
                     try:
                         shutil.copy(sourcefile, dir2)
+                        shutil.copystat(sourcefile, dir2)
                         self.__numfiles += 1
                     except (IOError, OSError), e:
                         print e
Joshua Lansford 5 years, 6 months ago  # | flag

(...continued patch)

@@ -294,6 +303,7 @@ class PyRobocopier:

                     try:
                         shutil.copy(sourcefile, dir1)
+                        shutil.copystat(sourcefile, dir1)
                         self.__numfiles += 1
                     except (IOError, OSError), e:
                         print e
@@ -302,12 +312,24 @@ class PyRobocopier:
             except Exception, e:
                 print 'Error copying  file', filename, e

-    def __cmptimestamps(self, filest1, filest2):
+    def __cmptimestamps(self, filest1, filest2, differenceOnly=False):
         """ Compare time stamps of two files and return True
         if file1 (source) is more recent than file2 (target) """

-        return ((filest1.st_mtime > filest2.st_mtime) or \
-                   (not self.__modtimeonly and (filest1.st_ctime > filest2.st_mtime)))
+        a_greater_mtime = (filest1.st_mtime > filest2.st_mtime)
+        b_greater_ctime = (filest1.st_ctime > filest2.st_mtime)
+        f_equal_mtime   = (filest1.st_mtime == filest2.st_mtime)
+
+        #give two seconds of time slop to account for different file
+        #systems time resolution
+        if self.__fft:
+            a_greater_mtime = (filest1.st_mtime > (filest2.st_mtime+2))
+            b_greater_ctime = (filest1.st_ctime > (filest2.st_mtime+2))
+            f_equal_mtime   =  abs(filest1.st_mtime - filest2.st_mtime)<2
+
+        return a_greater_mtime or \
+                   (not self.__modtimeonly and b_greater_ctime) or \
+                    (differenceOnly and not f_equal_mtime )

     def __update(self, filename, dir1, dir2):
         """ Private function for updating a file based on
Joshua Lansford 5 years, 6 months ago  # | flag

(continued again...)

@@ -336,7 +358,7 @@ class PyRobocopier:
                 # source file's modification time, or creation time. Sometimes
                 # it so happens that a file's creation time is newer than it's
                 # modification time! (Seen this on windows)
-                if self.__cmptimestamps( st1, st2 ):
+                if self.__cmptimestamps( st1, st2, differenceOnly=self.__overwritechanges ):
                     print 'Updating file ', file2 # source to target
                     try:
                         if self.__forcecopy:
@@ -344,6 +366,7 @@ class PyRobocopier:

                         try:
                             shutil.copy(file1, file2)
+                            shutil.copystat(file1, file2)
                             self.__numupdates += 1
                             return 0
                         except (IOError, OSError), e:
@@ -369,6 +392,7 @@ class PyRobocopier:

                         try:
                             shutil.copy(file2, file1)
+                            shutil.copystat(file2, file1)
                             self.__numupdates += 1
                             return 0
                         except (IOError, OSError), e: