Welcome, guest | Sign In | My Account | Store | Cart
# drive ghoster
# Author: c8
# Description: A program that will create a "ghost" of a given directory or drive on windows systems
# (easily adaptable to unix etc) to a given destination. This will consist of a copy of all folders and files
# with only names and extensions retained, not size.
# This allows browsing of a remote drive or network location when offline.

# sample input:
#   Automatic run, with prompts. ghoster2.py -s SOURCE -d DEST
#   Manual selection of 'l', will show list of changes. ghoster2.py -s SOURCE -d DEST -l
#   Manual selection of 'u', will commit changes. ghoster2.py -s SOURCE -d DEST -u


# imports
import os
import sys
import shutil
import msvcrt
import win32file
import string
import codecs
import argparse
from datetime import datetime

class Ghoster():

    def __init__(self):   
        self.enumerate_drives()
        self.parse_args()
        
        self.truncated = False
        self.ignore_list = ['System Volume Information']
        
        self.sanity_checks()
    

    # determine system drives
    def enumerate_drives(self):
        drives = []
        for i in string.ascii_lowercase:
            t = "%s:\\" % i
            if win32file.GetDriveType(t) == win32file.DRIVE_FIXED:
                drives.extend(t[0])

        self.drive_str = ', '.join(a.upper() for a in drives)            
               

    # arg parser
    def parse_args(self):
        parser = argparse.ArgumentParser(description='Create local 0Kb copy of remote drive or destination')
        parser.add_argument('-l','--log', action="store_true", default=False, dest="log", help='Log run of program')
        parser.add_argument('-u','--update', action="store_true", default=False, dest="update", help='Run and update changes')
        parser.add_argument('-d','--dest', dest="dest", help='Provide destination for ghost copy, for example one \
        of the following connected system drives:\n' + self.drive_str, required = True)
        parser.add_argument('-s','--source', dest="source", help='Provide source location', required = True)
        parser.add_argument('-o','--output', dest="output", help='Log in provided file')

        args = vars(parser.parse_args())

        # require only one (if provided) mode option
        if (args['log'] or args['update']):
            if (args['log'] and args['update']):
                parser.error('Please enter only one action, add --log OR --update')
                exit()
            else:
                self.manual = 'log' if args['log'] else 'update'
        else:
            self.manual = ''
          
            
        # parse destination option
        ghost_drive = args['source'] + ':/' if len(args['source']) == 1 else args['source']
        save_dir = args['dest'] if args['dest'][-1] == '\\' else args['dest'] + '\\'
        
        self.args = args
        self.ghost_drive = unicode(ghost_drive)
        self.save_dir = unicode(save_dir + ghost_drive[0] + '\\')

        # parse output option
        if args['output']:
            self.logging = True
            self.logging_location = args['output']
        else:
            self.logging = False
  
    # safety checks
    def sanity_checks(self):
        # if run on platform that isn't windows
        if os.name != 'nt':
            print 'This program is currently only windows compatible. Sorry!'
            exit()
        
        # if drive not attached
        if not os.path.exists(self.ghost_drive):
            print self.ghost_drive + ' not attached'
            exit()
            
        # disallow c:\ drive
        if self.args['source'] == 'c':
            print "You probably don't want to be ghosting c:/ drive..."
            exit()
            
        # prevent recursive copying
        if self.normalise_backslash(self.args['source']) in self.normalise_backslash(self.args['dest']):
            print 'destination is part of the ghost - please see recursion in a dictionary'
            exit()
            
        # prompt if dest is root
        #if save_dir:    
        if len(self.args['dest']) == 3:
            print "Are you sure that you want to copy to the root of the drive? y/n"
            answer = msvcrt.getch()
            if answer.lower() == 'y':
                pass
            else:
                exit()
  
############################### General Purpose #############################################
    def normalise_backslash(self, inp):
        return inp.replace('/','\\')
      
    # insert commas in numbers (ints and floats)
    def numbers_with_commas(self, number, decimals = 2):
        tail = '.' + str(number).rpartition('.')[-1] if '.' in str(number) else ''
        temp = ''
        for idx, i in enumerate(str(int(number))[::-1]):
            if idx % 3 == 0 and idx != 0 and i != '-':
                temp += ','
            temp += i
        return temp[::-1] + str(tail)[:decimals + 1]  

    # return size in Gb/Mb/Kb/b appropriately as string. 
    # Takes size in bytes.
    def size_normalise(self, size, magnitude = None):
        sizes = {'b':1, 'kb':1024, 'mb':1024**2, 'gb':1024**3, 'tb':1024**4}
        if magnitude:
            magnitude = magnitude.lower()
            if magnitude not in sizes:
                print 'no such size possible, defaulting to Mb'
                magnitude = 'mb'
            size_type = magnitude              
            new_size = float(size) / sizes[magnitude]
        else:
            if size < 1024:
                size_type = 'b'
                new_size = float(size)
            elif size < 1024**2:
                size_type = 'kb'
                new_size = float(size) / 1024
            elif size < 1024**3:
                size_type = 'mb'
                new_size = float(size) / (1024**2)
            elif size < 1024**4:
                size_type = 'gb'
                new_size = float(size) / (1024**3)                
            # Current max of Tb
            else:
                size_type = 'tb'
                new_size = float(size) / (1024**4)
        
        return self.numbers_with_commas(new_size), size_type.title()

##############################################################################################        
        
    # calculate disc size and test for overly large file names    
    def data_enumerate(self, path):
        print "\nCalculating disk size"
        truncate = []
        files = 0
        dirs = 0
        file_size = 0
        prev_dir = ''
        self.trunc_dir_count = 0
        
        for dirname, dirnames, filenames in os.walk(path):
            if dirname.rpartition('\\')[-1] not in self.ignore_list:
                dirs += len(dirnames)
                files += len(filenames)
                
                for a_file in filenames: 
                    file_size += os.path.getsize(os.path.join(dirname,a_file))
                    if len(os.path.join(self.save_dir + dirname[3:], a_file)) > 255:
                        if self.save_dir + dirname[3:] == prev_dir:
                            truncate.append('\t%s' % a_file)
                        else:    
                            self.trunc_dir_count += 1
                            truncate.append(self.save_dir + dirname[3:])
                            truncate.append('\t%s' % a_file)
                            prev_dir = self.save_dir + dirname[3:]
                        
        
        print "%s files and %s directories in dir %s with size %s%s" % ((self.numbers_with_commas(files),) + (self.numbers_with_commas(dirs),) + (self.ghost_drive,) + self.size_normalise(file_size))
        print

        if truncate:
            self.print_limiter(truncate, max_lines = 5, truncate = True)
            truncated = True
        else:
            truncated = False
            
        return files, dirs, truncated

    def update(self, files, dirs, commit_changes = False):  
        changes = []
        completed = {'files':0, 'dirs':0}
        percentage = -5
        
        # else if dest not exist, create
        if commit_changes:        
            if not os.path.exists(self.save_dir):
                os.makedirs(self.save_dir)    

        print '[',
        
        # recurse drive: remove files/dir if shouldn't exist, create if should and doesn't
        for dirname, dirnames, filenames in os.walk(self.ghost_drive):
            if dirname.rpartition('\\')[-1] not in self.ignore_list:
                completed['dirs'] += 1
                trunc_dirname = dirname[3:]

                # for each dir...
                if os.path.exists(self.save_dir + trunc_dirname):
                    dirnames_a = os.walk(self.save_dir + trunc_dirname).next()[1]
                    filenames_a = os.walk(self.save_dir + trunc_dirname).next()[2]
                    
                    # if there diff number of files in save dir and source
                    if filenames_a != filenames:
                        # remove old files from save location since deleted 
                        for filename_a in filenames_a:
                            if filename_a not in filenames:
                                path = os.path.join(self.save_dir + trunc_dirname,filename_a)
                                if commit_changes:
                                    os.remove(path)
                                changes.extend(['DELETE: %s' % path])
                    
                        # create any new files in destination            
                        for filename in filenames:
                            if filename not in filenames_a:
                                completed['files'] += 1
                                path = self.save_dir + trunc_dirname
                                        
                                if not os.path.exists(os.path.join(path, filename)):
                                    if commit_changes:
                                        temp = open(os.path.join(path, filename),'w')
                                        temp.close()
                                    changes.extend(['ADD: %s' % os.path.join(path, filename)])
                    
                    # if identical add to completed files
                    else:
                        completed['files'] += len(filenames)
                    
                    # if diff number of dir in save dir and source
                    if dirnames_a != dirnames:
                        for a_dir in dirnames_a:
                            if a_dir not in dirnames:
                                path = os.path.join(self.save_dir,a_dir)
                                if commit_changes:
                                    shutil.rmtree(path)            
                                changes.extend(['ADD: %s' % path])
                                    
                # else folder doesn't exist => create                    
                else:
                    if commit_changes:
                        os.makedirs(self.save_dir + trunc_dirname)
                    changes.extend(['ADD: %s' % self.save_dir + trunc_dirname])
                    
                    # create any new files in destination            
                    for filename in filenames:
                        completed['files'] += 1
                        path = self.save_dir + trunc_dirname
                                
                        if not os.path.exists(os.path.join(path, filename)):
                            if commit_changes:
                                temp = open(os.path.join(path, filename),'w')
                                temp.close()
                            changes.extend(['ADD: %s' % os.path.join(path, filename)])
                                
                # time remaining
                if (float(completed['files']) / files * 100) >= percentage + 5:
                    print ' %d%%' % ((int(float(completed['files']) / files * 100)) / 5 * 5),
                    percentage = (float(completed['files']) / files * 100) /5 * 5

        if percentage != 100:
            print ' 100%',
        print ']'
        
        if commit_changes and changes:
            print '\nAll changes commited.\n'
        elif commit_changes and not changes:
            print '\nThere were no changes to commit.\n'
        elif not changes:
            print '\nThere were no changes.\n'

        return changes

    def logger_printer(self, output, truncate = False):
        if self.logging:
            if truncate:
                print output,
            self.log.write(output)
        else:
            print output,            
                
    # (only) the following can be logged as results may need to be viewed seperately    
    def print_limiter(self, source, commit_changes = False, max_lines = 15, truncate = False, increment = 20):
        if not source:
            return
            
        lines = len(source)
        current_line = 0

        # open log file
        if self.logging:
            self.log = codecs.open(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),self.logging_location) ,'w', "utf-8")  
            self.log.write(datetime.now().strftime('%d-%m-%Y, %H:%M:%S') + '\n')

        if truncate:
            self.logger_printer('\nThe following files will have their name truncated:\n\n', truncate)
        elif commit_changes:
            self.logger_printer('\nThe following changes have been commited:\n\n')
        else:
            self.logger_printer('\nThe following changes will occur:\n\n')

        if lines > max_lines:            
            print "There are %d files and dirs. Do you want to view them all? press 'y' or 'n'" % (lines - self.trunc_dir_count)
            ans = msvcrt.getch()
            if ans == 'y':
                while(current_line < lines):
                    if current_line + increment > lines:
                        upper_limit = lines
                    else:
                        upper_limit = current_line + increment
                    for i in range(current_line, upper_limit):
                        self.logger_printer('%s\n' % source[i], truncate)

                    if i != upper_limit - 1:    
                        print "Press 's' to skip the rest, enter to continue"
                        ans = msvcrt.getch()
                        if ans == 's':
                            break
                    current_line += increment
            elif ans == 'n':
                pass
            else:
                pass
        else:
            for i in source:
                self.logger_printer('%s\n' % i, truncate)
        # close log file        
        if self.logging:
            self.log.close()    


    def auto_run(self, files, dirs):
        while(True):
            print '\nTo show a list of files and dirs to be changed press "l"'
            print 'To update the destination press "u"'
            print 'To quit type q\n'
            
            command = msvcrt.getch()

            if command.lower() == 'l':
                self.run_type(files, dirs, commit_changes = False)
                    
            elif command.lower() == 'u':
                self.update(files, dirs, commit_changes = True)
                break
            elif command.lower() == 'q':
                break
            else:
                pass
    
    def manual_run(self, files, dirs):
        if self.manual == 'update':
            self.run_type(files, dirs, commit_changes = True)
        elif self.manual == 'log':
            self.run_type(files, dirs, commit_changes = False)
    
    def run_type(self, files, dirs, commit_changes):
        changes = self.update(files, dirs, commit_changes)
        self.print_limiter(changes, commit_changes) 

    def execute(self):        
        try:
            # Count files and dirs
            files, dirs, truncated = self.data_enumerate(self.ghost_drive)

            if truncated:
                print "\nPlease correct directory names to prevent file name truncation and rerun"
                exit()
               
            if self.manual:
                self.manual_run(files, dirs)
            else:
                self.auto_run(files, dirs)
            
            print "\nGood Bye"        
        except WindowsError:
            print "WindowsError occured - could the source or destination have been removed mid process?"
            exit()
def main():
    ghoster = Ghoster()
    ghoster.execute()
    
# take it away sam...
if __name__ == '__main__':
    main()

History