# drive ghoster # Author: c8 # Description: A program that will create a "ghost" of a given directory or drive on windows systems # (easily adaptable to unix etc) to a given destination. This will consist of a copy of all folders and files # with only names and extensions retained, not size. # This allows browsing of a remote drive or network location when offline. # sample input: # Automatic run, with prompts. ghoster2.py -s SOURCE -d DEST # Manual selection of 'l', will show list of changes. ghoster2.py -s SOURCE -d DEST -l # Manual selection of 'u', will commit changes. ghoster2.py -s SOURCE -d DEST -u # imports import os import sys import shutil import msvcrt import win32file import string import codecs import argparse from datetime import datetime class Ghoster(): def __init__(self): self.enumerate_drives() self.parse_args() self.truncated = False self.ignore_list = ['System Volume Information'] self.sanity_checks() # determine system drives def enumerate_drives(self): drives = [] for i in string.ascii_lowercase: t = "%s:\\" % i if win32file.GetDriveType(t) == win32file.DRIVE_FIXED: drives.extend(t[0]) self.drive_str = ', '.join(a.upper() for a in drives) # arg parser def parse_args(self): parser = argparse.ArgumentParser(description='Create local 0Kb copy of remote drive or destination') parser.add_argument('-l','--log', action="store_true", default=False, dest="log", help='Log run of program') parser.add_argument('-u','--update', action="store_true", default=False, dest="update", help='Run and update changes') parser.add_argument('-d','--dest', dest="dest", help='Provide destination for ghost copy, for example one \ of the following connected system drives:\n' + self.drive_str, required = True) parser.add_argument('-s','--source', dest="source", help='Provide source location', required = True) parser.add_argument('-o','--output', dest="output", help='Log in provided file') args = vars(parser.parse_args()) # require only one (if provided) mode option if (args['log'] or args['update']): if (args['log'] and args['update']): parser.error('Please enter only one action, add --log OR --update') exit() else: self.manual = 'log' if args['log'] else 'update' else: self.manual = '' # parse destination option ghost_drive = args['source'] + ':/' if len(args['source']) == 1 else args['source'] save_dir = args['dest'] if args['dest'][-1] == '\\' else args['dest'] + '\\' self.args = args self.ghost_drive = unicode(ghost_drive) self.save_dir = unicode(save_dir + ghost_drive[0] + '\\') # parse output option if args['output']: self.logging = True self.logging_location = args['output'] else: self.logging = False # safety checks def sanity_checks(self): # if run on platform that isn't windows if os.name != 'nt': print 'This program is currently only windows compatible. Sorry!' exit() # if drive not attached if not os.path.exists(self.ghost_drive): print self.ghost_drive + ' not attached' exit() # disallow c:\ drive if self.args['source'] == 'c': print "You probably don't want to be ghosting c:/ drive..." exit() # prevent recursive copying if self.normalise_backslash(self.args['source']) in self.normalise_backslash(self.args['dest']): print 'destination is part of the ghost - please see recursion in a dictionary' exit() # prompt if dest is root #if save_dir: if len(self.args['dest']) == 3: print "Are you sure that you want to copy to the root of the drive? y/n" answer = msvcrt.getch() if answer.lower() == 'y': pass else: exit() ############################### General Purpose ############################################# def normalise_backslash(self, inp): return inp.replace('/','\\') # insert commas in numbers (ints and floats) def numbers_with_commas(self, number, decimals = 2): tail = '.' + str(number).rpartition('.')[-1] if '.' in str(number) else '' temp = '' for idx, i in enumerate(str(int(number))[::-1]): if idx % 3 == 0 and idx != 0 and i != '-': temp += ',' temp += i return temp[::-1] + str(tail)[:decimals + 1] # return size in Gb/Mb/Kb/b appropriately as string. # Takes size in bytes. def size_normalise(self, size, magnitude = None): sizes = {'b':1, 'kb':1024, 'mb':1024**2, 'gb':1024**3, 'tb':1024**4} if magnitude: magnitude = magnitude.lower() if magnitude not in sizes: print 'no such size possible, defaulting to Mb' magnitude = 'mb' size_type = magnitude new_size = float(size) / sizes[magnitude] else: if size < 1024: size_type = 'b' new_size = float(size) elif size < 1024**2: size_type = 'kb' new_size = float(size) / 1024 elif size < 1024**3: size_type = 'mb' new_size = float(size) / (1024**2) elif size < 1024**4: size_type = 'gb' new_size = float(size) / (1024**3) # Current max of Tb else: size_type = 'tb' new_size = float(size) / (1024**4) return self.numbers_with_commas(new_size), size_type.title() ############################################################################################## # calculate disc size and test for overly large file names def data_enumerate(self, path): print "\nCalculating disk size" truncate = [] files = 0 dirs = 0 file_size = 0 prev_dir = '' self.trunc_dir_count = 0 for dirname, dirnames, filenames in os.walk(path): if dirname.rpartition('\\')[-1] not in self.ignore_list: dirs += len(dirnames) files += len(filenames) for a_file in filenames: file_size += os.path.getsize(os.path.join(dirname,a_file)) if len(os.path.join(self.save_dir + dirname[3:], a_file)) > 255: if self.save_dir + dirname[3:] == prev_dir: truncate.append('\t%s' % a_file) else: self.trunc_dir_count += 1 truncate.append(self.save_dir + dirname[3:]) truncate.append('\t%s' % a_file) prev_dir = self.save_dir + dirname[3:] print "%s files and %s directories in dir %s with size %s%s" % ((self.numbers_with_commas(files),) + (self.numbers_with_commas(dirs),) + (self.ghost_drive,) + self.size_normalise(file_size)) print if truncate: self.print_limiter(truncate, max_lines = 5, truncate = True) truncated = True else: truncated = False return files, dirs, truncated def update(self, files, dirs, commit_changes = False): changes = [] completed = {'files':0, 'dirs':0} percentage = -5 # else if dest not exist, create if commit_changes: if not os.path.exists(self.save_dir): os.makedirs(self.save_dir) print '[', # recurse drive: remove files/dir if shouldn't exist, create if should and doesn't for dirname, dirnames, filenames in os.walk(self.ghost_drive): if dirname.rpartition('\\')[-1] not in self.ignore_list: completed['dirs'] += 1 trunc_dirname = dirname[3:] # for each dir... if os.path.exists(self.save_dir + trunc_dirname): dirnames_a = os.walk(self.save_dir + trunc_dirname).next()[1] filenames_a = os.walk(self.save_dir + trunc_dirname).next()[2] # if there diff number of files in save dir and source if filenames_a != filenames: # remove old files from save location since deleted for filename_a in filenames_a: if filename_a not in filenames: path = os.path.join(self.save_dir + trunc_dirname,filename_a) if commit_changes: os.remove(path) changes.extend(['DELETE: %s' % path]) # create any new files in destination for filename in filenames: if filename not in filenames_a: completed['files'] += 1 path = self.save_dir + trunc_dirname if not os.path.exists(os.path.join(path, filename)): if commit_changes: temp = open(os.path.join(path, filename),'w') temp.close() changes.extend(['ADD: %s' % os.path.join(path, filename)]) # if identical add to completed files else: completed['files'] += len(filenames) # if diff number of dir in save dir and source if dirnames_a != dirnames: for a_dir in dirnames_a: if a_dir not in dirnames: path = os.path.join(self.save_dir,a_dir) if commit_changes: shutil.rmtree(path) changes.extend(['ADD: %s' % path]) # else folder doesn't exist => create else: if commit_changes: os.makedirs(self.save_dir + trunc_dirname) changes.extend(['ADD: %s' % self.save_dir + trunc_dirname]) # create any new files in destination for filename in filenames: completed['files'] += 1 path = self.save_dir + trunc_dirname if not os.path.exists(os.path.join(path, filename)): if commit_changes: temp = open(os.path.join(path, filename),'w') temp.close() changes.extend(['ADD: %s' % os.path.join(path, filename)]) # time remaining if (float(completed['files']) / files * 100) >= percentage + 5: print ' %d%%' % ((int(float(completed['files']) / files * 100)) / 5 * 5), percentage = (float(completed['files']) / files * 100) /5 * 5 if percentage != 100: print ' 100%', print ']' if commit_changes and changes: print '\nAll changes commited.\n' elif commit_changes and not changes: print '\nThere were no changes to commit.\n' elif not changes: print '\nThere were no changes.\n' return changes def logger_printer(self, output, truncate = False): if self.logging: if truncate: print output, self.log.write(output) else: print output, # (only) the following can be logged as results may need to be viewed seperately def print_limiter(self, source, commit_changes = False, max_lines = 15, truncate = False, increment = 20): if not source: return lines = len(source) current_line = 0 # open log file if self.logging: self.log = codecs.open(os.path.join(os.path.abspath(os.path.dirname(sys.argv[0])),self.logging_location) ,'w', "utf-8") self.log.write(datetime.now().strftime('%d-%m-%Y, %H:%M:%S') + '\n') if truncate: self.logger_printer('\nThe following files will have their name truncated:\n\n', truncate) elif commit_changes: self.logger_printer('\nThe following changes have been commited:\n\n') else: self.logger_printer('\nThe following changes will occur:\n\n') if lines > max_lines: print "There are %d files and dirs. Do you want to view them all? press 'y' or 'n'" % (lines - self.trunc_dir_count) ans = msvcrt.getch() if ans == 'y': while(current_line < lines): if current_line + increment > lines: upper_limit = lines else: upper_limit = current_line + increment for i in range(current_line, upper_limit): self.logger_printer('%s\n' % source[i], truncate) if i != upper_limit - 1: print "Press 's' to skip the rest, enter to continue" ans = msvcrt.getch() if ans == 's': break current_line += increment elif ans == 'n': pass else: pass else: for i in source: self.logger_printer('%s\n' % i, truncate) # close log file if self.logging: self.log.close() def auto_run(self, files, dirs): while(True): print '\nTo show a list of files and dirs to be changed press "l"' print 'To update the destination press "u"' print 'To quit type q\n' command = msvcrt.getch() if command.lower() == 'l': self.run_type(files, dirs, commit_changes = False) elif command.lower() == 'u': self.update(files, dirs, commit_changes = True) break elif command.lower() == 'q': break else: pass def manual_run(self, files, dirs): if self.manual == 'update': self.run_type(files, dirs, commit_changes = True) elif self.manual == 'log': self.run_type(files, dirs, commit_changes = False) def run_type(self, files, dirs, commit_changes): changes = self.update(files, dirs, commit_changes) self.print_limiter(changes, commit_changes) def execute(self): try: # Count files and dirs files, dirs, truncated = self.data_enumerate(self.ghost_drive) if truncated: print "\nPlease correct directory names to prevent file name truncation and rerun" exit() if self.manual: self.manual_run(files, dirs) else: self.auto_run(files, dirs) print "\nGood Bye" except WindowsError: print "WindowsError occured - could the source or destination have been removed mid process?" exit() def main(): ghoster = Ghoster() ghoster.execute() # take it away sam... if __name__ == '__main__': main()