#!/usr/local/bin/python -O
"""read faulty: salvage as much as possible of files on damaged media.

For each command-line argument, try to copy the (possibly faulty) file to
the current working directory.  The copy runs in two passes: first in big
chunks, then every failed big chunk is retried sector by sector.  Progress
is stored in "<basename>.state" in the current directory, so an interrupted
or partially failed run can be resumed by running the script again.

Runs on Python 2.7 and Python 3.
"""

import sys, os, errno
import collections
try:
    import cPickle as Pickle
except ImportError:  # Python 3: the C implementation is used automatically
    import pickle as Pickle
import gzip

try:
    xrange
except NameError:  # Python 3: range is already lazy
    xrange = range

# Use the errno that the platform reports for an unreadable sector.
if sys.platform == 'win32':
    _READ_FAILED_ERRNO = errno.EACCES
else:
    _READ_FAILED_ERRNO = errno.EIO


def read_failed(exc):
    "Return True if exc is the platform's 'could not read the data' error"
    return exc.errno == _READ_FAILED_ERRNO


class Chunk(object):
    "A description of a data chunk to be read"
    UNIT = 2048           # size of a single sector, in bytes
    BIG_UNIT = 32 * UNIT  # size of a first-pass (fast) read, in bytes

    def __init__(self, offset, size):
        self.offset = offset  # byte offset of the chunk in the file
        self.size = size      # length of the chunk in bytes

    def next_attempt(self):
        "Return sequence of chunks to retry"
        if self.size == self.UNIT:
            # a single sector failed
            yield self  # try again in the next phase
        else:
            # split into sector-sized chunks so one bad sector does not
            # cost us the readable sectors around it
            for ix in xrange(self.offset, self.offset + self.size, self.UNIT):
                yield self.__class__(ix, self.UNIT)

    def __getstate__(self):
        # pickle as a plain tuple to keep the state file small
        return self.offset, self.size

    def __setstate__(self, tpl):
        self.offset, self.size = tpl

    def description(self):
        """Return textual description of chunk.

        The format is "<MiB offset>MiB:<sector>" for a chunk inside one
        sector and "<MiB offset>MiB:<first sector>-<last sector>" otherwise.
        """
        # Floor division keeps the sector numbers integral on Python 3 too.
        unit1 = self.offset // self.UNIT
        # Index of the sector holding the last byte; never before unit1, so
        # a tail chunk shorter than a sector is not shown as a reversed range.
        unit2 = max(unit1, (self.offset + self.size - 1) // self.UNIT)
        if unit1 == unit2:
            return "%dMiB:%d" % (self.offset // 1048576, unit1)
        else:
            return "%dMiB:%d-%d" % (self.offset // 1048576, unit1, unit2)


class SuspectFile(object):
    "A file to be copied"
    destination = "."  # default directory that receives the copy

    def __init__(self, filename, destination=None):
        """Prepare to copy filename into destination (default: ".").

        Resumes from "<basename>.state" in the current directory when that
        file exists; otherwise schedules the whole file for reading.
        """
        # phase 1 contains big chunks to be read
        # phase 2 contains sectors to re-read
        # phase 3 contains chunks to store as completely failed
        self.filename = filename
        if destination is not None:
            self.destination = destination
        self.state_filename = os.path.basename(filename) + ".state"
        self.phase3 = collections.deque()
        try:
            self.read_last_attempt_state()
        except IOError:
            # state file does not exist
            self.phase1 = self.chunks_to_read()
            self.phase2 = collections.deque()

    def chunks_to_read(self):
        "Return a deque of BIG_UNIT-sized chunks covering the whole file"
        result = collections.deque()
        filesize = os.path.getsize(self.filename)
        for offset in xrange(0, filesize, Chunk.BIG_UNIT):
            # the last chunk only covers what is left of the file
            result.append(Chunk(offset, min(Chunk.BIG_UNIT, filesize - offset)))
        return result

    def record_state(self):
        """Save the pending chunks, or drop the state file if none remain.

        Phases 2 and 3 are stored as one queue, so sectors that failed
        completely are retried as phase 2 on the next run.
        """
        if self.phase1 or self.phase2 or self.phase3:
            fpr = gzip.open(self.state_filename, "wb")
            try:
                Pickle.dump(self.phase1, fpr, -1)
                dummy_deque = collections.deque()
                dummy_deque.extend(self.phase2)
                dummy_deque.extend(self.phase3)
                Pickle.dump(dummy_deque, fpr, -1)
            finally:
                fpr.close()
        else:
            try:
                os.remove(self.state_filename)
            except OSError:
                pass  # ignore non-existent filename

    @staticmethod
    def copy_chunk(fpi, fpo, chunk):
        """Copy one chunk from fpi to fpo at the same offset.

        Returns the data read; nothing is written when the read is empty.
        """
        fpi.seek(chunk.offset)
        data = fpi.read(chunk.size)
        if data:
            fpo.seek(chunk.offset)
            fpo.write(data)
        return data

    def read_last_attempt_state(self):
        """Load phase1 and phase2 from the state file.

        Raises IOError when the state file does not exist.
        """
        # NOTE: the state file is unpickled; only resume from state files
        # that this program wrote itself.
        fpr = gzip.open(self.state_filename, "rb")
        try:
            # load both queues before assigning, so a truncated state file
            # cannot leave the object half-initialised
            phase1 = Pickle.load(fpr)
            phase2 = Pickle.load(fpr)
        finally:
            fpr.close()
        self.phase1 = phase1
        self.phase2 = phase2

    # the report_* methods are to be overloaded
    def report_attempt(self, chunk):
        "This is to be overloaded with a way to report progress"
        pass

    def report_success(self, chunk):
        "Called with every chunk that was copied successfully"
        pass

    def report_failure(self, chunk):
        "Called with every chunk that could not be read"
        pass

    def phase_copy(self, fpi, fpo, phase_in, phase_out):
        """Copy chunks from fpi to fpo storing failures in phase_out.

        Chunks are consumed from the left of phase_in.  A chunk whose read
        fails is replaced in phase_out by its next_attempt() chunks.  Any
        other exception (including KeyboardInterrupt) puts the current
        chunk back at the front of phase_in before propagating.
        """
        while phase_in:
            chunk = phase_in.popleft()
            # 'pending' is what the finally clause must put back; it is
            # cleared as soon as the chunk has been dealt with.
            pending = chunk
            try:
                # to make sure this chunk is not skipped, eg by
                # KeyboardInterrupt
                self.report_attempt(chunk)
                try:
                    self.copy_chunk(fpi, fpo, chunk)
                except IOError as exc:
                    if not read_failed(exc):
                        raise
                    for new_chunk in chunk.next_attempt():
                        phase_out.append(new_chunk)
                    pending = None
                    # report the chunk that failed (not None)
                    self.report_failure(chunk)
                else:
                    # mark as handled before reporting, so an interrupted
                    # report does not re-queue a chunk that was copied
                    pending = None
                    self.report_success(chunk)
            finally:
                if pending is not None:
                    phase_in.appendleft(pending)

    def copy(self):
        """Copy the file to the destination directory.

        An existing partial copy is updated in place.  The state is
        recorded even if the copy is interrupted, and both files are
        closed on the way out.
        """
        fpo_filename = os.path.join(
            self.destination, os.path.basename(self.filename))
        with open(self.filename, "rb") as fpi:
            try:
                # keep what previous runs already salvaged
                fpo = open(fpo_filename, "r+b")
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise
                fpo = open(fpo_filename, "wb")
            with fpo:
                try:
                    self.phase_copy(fpi, fpo, self.phase1, self.phase2)
                    self.phase_copy(fpi, fpo, self.phase2, self.phase3)
                finally:
                    self.record_state()


if __name__ == "__main__":
    class SuspectFileCmd(SuspectFile):
        "SuspectFile that reports its progress on stderr"

        def report_attempt(self, chunk):
            sys.stderr.write(chunk.description())

        def report_success(self, chunk):
            # return to the start of the line; the next attempt overwrites it
            sys.stderr.write("\r")

        def report_failure(self, chunk):
            sys.stderr.write(" failed\n")

        def record_state(self):
            super(SuspectFileCmd, self).record_state()
            sys.stderr.write("** remaining %d bytes in fast reads\n" %
                sum(chunk.size for chunk in self.phase1))
            sys.stderr.write("and %d sectors in re-reads\n" %
                (len(self.phase2) + len(self.phase3)))

    for filename in sys.argv[1:]:
        faulty_file = SuspectFileCmd(filename)
        sys.stderr.write("copying %s\n" % filename)
        faulty_file.copy()
        sys.stderr.write("\n")