Files on damaged media (e.g. old CD-ROMs) are not necessarily lost. Often one reader can read sectors that are unreadable in another reader. The "trick" is to merge the successful reads from several devices.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 | #!/usr/local/bin/python -O
# read faulty
# for each of its arguments, it tries to copy the faulty file to the cwd
import sys, os, errno
import collections
import cPickle as Pickle
import gzip
# A failed sector read is reported with a different errno per platform:
# EACCES on win32, EIO everywhere else.
if sys.platform == 'win32':
    _READ_FAILURE_ERRNO = errno.EACCES
else:
    _READ_FAILURE_ERRNO = errno.EIO


def read_failed(exc):
    """Return True when *exc* carries this platform's read-failure errno.

    exc -- the exception raised by the read, normally an IOError.  An
    exception without an ``errno`` attribute is never a read failure.
    """
    return getattr(exc, "errno", None) == _READ_FAILURE_ERRNO
class Chunk(object):
    """A description of a data chunk to be read.

    A chunk is the byte range ``[offset, offset + size)`` of the source file.
    Instances pickle as the plain tuple ``(offset, size)``.
    """

    UNIT = 2048           # bytes in one retry unit (a "sector")
    BIG_UNIT = 32 * UNIT  # bytes in one first-pass chunk (32 sectors)

    def __init__(self, offset, size):
        """Describe *size* bytes starting at byte *offset*."""
        self.offset = offset
        self.size = size

    def next_attempt(self):
        """Yield the chunks to retry after reading this chunk failed.

        A chunk of at most one UNIT cannot be split further, so it is yielded
        unchanged for the next phase.  A larger chunk is split into UNIT-sized
        pieces; the last piece is shortened so that the pieces never reach
        past the end of this chunk.
        """
        if self.size <= self.UNIT:  # a single sector failed
            yield self  # try again in the next phase
        else:
            end = self.offset + self.size
            for start in range(self.offset, end, self.UNIT):
                yield self.__class__(start, min(self.UNIT, end - start))

    def __getstate__(self):
        """Return the pickled form, the tuple ``(offset, size)``."""
        return self.offset, self.size

    def __setstate__(self, tpl):
        """Restore the chunk from the tuple made by ``__getstate__``."""
        self.offset, self.size = tpl

    def description(self):
        """Return a textual description of the chunk.

        The text is ``"<M>MiB:<first>"`` for a chunk within one sector and
        ``"<M>MiB:<first>-<last>"`` otherwise, where M is the offset in whole
        MiB and first/last are the indexes of the UNIT-sized sectors touched.
        """
        first = self.offset // self.UNIT
        # Index of the sector holding the chunk's last byte; this stays right
        # when the size is not a multiple of UNIT (the tail of a file).
        last = max(first, (self.offset + self.size - 1) // self.UNIT)
        if first == last:
            return "%dMiB:%d" % (self.offset // 1048576, first)
        return "%dMiB:%d-%d" % (self.offset // 1048576, first, last)
class SuspectFile(object):
    """A file to be copied from unreliable media, chunk by chunk.

    Pending work is kept in three deques of chunks:

    * phase1 -- big chunks still to be read;
    * phase2 -- single sectors to re-read after their big chunk failed;
    * phase3 -- sectors that failed again during the current run.

    The queues are saved in ``<basename of filename>.state`` (a relative
    path, so it lives in the current working directory) and reloaded by the
    next run, which then retries only what is still unread.
    """

    destination = "."

    def __init__(self, filename, destination=None):
        """Prepare to copy *filename* into directory *destination*.

        When a state file from an earlier run exists its queues are reused;
        otherwise the whole file is queued for reading.
        """
        self.filename = filename
        if destination is not None:
            self.destination = destination
        self.state_filename = os.path.basename(filename) + ".state"
        self.phase3 = collections.deque()
        try:
            self.read_last_attempt_state()
        except IOError:  # state file does not exist
            self.phase1 = self.chunks_to_read()
            self.phase2 = collections.deque()

    def chunks_to_read(self):
        """Return a deque of big chunks that together cover the whole file."""
        filesize = os.path.getsize(self.filename)
        return collections.deque(
            # the last chunk only covers what is left of the file
            Chunk(offset, min(Chunk.BIG_UNIT, filesize - offset))
            for offset in range(0, filesize, Chunk.BIG_UNIT))

    def record_state(self):
        """Save the pending chunks, or drop the state file when none remain.

        phase2 and phase3 are stored as one queue, so sectors that failed in
        this run are re-read by the next run.
        """
        if self.phase1 or self.phase2 or self.phase3:
            retries = collections.deque(self.phase2)
            retries.extend(self.phase3)
            fpr = gzip.open(self.state_filename, "wb")
            try:
                Pickle.dump(self.phase1, fpr, -1)
                Pickle.dump(retries, fpr, -1)
            finally:
                fpr.close()  # close the handle even when pickling fails
        else:
            try:
                os.remove(self.state_filename)
            except OSError:
                pass  # best effort: typically the state file never existed

    @staticmethod
    def copy_chunk(fpi, fpo, chunk):
        """Copy the bytes described by *chunk* from *fpi* to *fpo*.

        The data is written at the same offset it was read from.  Returns the
        data read; nothing is written when the read returns no data.
        """
        fpi.seek(chunk.offset)
        data = fpi.read(chunk.size)
        if data:
            fpo.seek(chunk.offset)
            fpo.write(data)
        return data

    def read_last_attempt_state(self):
        """Load phase1 and phase2 from the state file of an earlier run.

        Raises IOError when the state file cannot be opened.
        """
        # NOTE(security): unpickling runs arbitrary code -- only load state
        # files that this program wrote itself.
        fpr = gzip.open(self.state_filename, "rb")
        try:
            phase1 = Pickle.load(fpr)
            phase2 = Pickle.load(fpr)
        finally:
            fpr.close()  # do not leave the state file open
        self.phase1 = phase1
        self.phase2 = phase2

    # the report_* methods are to be overloaded
    def report_attempt(self, chunk):
        "This is to be overloaded with a way to report progress"
        pass

    def report_success(self, chunk):
        "Called with each chunk that was copied; to be overloaded"
        pass

    def report_failure(self, chunk):
        "Called with each chunk whose read failed; to be overloaded"
        pass

    def phase_copy(self, fpi, fpo, phase_in, phase_out):
        """Copy chunks from fpi to fpo storing failures in phase_out.

        Chunks are taken from the left of *phase_in* until it is empty.  A
        chunk whose read fails (as judged by ``read_failed``) is replaced in
        *phase_out* by the chunks of its ``next_attempt()``.  Any other
        IOError propagates.  A chunk interrupted before it was handled, eg by
        KeyboardInterrupt, is put back at the front of *phase_in*.
        """
        while phase_in:
            chunk = phase_in.popleft()
            unhandled = chunk  # put back by `finally` unless set to None
            try:
                self.report_attempt(chunk)
                try:
                    self.copy_chunk(fpi, fpo, chunk)
                except IOError as exc:
                    if not read_failed(exc):
                        raise
                    # Build the retry list first so the queue grows in a
                    # single extend() call.
                    retries = list(chunk.next_attempt())
                    phase_out.extend(retries)
                    unhandled = None
                    # hand over the failed chunk itself, not None
                    self.report_failure(chunk)
                else:
                    unhandled = None
                    self.report_success(chunk)
            finally:
                if unhandled is not None:
                    phase_in.appendleft(unhandled)

    def copy(self):
        """Copy the file to the destination directory.

        An existing output file is updated in place, so data recovered by
        earlier runs is kept; otherwise a new file is created.  The state is
        recorded even when copying is interrupted, and both files are closed
        afterwards.
        """
        fpo_filename = os.path.join(
            self.destination, os.path.basename(self.filename))
        fpi = open(self.filename, "rb")
        try:
            try:
                fpo = open(fpo_filename, "r+b")
            except IOError as exc:
                if exc.errno != errno.ENOENT:
                    raise
                fpo = open(fpo_filename, "wb")
            try:
                try:
                    self.phase_copy(fpi, fpo, self.phase1, self.phase2)
                    self.phase_copy(fpi, fpo, self.phase2, self.phase3)
                finally:
                    self.record_state()
            finally:
                fpo.close()
        finally:
            fpi.close()
if __name__ == "__main__":
    class SuspectFileCmd(SuspectFile):
        """SuspectFile that reports its progress on standard error."""

        def report_attempt(self, chunk):
            # name the sectors that are about to be read
            sys.stderr.write(chunk.description())

        def report_success(self, chunk):
            # back to the start of the line, so the next description
            # is written over this one
            sys.stderr.write("\r")

        def report_failure(self, chunk):
            # finish the line, so the failed description stays on screen
            sys.stderr.write(" failed\n")

        def record_state(self):
            # store the state first, then summarise what is still unread
            super(SuspectFileCmd, self).record_state()
            fast_bytes = sum(pending.size for pending in self.phase1)
            retry_sectors = len(self.phase2) + len(self.phase3)
            sys.stderr.write(
                "** remaining %d bytes in fast reads\n" % fast_bytes)
            sys.stderr.write(
                "and %d sectors in re-reads\n" % (retry_sectors,))

    # copy every file named on the command line
    for path in sys.argv[1:]:
        suspect = SuspectFileCmd(path)
        sys.stderr.write("copying %s\n" % path)
        suspect.copy()
        sys.stderr.write("\n")
|
This recipe can be used for recovering files from damaged media (CD-ROMs and DVDs are specifically addressed, because of the UNIT=2048, which is the byte-size of a sector on these media).
This recipe attempts to copy batches of sectors from the faulty file to the current directory; if reading a batch (typically 32 sectors) fails, its sectors will be re-read later one by one. If these sectors still can't be read, the program will exit after storing a file (named as the original filename with ".state" appended) that lists the unread sectors. Put the disk in another device, and run the program again. Only the unread sectors will be retried (as long as both the partially copied file and its .state file exist in the current directory), and you have a chance of reading more data.
At any point you can stop the program with Control-C. Wait a little until the current read fails, and the program will exit having first stored the .state file.
This code is actually a quick hack which I just prettied up a little bit to share it here.
Best used in places with lots of reader devices. ...like your office :)
Use a shared folder (e.g. NFS, Windows shares, Samba) and try from several workstations.
Recover your files 100% or your money back!