This recipe provides a class which will read a text file in reverse. It reads a block of data from the end of the file, splits it into a list of lines, and pops an item off that list every time the readline() method is called. When the block is exhausted, another block is read, and so forth. This takes care of corner cases such as a line that is longer than the buffer or a file that is smaller than the buffer.
#!/usr/bin/env python
import sys
import os
import string
"""read a file returning the lines in reverse order for each call of readline()
This actually just reads blocks (4096 bytes by default) of data from the end of
the file and returns last line in an internal buffer. I believe all the corner
cases are handled, but never can be sure..."""
class BackwardsReader:
    """Read a file one line at a time, starting from the last line.

    The file is opened in binary mode and consumed from the end in blocks of
    ``blksize`` bytes.  Each block is split on newline bytes and kept in an
    internal list; ``readline()`` pops lines off the end of that list and
    pulls in the preceding block whenever only one (possibly partial) line is
    left.  Lines longer than a block and files shorter than a block are both
    handled.

    Parameters:
        file: path of the file to read.
        blksize: number of bytes fetched per read (default 4096); must be
            at least 1.
        encoding: optional codec name.  When ``None`` (the default) lines are
            returned exactly as read from the binary file (``bytes`` on
            Python 3, ``str`` on Python 2).  When given, every complete line
            is decoded with it before being returned.  Lines are located by
            the single byte ``\\n``, so only ASCII-compatible encodings
            (UTF-8, Latin-1, ...) are suitable.

    Raises:
        ValueError: if ``blksize`` is smaller than 1.
        OSError / IOError: if the file cannot be stat'ed or opened.

    Attributes:
        size: size of the file in bytes when the reader was created.
        blksize: the block size in use.
        blkcount: number of blocks requested from the file so far.
        f: the underlying binary file object.
        data: buffered lines (without their newline) not yet returned;
            ``data[0]`` may be the tail of a line that starts in an earlier
            block.
    """

    def __init__(self, file, blksize=4096, encoding=None):
        """Open *file* and buffer its final block."""
        if blksize < 1:
            # A non-positive block size can never make progress through the file.
            raise ValueError("blksize must be a positive integer")
        # get the file size
        self.size = os.stat(file).st_size
        # how big of a block to read from the file...
        self.blksize = blksize
        self.encoding = encoding
        # how many blocks we've read
        self.blkcount = 1
        self.f = open(file, 'rb')
        try:
            # Start at the last block, or at offset 0 when the whole file
            # fits inside a single block.
            self.f.seek(max(self.size - self.blksize, 0))
            self.data = self.f.read(self.blksize).split(b'\n')
        except Exception:
            # Do not leak the handle if the very first read fails.
            self.f.close()
            raise
        # A non-empty final item means the file does not end with a newline,
        # so the first line handed out must not get one appended.  An empty
        # final item is just the by-product of the trailing newline: drop it.
        self._unterminated = bool(self.data[-1])
        if not self._unterminated:
            self.data.pop()

    def readline(self):
        """Return the next line, moving towards the start of the file.

        Every line is returned with its trailing newline, except the final
        line of a file that does not end with one, which is returned as
        stored.  Once all lines have been returned an empty string (empty
        ``bytes`` when no encoding is set) is returned, mirroring
        ``file.readline()`` at end of file.
        """
        # With a single buffered item we cannot know whether it is a whole
        # line or only the tail of one, so keep prepending earlier blocks
        # until a newline shows up or the start of the file is reached.
        while len(self.data) == 1 and self.blkcount * self.blksize < self.size:
            consumed = self.blkcount * self.blksize
            self.blkcount += 1
            # Clamp at offset 0: the first block of the file is usually
            # shorter than blksize.
            start = max(self.size - self.blksize * self.blkcount, 0)
            self.f.seek(start)
            chunk = self.f.read(self.size - consumed - start)
            self.data = (chunk + self.data[0]).split(b'\n')

        if not self.data:
            line = b''
        else:
            line = self.data.pop()
            if self._unterminated:
                # Only the very first line returned can lack a newline.
                self._unterminated = False
            else:
                line += b'\n'

        if self.encoding is not None:
            return line.decode(self.encoding)
        return line

    def close(self):
        """Close the underlying file; closing more than once is harmless."""
        self.f.close()

    def __enter__(self):
        """Support ``with BackwardsReader(path) as reader:``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file when the ``with`` block is left."""
        self.close()
if __name__ == "__main__":
    # Self-test: read a file forwards, reverse the list of lines, then check
    # that BackwardsReader yields the same sequence for every block size from
    # 1 to 4999.  The file defaults to 'br.py' (this script under its usual
    # name) and can be overridden with the first command-line argument.
    path = sys.argv[1] if len(sys.argv) > 1 else 'br.py'

    # Read the reference copy in binary mode, the same mode the reader uses,
    # so that newline translation cannot make the two sides disagree.
    with open(path, 'rb') as f:
        lines = f.readlines()
    lines.reverse()

    for i in range(1, 5000):  # test different buffer sizes...
        foo = BackwardsReader(path, i)
        try:
            linesbr = []
            line = foo.readline()
            while line:
                linesbr.append(line)
                line = foo.readline()
        finally:
            # Release the handle through the reader's file attribute so that
            # thousands of readers do not pile up open files.
            foo.f.close()

        # sys.stdout.write emits the same bytes the old print statements did
        # and works regardless of the interpreter's print syntax.
        if linesbr != lines:
            sys.stdout.write("\nNOT MATCHED %5d\n" % i)
        else:
            # '\r' keeps the progress counter on a single terminal line.
            sys.stdout.write("MATCHED %5d\r" % i)
        sys.stdout.flush()
|
There are probably more concise implementations...