Welcome, guest | Sign In | My Account | Store | Cart

This is a very simple implementation of how to do a readlines in reverse. It is not optimized for performance (which often doesn't matter). I have a 2nd version that is faster because it loads blocks of data into memory instead of reading character by character. Of course, the code then almost doubles in size. Finally, there is a third version, the fastest, which uses split.

Python, 57 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import collections,cStringIO

def rev_readlines(arg):
    """Yield the lines of the file at path ``arg`` from last to first.

    The file is opened in binary mode and walked backwards one byte at a
    time, so this is simple rather than fast.  Each line is yielded as a
    byte string with leading and trailing whitespace (including the line
    terminator) stripped.  A file that ends with a newline therefore
    yields an empty string first, and an empty file yields a single empty
    string.
    """
    line=collections.deque()
    # ``with`` closes the file once iteration finishes or the generator
    # is closed, instead of leaving the handle open.
    with open(arg,'rb') as f:
        f.seek(0,2)# go to the end
        while f.tell():
            # step back one byte, read it, then step back over it again
            f.seek(-1,1)
            c=f.read(1)
            f.seek(-1,1)
            line.appendleft(c)
            # bytes literals: data read in 'rb' mode is bytes, and on
            # Python 2 a bytes literal is the same thing as a str literal
            if c==b'\n':
                yield b''.join(line).strip()
                line.clear() #clear for next line
    # whatever precedes the first newline is the first line of the file
    yield b''.join(line).strip()

#bit of optimization, load groups of bytes from disk into memory
def rev_readlines2(arg,bufsize=8192):
    """Yield the lines of the file at path ``arg`` from last to first.

    Same output as a byte-by-byte backward scan, but the file is read
    from disk in chunks of up to ``bufsize`` bytes and each chunk is
    scanned in memory.  Each line is yielded as a byte string with
    leading and trailing whitespace (including the line terminator)
    stripped.  A file that ends with a newline yields an empty string
    first, and an empty file yields a single empty string.

    Raises ValueError (on first iteration) if ``bufsize`` is smaller
    than 1, since the scan could then never move towards the start of
    the file.
    """
    if bufsize<1:
        raise ValueError('bufsize must be at least 1')
    leftover=b''
    # ``with`` closes the file once iteration finishes or the generator
    # is closed, instead of leaving the handle open.
    with open(arg,'rb') as f1:
        f1.seek(0,2)# go to the end
        while f1.tell():
            # the chunk nearest the start of the file may be short
            if f1.tell()<bufsize: bufsize=f1.tell()
            f1.seek(-bufsize,1)
            in_memory=f1.read(bufsize)+leftover
            f1.seek(-bufsize,1)# reading moved us forward; go back again
            line=collections.deque()
            # walk the chunk backwards; slicing (not indexing) keeps each
            # element a one-byte string on both Python 2 and Python 3
            for pos in range(len(in_memory)-1,-1,-1):
                c=in_memory[pos:pos+1]
                line.appendleft(c)
                if c==b'\n':
                    yield b''.join(line).strip()
                    line.clear()
            # Keep the fragment unstripped: it is the tail of a line whose
            # head may sit in the preceding chunk, so its leading
            # whitespace can be interior to that line.
            leftover=b''.join(line)
    yield leftover.strip()

#different approach and much faster
def rev_readlines3(arg,bufsize=8192):
    """Yield the lines of the file at path ``arg`` from last to first.

    The file is read backwards in chunks of up to ``bufsize`` bytes and
    each chunk is split on newline bytes.  Lines are yielded as byte
    strings exactly as they appear between newlines: the newline itself
    is removed but no other whitespace is stripped.  A file that ends
    with a newline yields an empty string first, and an empty file
    yields a single empty string.

    Raises ValueError (on first iteration) if ``bufsize`` is smaller
    than 1, since the scan could then never move towards the start of
    the file.
    """
    if bufsize<1:
        raise ValueError('bufsize must be at least 1')
    leftover=b''
    # ``with`` closes the file once iteration finishes or the generator
    # is closed, instead of leaving the handle open.
    with open(arg,'rb') as f1:
        f1.seek(0,2)# go to the end
        while f1.tell():
            # the chunk nearest the start of the file may be short
            if f1.tell()<bufsize: bufsize=f1.tell()
            f1.seek(-bufsize,1)
            in_memory=f1.read(bufsize)+leftover
            f1.seek(-bufsize,1)# reading moved us forward; go back again
            lines=in_memory.split(b'\n')
            # everything after the first newline is a complete line
            for i in reversed(lines[1:]): yield i
            # the first piece may continue into the preceding chunk
            leftover=lines[0]
    yield leftover

for i in rev_readlines(filename): print i

The first 2 methods go through a data file in reverse, character by character, and use a deque to rebuild the string until a newline is found. Using a deque is clearer (appendleft) and also offers better performance.

The 3rd method simply splits strings from a buffer as it goes backwards.