# lz77.py
#
# This code is Public Domain.
#
# By Nelson Rush
#

# Boyer-Moore-Horspool fast string search.
class BMH:
    def pattern(self, data):
        self.skip = []
        self.m = len(data)
        for k in range(256): self.skip.append(self.m)
        for k in range(self.m - 1): self.skip[ord(data[k])] = self.m - k - 1
        self.skip = tuple(self.skip)
        self.data = data
    def find(self, text):
        n = len(text)
        if self.m > n: return -1
        k = self.m - 1
        while k < n:
            j = self.m - 1; i = k
            while j >= 0 and text[i] == self.data[j]:
                j -= 1; i -= 1
            if j == -1: return i + 1
            k += self.skip[ord(text[k])]
        return -1

# Note this currently doesn't work for blocks larger than 256 bytes, its just an example.
# Normally you would use a larger 'pointer' to access 4k to 64k sized blocks.
# And this should really be implemented using bit manipulation for smaller size.
class LZ77:
    def __init__(self, data):
        self.position = 0
        self.window = ""
        self.stream = data
        self.streamSize = len(self.stream)
        self.search = BMH()
    def Encode(self):
        # Initialize pointer, last char, and last result to nothing.
        p = 0
        c = ''
        lastresult = 0
        found = 0
        # Loop through the lookahead buffer.
        for i in range(self.streamSize):
            # Set pattern to find the longest match.
            self.search.pattern(self.stream[self.position:self.position+i+1])
            # Find the pattern match in the window.
            result = self.search.find(self.window)
            # If match failed, we've found the longest.
            if result < 0: break
            # Set the last result.
            lastresult = result
            found = 1
        # Set c to the last character after result.
        c = self.stream[self.position+i]
        # Set pointer to the last result that worked.
        p = lastresult
        # Set the number of bytes backwards to travel.
        B = 0
        if i > 0: B = self.position - p
        L = i
        # Check if the lookahead buffer is not empty.
        if self.streamSize > 0:
            # Increase the position of the lookahead buffer.
            self.position += i + 1
            # Decrease the size of the lookahead buffer to match.
            self.streamSize -= i + 1
            # The window encompasses everything up to the lookahead.
            self.window = self.stream[:self.position]
        return ((B, L), c)
    def Encoder(self):
        print 'Encoding...'
        output = ""
        length = self.streamSize
        # Loop through and save the code information.
        while self.streamSize > 0:
            ((B, L), C) = self.Encode()
            output += chr(B) + chr(L) + C
        return (output, length)
    def Decoder(self, length):
        sz = len(self.stream)
        P = []
        # Set up the codes and the window.
        for i in range(0, sz, 3):
            B,L,C = self.stream[i:i+3]
            P.append((ord(B),ord(L)))
            self.window += C
        P = tuple(P)
        print 'Decoding...'
        output = ""
        i = 0
        # Loop through executing the code instructions
        # (move back B, copy length L, drop in C)
        for p in P:
            B = p[0]
            L = p[1]
            C = self.window[i]
            if L > 0:
                if i-B < 0:
                    c = self.window[0:L]
                else:
                    pos = i-B
                    c = self.window[pos:pos+L]
                c += C
            else:
                c = self.window[i]
            output += c
            i += 1
        return output

if __name__ == '__main__':
    original = 'AABCBBABC'
    lz = LZ77(original)
    stream, streamSize = lz.Encoder()
    lz = LZ77(stream)
    s = lz.Decoder(streamSize)
    print 'Original string:',original
    print 'Decoded  string:',s