Welcome, guest | Sign In | My Account | Store | Cart
# Written in 2003 by Andrew Dalke, Dalke Scientific Software, LLC.
# This software has been released to the public domain.  No
# copyright is asserted.

from cStringIO import StringIO

class ReseekFile:
    """wrap a file handle to allow seeks back to the beginning

    Takes a file handle in the constructor.
    
    See the module docstring for more documentation.
    """
    def __init__(self, file):
        self.file = file
        self.buffer_file = StringIO()
        self.at_beginning = 1
        try:
            self.beginning = file.tell()
        except (IOError, AttributeError):
            self.beginning = 0
        self._use_buffer = 1
        
    def seek(self, offset, whence = 0):
        """offset, whence = 0

        Seek to a given byte position.  Only supports whence == 0
        and offset == the initial value of ReseekFile.tell() (which
        is usually 0, but not always.)
        """
        if whence != 0:
            raise TypeError("Unexpected whence value of %s; expecting 0" % \
                            (whence,))
        if offset != self.beginning:
            raise TypeError("Unexpected offset value of %r; expecting '%s'" % \
                             (offset, self.beginning))
        self.buffer_file.seek(0)
        self.at_beginning = 1
        
    def tell(self):
        """the current position of the file

        The initial position may not be 0 if the underlying input
        file supports tell and it not at position 0.
        """
        if not self.at_beginning:
            raise TypeError("ReseekFile cannot tell except at the beginning of file")
        return self.beginning

    def _read(self, size):
        if size < 0:
            y = self.file.read()
            z = self.buffer_file.read() + y
            if self._use_buffer:
                self.buffer_file.write(y)
            return z
        if size == 0:
            return ""
        x = self.buffer_file.read(size)
        if len(x) < size:
            y = self.file.read(size - len(x))
            if self._use_buffer:
                self.buffer_file.write(y)
            return x + y
        return x
        
    def read(self, size = -1):
        """read up to 'size' bytes from the file

        Default is -1, which means to read to end of file.
        """
        x = self._read(size)
        if self.at_beginning and x:
            self.at_beginning = 0
        self._check_no_buffer()
        return x

    def readline(self):
        """read a line from the file"""

        # Can we get it out of the buffer_file?
        s = self.buffer_file.readline()
        if s[-1:] == "\n":
            return s
        # No, so now we read a line from the input file
        t = self.file.readline()

        # Append the new data to the buffer, if still buffering
        if self._use_buffer:
            self.buffer_file.write(t)
        
        self._check_no_buffer()

        return s + t

    def readlines(self):
        """read all remaining lines from the file"""
        s = self.read()
        lines = []
        i, j = 0, s.find("\n")
        while j > -1:
            lines.append(s[i:j+1])
            i = j+1
            j = s.find("\n", i)
        if i < len(s):
            # Only get here if the last line doesn't have a newline
            lines.append(s[i:])
        return lines

    def _check_no_buffer(self):
        # If 'nobuffer' called and finished with the buffer file
        # then get rid of the buffer and redirect everything to
        # the original input file.
        if self._use_buffer == 0 and self.buffer_file.tell() == \
                                        len(self.buffer_file.getvalue()):
            # I'm doing this for the slightly better performance
            self.seek = getattr(self.file, "seek", None)
            self.tell = getattr(self.file, "tell", None)
            self.read = self.file.read
            self.readline = self.file.readline
            self.readlines = self.file.readlines
            del self.buffer_file

    def nobuffer(self):
        """tell the ReseekFile to stop using the buffer once it's exhausted"""
        self._use_buffer = 0

def prepare_input_source(source):
    """given a URL, returns a xml.sax.xmlreader.InputSource

    Works like xml.sax.saxutils.prepare_input_source.  Wraps the
    InputSource in a ReseekFile if the URL returns a non-seekable
    file.

    To turn the buffer off if that happens, you'll need to do
    something like

    f = source.getCharacterStream()
     ...
    try:
       f.nobuffer()
    except AttributeError:
       pass

    or

    if isinstance(f, ReseekFile):
      f.nobuffer()
    
    """
    from xml.sax import saxutils
    source = saxutils.prepare_input_source(source)
    # Is this correct?  Don't know - don't have Unicode exprerience
    f = source.getCharacterStream() or source.getByteStream()
    try:
        f.tell()
    except (AttributeError, IOError):
        f = ReseekFile.ReseekFile(f)
        source.setByteStream(f)
        source.setCharacterStream(None)
    return source

History