# (web page navigation residue, not part of the module) Welcome, guest | Sign In | My Account | Store | Cart
""" $Id: filetail.py 1512 2011-05-20 16:14:29Z morrissj $

Python3 module for tailing a file such as a system log that grows continuously.
Transparently handles files that get rotated or truncated.
Inspired by the Perl File::Tail module.

A simple algorithm is used to dynamically sleep when no new data is available in
the file. The longer the file goes without new data, the longer the sleep
interval becomes; it starts at "interval" and is capped at "max_interval".

Example:
    from filetail import FileTail
    tail = FileTail("/var/log/syslog")
    for line in tail:
        # do something
        pass

"""

import os
import sys
from stat import *
from math import floor
from time import sleep, time

class FileTail(object):
    """
    Tail a file, even if it's rotated/truncated.
    Inspiration came from the perl module File::Tail.

    Instances are their own iterators. Each call to next() polls the file
    (sleeping between polls) until a line can be read, so iteration never
    finishes on its own. The object can also be used as a context manager so
    the underlying file handle is closed deterministically.
    """

    def __init__(self,
                 file,                  # filename to monitor
                 start_pos="end",       # where to initially start reading from
                 interval=0.1,          # sleep time to wait if no data is present (dynamically changes)
                 max_interval=5,        # max sleep time
                 max_wait=60,           # max time to wait with no data before reopening file
                 reopen_check="inode",  # how to check if file is different (inode or time) - inode does not work on win32
                 encoding="utf-8"       # file encoding
                ):
        """
        Open `file` and position the read pointer according to `start_pos`.

        start_pos may be "end"/"tail", "start"/"head", None (same as head), a
        non-negative offset from the start of the file, or a negative offset
        counted back from the end of the file.

        Any exception raised while opening the file propagates to the caller.
        """
        self.start_pos = start_pos
        self.reopen_check = reopen_check
        self.max_wait = max_wait
        self.max_interval = max_interval
        # the initial sleep interval may never exceed the maximum
        self.interval = min(interval, max_interval)
        self.encoding = encoding

        # Defined up front so wait() and close() are usable even when they
        # are called before next()/__iter__().
        self.fh = None
        self.wait_count = 0

        # will throw exception if it fails... caller should intercept
        self.open(file, start_pos=start_pos)

        # initialize some internal vars
        self._buffer = []       # not used by the current readline() based reader
        self.last_time = time() # when data was last received (or a reopen was attempted)
        self.last_count = 0     # 0 until the first line has been read, then 1

    def open(self, file, start_pos="head"):
        """
        Open the file to tail and initialize our state.

        Sets self.fh, self.pos, self.stat and self.file. If anything fails
        after the file was opened, the new handle is closed again and the
        exception is re-raised, leaving the previous state untouched.

        NOTE: numeric positions are handed to seek() on a text-mode handle
        and are treated as byte offsets; an offset that lands inside a
        multi-byte character may make the next read fail to decode.
        """
        fh = open(file, "r", encoding=self.encoding)
        try:
            # seek to the initial position in the file we want to start reading
            if start_pos == "end" or start_pos == "tail":
                fh.seek(0, os.SEEK_END)                   # End of file
            elif start_pos == "start" or start_pos == "head":
                pass                                      # Beginning of file
            elif start_pos is not None:
                if start_pos >= 0:                        # Absolute position
                    fh.seek(start_pos, os.SEEK_SET)
                else:                                     # Relative to the end
                    # Text-mode files reject non-zero end-relative seeks, so
                    # convert to an absolute offset (never before the start).
                    size = os.fstat(fh.fileno()).st_size
                    fh.seek(max(size - abs(start_pos), 0), os.SEEK_SET)

            # if we passed the end of the file rewind to the actual end.
            pos = fh.tell()
            if pos > os.fstat(fh.fileno()).st_size:
                fh.seek(0, os.SEEK_END)
                pos = fh.tell()

            stat_result = os.fstat(fh.fileno())
        except Exception:
            # don't leak the handle when positioning fails
            fh.close()
            raise

        self.fh = fh
        self.pos = pos
        self.stat = stat_result
        self.file = file

    def close(self):
        """Close the current file handle, if there is one and it is open."""
        fh = self.fh
        if fh is not None and not fh.closed:
            fh.close()

    def __enter__(self):
        """Context manager entry; returns the FileTail itself."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Context manager exit; closes the file and never swallows exceptions."""
        self.close()
        return False

    def reopen(self):
        """
        Attempt to reopen the current file. If it doesn't appear to have
        changed (been rotated or truncated) then the current file handle is
        not changed.

        Returns True when reading now continues from the head of a freshly
        opened handle, False when the existing handle was kept or the file
        could not be opened.
        """
        # if we don't have an opened file already then try to open it now
        if not self.fh or self.fh.closed:
            try:
                self.open(self.file, start_pos="head")
            except IOError:
                return False
            return True

        # save current values
        fh = self.fh
        pos = self.pos
        cur = self.stat

        # reopen same file; on failure open() leaves our state untouched
        try:
            self.open(self.file, "head")
        except IOError:
            return False

        new = self.stat
        if self.reopen_check == 'inode':
            # Same inode means the same file, unless it shrank below the size
            # we last observed, in which case it was truncated in place and
            # must be read again from the head.
            unchanged = (new.st_ino == cur.st_ino
                         and new.st_size >= cur.st_size)
        elif self.reopen_check == 'time':
            unchanged = (new.st_mtime <= floor(self.last_time)
                         and new.st_size == pos)
        else:
            unchanged = False

        if unchanged:
            # file appears to be the same or older than our last read:
            # discard the probe handle and keep reading from the old one
            self.fh.close()
            self.fh = fh
            self.pos = pos
            self.stat = cur
            return False

        # a different (or truncated) file: the old handle is no longer needed
        fh.close()
        return True

    def __iter__(self):
        """
            Return iterator to support:
                for line in filetail:
                    print(line)
        """
        self.wait_count = 0
        return self

    def __next__(self):
        """Iterator "next" call."""
        return self.next()

    def next(self):
        """
        Return the next line from the file, waiting until one is available.

        The line is returned exactly as readline() produced it, so a final
        line that has not been newline-terminated yet is returned as-is.
        """
        line = None
        self.wait_count = 0

        # readline() is used (rather than readlines() or iterating the file
        # object) because it keeps tell() usable, which wait() relies on.
        while not line:
            line = self.fh.readline()
            if line != "":
                # track the time we received new data and how much
                self.last_time = time()
                self.last_count = 1
            else:
                self.wait()

        return line

    def wait(self):
        """
        Sleep because no data was available, reopening the file if needed.

        Before any line has been read the delay is `interval`; afterwards it
        is the time elapsed since data was last received. Either way it is
        capped at `max_interval`. Once `max_wait` seconds pass without data
        the file is reopened and checked; if that switches to a new handle
        the method returns immediately without sleeping.
        """
        if self.wait_count == 0:
            # first wait since the last line: remember where we stopped
            self.pos = self.fh.tell()
            self.stat = os.fstat(self.fh.fileno())

        self.wait_count += 1
        elapsed = time() - self.last_time

        # if we've waited long enough try to reopen the file, if that returns
        # true then we're done here and we do not sleep.
        if elapsed >= self.max_wait:
            self.last_time = time()
            if self.reopen():
                return

        # determine delay value. Delay is longer based on total time passed
        # note: currently last_count is always 0 or 1.
        if self.last_count:
            delay = elapsed
        else:
            delay = self.interval

        # don't delay too long
        if delay > self.max_interval:
            delay = self.max_interval

        sleep(delay)
    
# end of FileTail class


def main():
    """Script entry point: report that the module ships without tests."""
    message = "No tests implemented."
    print(message)
    
    
# Only run main() when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)

# (web page residue, not part of the module) History