""" $Id: filetail.py 1512 2011-05-20 16:14:29Z morrissj $
Python3 module for tailing a file such as a system log that grows continuously.
Transparently handles files that get rotated or trucated.
Inspired by the Perl File::Tail module.
A simple algorithm is used to dynamically sleep when no new data is available in
the file. The longer the amount of time goes by w/o new data the longer the
sleep interval will be (up to "max_interval") and starts at "interval".
Example:
from filetail import FileTail
tail = FileTail("/var/log/syslog")
for line in tail:
# do something
pass
"""
import os
import sys
from stat import *
from math import floor
from time import sleep, time
class FileTail(object):
"""
Tail a file, even if its rotated/truncated.
Inspiration came from the perl module File::Tail.
"""
def __init__(self,
file, # filename to monitor
start_pos="end", # where to initially start reading from
#max_buffer_size=16384, # Max buffer size hint (Not exact; @see file.readlines)
interval=0.1, # sleep time to wait if no data is present (dynamically changes)
#min_interval=0.01, # min sleep time
max_interval=5, # max sleep time
max_wait=60, # max time to wait with no data before reopening file
reopen_check="inode", # how to check if file is different (inode or time) - inode does not work on win32
encoding="utf-8" # file encoding
):
self.start_pos = start_pos
self.reopen_check = reopen_check
self.max_wait = max_wait
#self.max_buffer_size = max_buffer_size
#self.min_interval = min_interval
self.max_interval = max_interval
self.interval = interval
if self.interval > self.max_interval:
self.interval = self.max_interval
self.encoding = encoding
# will throw exception if it fails... caller should intercept
self.open(file, start_pos=start_pos)
# initialize some internal vars
self._buffer = []
self.last_time = time()
self.last_count = 0
def open(self, file, start_pos="head"):
"""Open the file to tail and initialize our state."""
fh = open(file, "r", encoding=self.encoding)
# seek to the initial position in the file we want to start reading
if start_pos == "end" or start_pos == "tail":
fh.seek(0, os.SEEK_END) # End of file
elif start_pos == "start" or start_pos == "head":
#fh.seek(0, os.SEEK_SET) # Beginning of file
pass
elif start_pos is not None:
if start_pos >= 0: # Absolute position
fh.seek(start_pos, os.SEEK_SET)
else: # Absolute position (from end)
fh.seek(abs(start_pos), os.SEEK_END)
# if we passed the end of the file rewind to the actual end.
# This avoids a potential race condition if the file was being rotated
# in the process of opening the file. Not sure if this can actually
# happen, but better safe than sorry.
pos = fh.tell()
if pos > os.stat(file)[ST_SIZE]:
pos = fh.tell()
self.fh = fh
self.pos = pos
self.stat = os.fstat(fh.fileno())
self.file = file
def reopen(self):
"""
Attempt to reopen the current file. If it doesn't appear to have
changed (been rotated) then the current file handle is not changed.
"""
#print("Reopening", self.file, "...", end="")
# if we don't have an opened file already then try to open it now
if not self.fh or self.fh.closed:
try:
self.open(self.file, start_pos="head");
except IOError:
return False
return True
# save current values
fh = self.fh
pos = self.pos
cur = self.stat
# reopen same file
try:
self.open(self.file, "head")
except IOError as e:
#print("FILE DOES NOT EXIST")
return False
new = self.stat
#print(new.st_ino, ' == ', cur.st_ino)
if (
(self.reopen_check == 'inode' and new.st_ino == cur.st_ino)
or
(self.reopen_check == 'time' and new.st_mtime <= floor(self.last_time) and new.st_size == pos)
):
#print("FILE NOT CHANGED")
# file appears to be the same or older than our last read
#self.last_time = new.st_mtime
self.fh = fh
self.pos = pos
self.stat = cur
return False
#print("NEW FILE")
return True
def __iter__(self):
"""
Return iterator to support:
for line in filetail:
print line
"""
self.wait_count = 0
return self
def __next__(self):
"""Interator "next" call."""
return self.next()
def next(self):
line = None
self.wait_count = 0
# low CPU (probably same as the block below this, but ALLOWS tell()!
while not line:
line = self.fh.readline()
if line != "":
# track the time we received new data and how much
self.last_time = time()
self.last_count = 1
else:
self.wait()
## uses the least amount of CPU, but does not allow me to tell()
## is that a bug in readlines()?
#while len(self._buffer) == 0:
# self._buffer = self.fh.readlines(self.max_buffer_size)
# if len(self._buffer) > 0:
# # track the time we received new data and how much
# self.last_time = time()
# self.last_count = len(self._buffer)
# self.wait_count = 0
# else:
# self.wait()
#line = self._buffer.pop(0)
# dealing with the file as binary isn't working as well as i hoped
#while len(self.lines) == 0:
# buffer = self.fh.read(self.max_buffer_size).decode(self.encoding)
# if buffer is not None:
# self._buffer += buffer
# size = self.enqueue(self._buffer)
# if size:
# self._buffer = self._buffer[size:]
# else:
# self.wait()
#line = self.lines.pop(0)
# uses too much CPU!! (but not 100%)
#line = self.fh.readline()
#while line == "":
# self.wait()
# line = self.fh.readline()
# if line != "":
# # track the time we received new data and how much
# self.pos = self.fh.tell()
# self.last_time = time()
# self.last_count = 1 #len(self._buffer)
# self.wait_count = 0
return line
#def enqueue(self, buffer):
# """
# Extract any lines from buffer and add to our self.lines list. Ignores
# the last line if it does not have a line termination ("\n")
# @return total characters extracted from buffer.
# """
# lines = buffer.splitlines(True)
# total = 0;
# for l in lines:
# if l.endswith("\n"):
# self.lines.append(l)
# total += len(l)
# return total
# wait for X seconds. The sleep interval is dynamically predicted based on
# how much was previously read. The predicted interval will never be more
# than max_interval. If enough time passes w/o any new data the file will
# be reopened and checked.
def wait(self):
if self.wait_count == 0:
self.pos = self.fh.tell()
self.stat = os.fstat(self.fh.fileno())
self.wait_count += 1
elapsed = time() - self.last_time
# if we've waited long enough try to reopen the file, if that returns
# true then we're done here and we do not sleep.
if elapsed >= self.max_wait:
self.last_time = time()
if self.reopen():
return
# determine delay value. Delay is longer based on total time passed
# note: currently last_count is always 1.
if self.last_count:
#delay = (time() - self.last_time) / self.last_count
delay = elapsed
else:
delay = self.interval
# don't delay too long
if delay > self.max_interval:
delay = self.max_interval
#elif delay < self.min_interval:
# delay = self.min_interval
#print("delay={:0.06f} elapsed={:0.06f}".format(delay, elapsed))
sleep(delay)
# end of FileTail class
def main():
print("No tests implemented.")
if __name__ == "__main__":
sys.exit(main())