Find out how often, and by whom, a particular file is being requested. Prints the requesting addresses, hostnames, access times, and hit counts.
from operator import itemgetter
from socket import gethostbyaddr, herror
import time
# Default access log scanned when track() is not given an explicit path.
accessLog = "/var/log/apache2/access_log"


def track(filename, logFile=accessLog):
    """Retrieve request statistics for a specific file in an access log.

    Each log line is split on whitespace; field 0 is taken as the requesting
    address, field 3 as the timestamp (``[day/Mon/year:hour:min:sec``) and
    field 6 as the requested path.  A summary line and one line per
    requesting address are printed to standard output.

    Args:
        filename: Requested path to look for; a single leading '/' is
            enforced before comparing against the log.
        logFile: Path of the access log to read.

    Returns:
        A list of ``(address, (last_access, hits))`` tuples sorted by
        ``(last_access, hits)``.  ``last_access`` is the ``time.struct_time``
        parsed from the last matching line seen for that address and
        ``hits`` is the number of matching lines.  The list is empty when
        the file was never requested.

    Raises:
        IOError/OSError: if ``logFile`` cannot be opened.
        ValueError: if a matching line carries a timestamp that does not
            fit the expected format.
    """
    filename = '/' + filename.lstrip('/')
    access = {}
    # The context manager guarantees the log is closed even if parsing fails.
    with open(logFile) as log:
        for line in log:
            parts = line.split()
            # Blank or truncated lines have no request-path field; skip them
            # instead of failing with IndexError.
            if len(parts) < 7 or parts[6] != filename:
                continue
            accessTime = time.strptime(parts[3], "[%d/%b/%Y:%H:%M:%S")
            previousHits = access.get(parts[0], ('', 0))[1]
            access[parts[0]] = (accessTime, previousHits + 1)

    totalHits = sum(data[1] for data in access.values())
    # Single parenthesised argument keeps print valid on Python 2 and 3.
    print('%s has been requested by %d people and hit %d times.' %
          (filename, len(access), totalHits))

    accessTuple = sorted(access.items(), key=itemgetter(1))
    if not accessTuple:
        # Nothing matched: max() below would raise on an empty sequence.
        return accessTuple

    # Width of the largest hit count, used to right-align the Hits column.
    hitWidth = len(str(max(data[1] for user, data in accessTuple)))
    for user, data in accessTuple:
        lastAccess, hits = data
        print('User: %s Last Access: %s Hits: %s' %
              ((user + ",").ljust(16),
               time.strftime("%a %d-%b-%Y %I:%M:%S %p,", lastAccess),
               str(hits).rjust(hitWidth)))
    return accessTuple
def resolve(*args):
    """Resolve a sequence of IP addresses to their hostnames, if possible.

    Addresses may be passed as separate arguments or as one iterable.  Each
    item is either an address string or a tuple whose first element is the
    address, so the list returned by track() can be passed directly.  One
    ``Addr: ... Hostname: ...`` line is printed per address; when the lookup
    fails the address itself is printed in place of the hostname.

    Args:
        *args: Zero or more addresses, or a single iterable of addresses.
            A single string argument is treated as one address.

    Returns:
        None.  Results are only printed.
    """
    # Imported locally so this function does not depend on a change to the
    # file's top-level import of gethostbyaddr/herror.
    from socket import gaierror

    if len(args) == 1 and not isinstance(args[0], str):
        # A lone non-string argument is the iterable of addresses itself.
        addrList = args[0]
    else:
        # Zero arguments, one address string, or several addresses.
        addrList = args

    for addr in addrList:
        if isinstance(addr, tuple):
            addr = addr[0]
        try:
            host = gethostbyaddr(addr)[0]
        except (herror, gaierror, IndexError):
            # Best effort: fall back to the raw address when it has no
            # reverse entry or cannot be looked up at all.
            host = addr
        print('Addr: %s Hostname: %s' % ((addr + ',').ljust(16), host))
if __name__ == "__main__":
    # Command-line entry point: the first argument is the file to look for,
    # the optional second argument overrides the default access log path.
    import sys

    cliArgs = sys.argv[1:]
    if not cliArgs:
        print("Usage: python track_access.py filename access_log")
    else:
        if len(cliArgs) >= 2:
            accessLog = cliArgs[1]
        # Print the per-address statistics, then the resolved hostnames.
        resolve(track(cliArgs[0], accessLog))
track() returns a list of tuples, one per requesting IP address, each pairing the address with its access statistics (last access time and hit count). To print simple hit statistics, call track like so:
track('index.html', '/var/log/apache2/access_log')
To also resolve hostnames:
resolve(track('index.html', '/var/log/apache2/access_log'))
Here's an example of how it is used from the command line:
$ python track_file.py beaches.png /var/log/lighttpd/access.log
/beaches.png has been requested by 17 people and hit 29 times.
User: 129.22.9.207, Last Access: Tue 02-Aug-2005 12:01:07 AM, Hits: 2
User: 129.22.151.187, Last Access: Tue 02-Aug-2005 12:38:05 AM, Hits: 1
...
Addr: 129.22.9.207, Hostname: h-129-22-9-207.ins.cwru.edu
Addr: 129.22.151.187, Hostname: thomaspaine.STUDENT.CWRU.Edu
...