Welcome, guest | Sign In | My Account | Store | Cart

Find out how often, and by whom, a particular file is being requested. Prints the requesting addresses, their hostnames, last access times, and hit counts.

Python, 55 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from operator import itemgetter
from socket import gethostbyaddr, herror
import time

accessLog = "/var/log/apache2/access_log"


def track(filename, logFile=accessLog):
    """Retrieve request statistics for a specific file in an access log.

    Every line of ``logFile`` is split on whitespace. The first field is
    taken as the requesting address, the fourth as the timestamp (in the
    form ``[dd/Mon/yyyy:HH:MM:SS``) and the seventh as the requested path.
    A summary line and one line per requesting address are printed to
    standard output.

    Args:
        filename: Path of the file as it appears in the request. A leading
            '/' is added when missing, so 'index.html' and '/index.html'
            are equivalent.
        logFile: Path of the access log to scan.

    Returns:
        A list of ``(address, (lastAccess, hits))`` tuples sorted by
        ``(lastAccess, hits)``. ``lastAccess`` is the ``time.struct_time``
        of the last matching line seen for that address (in file order)
        and ``hits`` is the number of matching lines. The list is empty
        when the file was never requested.

    Raises:
        IOError / OSError: if ``logFile`` cannot be opened.
        ValueError: if a matching line carries a timestamp that does not
            fit the expected format.
    """
    filename = '/' + filename.lstrip('/')
    access = {}

    # The context manager closes the log even if parsing fails part-way.
    with open(logFile) as log:
        for line in log:
            parts = line.split()
            # Blank or truncated lines have no request-path field; skip
            # them instead of failing on parts[6].
            if len(parts) < 7 or parts[6] != filename:
                continue
            accessTime = time.strptime(parts[3], "[%d/%b/%Y:%H:%M:%S")
            user = parts[0]
            previousHits = access.get(user, (None, 0))[1]
            access[user] = (accessTime, previousHits + 1)

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('%s has been requested by %d people and hit %d times.' %
          (filename, len(access), sum(map(itemgetter(1), access.values()))))

    accessTuple = sorted(access.items(), key=itemgetter(1))
    if not accessTuple:
        # Nothing to tabulate; max() below would fail on an empty list.
        return accessTuple

    # Width of the largest hit count, so the counts line up in a column.
    hitWidth = len(str(max([data[1] for user, data in accessTuple])))

    for user, data in accessTuple:
        lastAccess, hits = data
        print('User: %s Last Access: %s Hits: %s' %
              ((user + ",").ljust(16),
               time.strftime("%a %d-%b-%Y %I:%M:%S %p,", lastAccess),
               str(hits).rjust(hitWidth)))

    return accessTuple

def resolve(*args):
    """Resolve a sequence of IP addresses to their hostnames, if possible.

    Accepts either several addresses as separate arguments or a single
    iterable of addresses. An item may also be a tuple whose first element
    is the address, which lets the list returned by ``track()`` be passed
    in directly. One line per address is printed to standard output; when
    an address cannot be resolved, the address itself is printed in place
    of the hostname.

    Args:
        *args: Addresses, or one iterable of addresses / tuples. A single
            string argument is treated as one address rather than being
            iterated character by character. No arguments prints nothing.

    Returns:
        None.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from socket import gaierror

    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
    stringTypes = (str, type(u''))
    if len(args) == 1 and not isinstance(args[0], stringTypes):
        addrList = args[0]
    else:
        # Zero arguments, several arguments, or one bare address string.
        addrList = args

    for addr in addrList:
        if isinstance(addr, tuple):
            addr = addr[0]
        try:
            host = gethostbyaddr(addr)[0]
        except (herror, gaierror, IndexError):
            # Lookup failed or the address is malformed: fall back to the
            # raw address so the rest of the listing is still produced.
            host = addr
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print('Addr: %s Hostname: %s' % ((addr + ',').ljust(16), host))

if __name__ == "__main__":
    import sys

    # Command line: <filename> [access_log]. With no filename, show usage.
    cliArgs = sys.argv[1:]
    if not cliArgs:
        print("Usage: python track_access.py filename access_log")
    else:
        # An optional second argument overrides the default log location.
        if len(cliArgs) > 1:
            accessLog = cliArgs[1]
        # Print the hit statistics, then the hostname of each requester.
        resolve(track(cliArgs[0], accessLog))

track() returns a list of tuples including access statistics for each IP address. To print simple hit statistics, call track like so:

track('index.html', '/var/log/apache2/access_log')

To also resolve hostnames:

resolve(track('index.html', '/var/log/apache2/access_log'))

Here's an example of how it is used from the command line:

$ python track_file.py beaches.png /var/log/lighttpd/access.log
/beaches.png has been requested by 17 people and hit 29 times.
User: 129.22.9.207,   Last Access: Tue 02-Aug-2005 12:01:07 AM, Hits: 2
User: 129.22.151.187, Last Access: Tue 02-Aug-2005 12:38:05 AM, Hits: 1
...
Addr: 129.22.9.207,   Hostname: h-129-22-9-207.ins.cwru.edu
Addr: 129.22.151.187, Hostname: thomaspaine.STUDENT.CWRU.Edu
...