Welcome, guest | Sign In | My Account | Store | Cart

Here's a simple program that demonstrates Benford's Law and also shows off the power of matplotlib. It reads from one or more files (or from stdin, if none are specified), extracts the leading digit of every number-like string it finds, and plots the resulting distribution in a window alongside the distribution predicted by Benford's Law.

Python, 52 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""
Benford's law in Python.
"""

import re
import matplotlib.pyplot as plot

from math import log10

def plot_benford(iterable):
    """Plot the leading-digit distribution of numbers found in an iterable.

    The items of `iterable` are passed to digits(), which extracts the
    leading digit of every number-like string.  The observed relative
    frequencies are drawn as a histogram, the frequencies predicted by
    Benford's law, log10(1 + 1/d), are drawn as red dots on top, and the
    figure is then displayed with plot.show().
    """

    # Floats keep `1 / d` below a true division on Python 2 as well.
    # `range` (rather than `xrange`) works on both Python 2 and 3.
    numbers = [float(n) for n in range(1, 10)]

    # Plot the frequencies as predicted by the law.
    benford = [log10(1 + 1 / d) for d in numbers]
    plot.plot(numbers, benford, 'ro', label="Predicted")

    # Plot the actual digit frequencies.  The bin edges 1..10 give one
    # unit-width bin per digit, so the density equals the relative
    # frequency; align='left' centres each bar on its digit.
    # `density` replaces the `normed` keyword, which newer matplotlib
    # releases no longer accept.
    data = list(digits(iterable))
    plot.hist(data, list(range(1, 11)), align='left', density=True,
              rwidth=0.7, label="Actual")

    # Set plot parameters and show it in a window.
    plot.title("Benford's Law")
    plot.xlabel("Digit")
    plot.ylabel("Frequency")

    plot.xlim(0, 10)
    plot.xticks(numbers)
    plot.legend()

    plot.show()

def digits(iterable):
    """Yield leading digits of number-like strings in an iterable.

    Each item is converted with str() and scanned for unsigned decimal
    numbers with an optional fractional part and an optional exponent
    (e.g. "42", "0.0031", "6.02e23", "1e-5").  For every number found,
    the first non-zero digit of its mantissa is yielded as an int in the
    range 1-9.  Numbers whose mantissa contains only zeros have no
    leading digit and yield nothing.
    """

    # Group 1 is the mantissa.  The exponent may carry a sign; it is
    # consumed as part of the same number so that its digits are neither
    # reported as a separate number nor mistaken for the leading digit.
    numexp = re.compile(r'(\d+(?:\.\d+)?)(?:[eE][+-]?\d+)?')
    leading = set("123456789")

    for item in iterable:
        for match in numexp.finditer(str(item)):
            # Only the mantissa determines the leading digit.
            for digit in match.group(1):
                if digit in leading:
                    yield int(digit)
                    break

if __name__ == "__main__":
    # Script entry point: feed the lines of the files named on the
    # command line (or of stdin when none are given) to the plotter.
    import fileinput
    lines = fileinput.input()
    plot_benford(lines)