Here's a short program that demonstrates Benford's Law and also shows how little matplotlib code a useful plot requires. It reads from the files named on the command line (or from stdin, if none are specified), extracts the leading digit of every number-like string it finds, and plots the observed distribution in a window together with the distribution expected if Benford's law applies.
"""
Benford's law in Python.
"""
import re
import matplotlib.pyplot as plot
from math import log10
def plot_benford(iterable):
    """Plot the leading-digit distribution of the numbers in an iterable.

    Every item of ``iterable`` is scanned by ``digits()`` for number-like
    substrings.  The relative frequency of each leading digit (1-9) is
    drawn as a histogram, with the frequencies predicted by Benford's law,
    ``log10(1 + 1/d)``, overlaid as red dots.  The figure is then shown in
    a window via ``plot.show()``.

    If no digits are found, only the predicted points are drawn.
    """
    # range() rather than the Python 2-only xrange(); the values are floats
    # so that 1 / d below is a true division on every Python version.
    numbers = [float(n) for n in range(1, 10)]

    # Plot the frequencies as predicted by the law.
    benford = [log10(1 + 1 / d) for d in numbers]
    plot.plot(numbers, benford, 'ro', label="Predicted")

    # Plot the actual digit frequencies.  Bin edges 1..10 give one bin per
    # digit; align='left' centres each bar on its digit.  'density' is the
    # replacement for the removed 'normed' keyword.
    data = list(digits(iterable))
    if data:
        # An empty sample cannot be normalised, so skip the histogram.
        plot.hist(data, list(range(1, 11)), align='left', density=True,
                  rwidth=0.7, label="Actual")

    # Set plot parameters and show it in a window.
    plot.title("Benford's Law")
    plot.xlabel("Digit")
    plot.ylabel("Frequency")
    plot.xlim(0, 10)
    plot.xticks(numbers)
    plot.legend()
    plot.show()
def digits(iterable):
    """Yield leading digits of number-like strings in an iterable.

    Each item is converted with ``str()`` and searched for unsigned
    decimal numbers with an optional fractional part and an optional
    exponent (``12``, ``0.0034``, ``6.02e23``, ``1e-5``).  For every number
    found, the first non-zero digit of its mantissa is yielded as an
    ``int`` in the range 1-9.  Numbers whose mantissa contains only zeros
    yield nothing.
    """
    # Group 1 is the mantissa.  The exponent (with optional sign) is
    # consumed as part of the same number so that its digits are neither
    # mistaken for a separate number nor for the leading digit.
    numexp = re.compile(r'(\d+(?:\.\d+)?)(?:[eE][+-]?\d+)?')
    leading = set("123456789")
    for item in iterable:
        for match in numexp.finditer(str(item)):
            for digit in match.group(1):
                if digit in leading:
                    yield int(digit)
                    break
if __name__ == "__main__":
    # Script entry point: feed every line of the files named on the command
    # line (or of stdin when none are given) to the plotter.
    import fileinput

    lines = fileinput.input()
    plot_benford(lines)
|