#!/usr/bin/env python
""" Walker's alias method for random objects with different probablities
walkerrandom.py
Examples
--------
# 0 1 2 or 3 with probabilities .1 .2 .3 .4 --
wrand = Walkerrandom( [10, 20, 30 40] ) # builds the Walker tables
wrand.random() # each call -> 0 1 2 or 3
# for example, 1000 calls with random.seed(1) -> [96, 199, 334, 371]
# strings A B C or D with probabilities .1 .2 .3 .4 --
abcd = dict( A=1, D=4, C=3, B=2 )
# keys can be any immutables: 2d points, colors, atoms ...
wrand = Walkerrandom( abcd.values(), abcd.keys() )
wrand.random() # each call -> "A" "B" "C" or "D"
# fast: 1 randint(), 1 uniform(), table lookup
How it works
------------
For weights 10 20 30 40 as above, picture sticks A B C D of those lengths:
10 AAAAAAAAAA
20 BBBBBBBBBB BBBBBBBBBB
30 CCCCCCCCCC CCCCCCCCCC CCCCCCCCCC
40 DDDDDDDDDD DDDDDDDDDD DDDDDDDDDD DDDDDDDDDD
Split and rearrange them into equal-length rows, like this:
AAAAAAAAAA DDDDDDDDDDDDDDD -- 10 A + 15 D = 40% A + 60% D
BBBBBBBBBBBBBBBBBBBB DDDDD -- 20 B + 5 D = 80% B + 20% D
CCCCCCCCCCCCCCCCCCCCCCCCC -- 25 C = 100% C
DDDDDDDDDDDDDDDDDDDD CCCCC -- 20 D + 5 C = 80% D + 20% C
Clearly 10 % of the area is A, 20 % B, 30 % C and 40 % D --
we haven't changed areas, just rearranged.
Now to choose a random one of A or B or C or D,
throw a dart at a "dart board" of the sticks in these 4 rows:
if it hits row 0, return A with probablity 40 % / D 60 %
if it hits row 1, return B with probablity 80 % / D 20 %
...
This picture is in Devroye, p. 111 (rediscovered here).
Walker's algorithm essentially arranges a given lot of sticks
into equal-length rows: pick a row shorter than average
and a row longer than average, split the longer to fill the shorter,
iterate until they're all the same length.
Notes
To generate random colors similar to those in a given picture,
first collect color samples in a histogram:
for color in ...:
# cluster e.g. rrggbb -> rgb, 16^3 bins
# (many many methods, see Wikipedia Data_clustering)
colors[color] += 1
(cPickle to a file, write it, read it back in)
then use Walkerrandom to select colors with these frequencies:
colorrand = Walkerrandom( colors.values(), colors.keys() )
colorrand.random() # each call -> a color
References
L. Devroye, Non-Uniform Random Variate Generation, 1986, p. 107 ff.
http://cg.scs.carleton.ca/~luc/rnbookindex.html (800 pages)
Knuth, Stanford GraphBase, 1993, p. 392
C++ hat random container by AngleWyrm,
http://home.comcast.net/~anglewyrm/hat.html
"""
from __future__ import division
import random
__author__ = "Denis Bzowy"
__version__ = "16nov2008"
Test = 0
#...............................................................................
class Walkerrandom:
""" Walker's alias method for random objects with different probablities
"""
def __init__( self, weights, keys=None ):
""" builds the Walker tables prob and inx for calls to random().
The weights (a list or tuple or iterable) can be in any order;
they need not sum to 1.
"""
n = self.n = len(weights)
self.keys = keys
sumw = sum(weights)
prob = [w * n / sumw for w in weights] # av 1
inx = [-1] * n
short = [j for j, p in enumerate( prob ) if p < 1]
long = [j for j, p in enumerate( prob ) if p > 1]
while short and long:
j = short.pop()
k = long[-1]
# assert prob[j] <= 1 <= prob[k]
inx[j] = k
prob[k] -= (1 - prob[j]) # -= residual weight
if prob[k] < 1:
short.append( k )
long.pop()
if Test:
print "test Walkerrandom: j k pk: %d %d %.2g" % (j, k, prob[k])
self.prob = prob
self.inx = inx
if Test:
print "test", self
def __str__( self ):
""" e.g. "Walkerrandom prob: 0.4 0.8 1 0.8 inx: 3 3 -1 2" """
probstr = " ".join([ "%.2g" % x for x in self.prob ])
inxstr = " ".join([ "%.2g" % x for x in self.inx ])
return "Walkerrandom prob: %s inx: %s" % (probstr, inxstr)
#...............................................................................
def random( self ):
""" each call -> a random int or key with the given probability
fast: 1 randint(), 1 random.uniform(), table lookup
"""
u = random.uniform( 0, 1 )
j = random.randint( 0, self.n - 1 ) # or low bits of u
randint = j if u <= self.prob[j] \
else self.inx[j]
return self.keys[randint] if self.keys \
else randint
#...............................................................................
if __name__ == "__main__":
# little examples, self-contained --
N = 5
Nrand = 1000
randomseed = 1
try:
import bz.util
bz.util.scan_eq_args( globals(), __doc__ ) # N=5 ...
except ImportError:
pass
if randomseed:
random.seed( randomseed )
print Nrand, "Walkerrandom with weights .1 .2 .3 .4:"
w = range( 1, N )
wrand = Walkerrandom( w )
nrand = [0] * (N - 1)
for _ in range( Nrand ):
j = wrand.random()
nrand[j] += 1
s = str( nrand )
print s
if N==5 and Nrand==1000 and randomseed==1:
assert s == "[96, 199, 334, 371]"
print Nrand, "Walkerrandom strings with weights .1 .2 .3 .4:"
abcd = dict( A=1, D=4, C=3, B=2 )
wrand = Walkerrandom( abcd.values(), abcd.keys() )
from collections import defaultdict
nrand = defaultdict(int) # init 0
for _ in range( Nrand ):
j = wrand.random()
nrand[j] += 1
s = str( sorted( nrand.iteritems() ))
print s
if N==5 and Nrand==1000 and randomseed==1:
assert s == "[('A', 105), ('B', 181), ('C', 283), ('D', 431)]"
# end walkerrandom.py