from random import random
from math import log, ceil
class Node(object):
__slots__ = 'value', 'next', 'width'
def __init__(self, value, next, width):
self.value, self.next, self.width = value, next, width
class End(object):
'Sentinel object that always compares greater than another object'
def __cmp__(self, other):
return 1
NIL = Node(End(), [], []) # Singleton terminator node
class IndexableSkiplist:
'Sorted collection supporting O(lg n) insertion, removal, and lookup by rank.'
def __init__(self, expected_size=100):
self.size = 0
self.maxlevels = int(1 + log(expected_size, 2))
self.head = Node('HEAD', [NIL]*self.maxlevels, [1]*self.maxlevels)
def __len__(self):
return self.size
def __getitem__(self, i):
node = self.head
i += 1
for level in reversed(range(self.maxlevels)):
while node.width[level] <= i:
i -= node.width[level]
node = node.next[level]
return node.value
def insert(self, value):
# find first node on each level where node.next[levels].value > value
chain = [None] * self.maxlevels
steps_at_level = [0] * self.maxlevels
node = self.head
for level in reversed(range(self.maxlevels)):
while node.next[level].value <= value:
steps_at_level[level] += node.width[level]
node = node.next[level]
chain[level] = node
# insert a link to the newnode at each level
d = min(self.maxlevels, 1 - int(log(random(), 2.0)))
newnode = Node(value, [None]*d, [None]*d)
steps = 0
for level in range(d):
prevnode = chain[level]
newnode.next[level] = prevnode.next[level]
prevnode.next[level] = newnode
newnode.width[level] = prevnode.width[level] - steps
prevnode.width[level] = steps + 1
steps += steps_at_level[level]
for level in range(d, self.maxlevels):
chain[level].width[level] += 1
self.size += 1
def remove(self, value):
# find first node on each level where node.next[levels].value >= value
chain = [None] * self.maxlevels
node = self.head
for level in reversed(range(self.maxlevels)):
while node.next[level].value < value:
node = node.next[level]
chain[level] = node
if value != chain[0].next[0].value:
raise KeyError('Not Found')
# remove one link at each level
d = len(chain[0].next[0].next)
for level in range(d):
prevnode = chain[level]
prevnode.width[level] += prevnode.next[level].width[level] - 1
prevnode.next[level] = prevnode.next[level].next[level]
for level in range(d, self.maxlevels):
chain[level].width[level] -= 1
self.size -= 1
def __iter__(self):
'Iterate over values in sorted order'
node = self.head.next[0]
while node is not NIL:
yield node.value
node = node.next[0]
from collections import deque
from itertools import islice
class RunningMedian:
'Fast running median with O(lg n) updates where n is the window size'
def __init__(self, n, iterable):
self.it = iter(iterable)
self.queue = deque(islice(self.it, n))
self.skiplist = IndexableSkiplist(n)
for elem in self.queue:
self.skiplist.insert(elem)
def __iter__(self):
queue = self.queue
skiplist = self.skiplist
midpoint = len(queue) // 2
yield skiplist[midpoint]
for newelem in self.it:
oldelem = queue.popleft()
skiplist.remove(oldelem)
queue.append(newelem)
skiplist.insert(newelem)
yield skiplist[midpoint]
if __name__ == '__main__':
#########################################################################
# Demonstrate the RunningMedian() class
# Compare results to alternative class implemented using a regular list
from bisect import insort
from random import randrange
class RunningMedianSlow:
'Slow running-median with O(n) updates where n is the window size'
def __init__(self, n, iterable):
self.it = iter(iterable)
self.queue = deque(islice(self.it, n))
self.sortedlist = sorted(self.queue)
def __iter__(self):
queue = self.queue
sortedlist = self.sortedlist
midpoint = len(queue) // 2
yield sortedlist[midpoint]
for newelem in self.it:
oldelem = queue.popleft()
sortedlist.remove(oldelem)
queue.append(newelem)
insort(sortedlist, newelem)
yield sortedlist[midpoint]
M, N, window = 5000, 8000, 1001
data = [randrange(M) for i in range(N)]
result = list(RunningMedian(window, data))
expected = list(RunningMedianSlow(window, data))
assert result == expected
print 'Successful test of RunningMedian() with', N,
print 'items and a window of size', window, '\n'
#########################################################################
# Demonstrate and test the IndexableSkiplist() collection class
from random import shuffle
demo = list('FOURSQUARE')
excluded = list('XZ')
print 'Building an indexable skiplist containing %r:' % ''.join(demo)
s = IndexableSkiplist(len(demo)) # exercise __init__()
shuffle(demo)
for char in demo:
s.insert(char) # check insert()
print 'Adding: ', char, list(s), len(s)
for char in excluded:
print 'Attempting to remove:', char, '-->',
try:
s.remove(char) # check remove() item not in skiplist
except KeyError:
print 'Successfully not found'
assert len(s) == len(demo) # check __len__()
assert list(s) == sorted(demo) # check __iter__()
assert [s[i] for i in range(len(s))] == sorted(demo) # check __getitem__()
shuffle(demo)
for char in demo:
s.remove(char) # check remove() item in skiplist
print 'Removing: ', char, list(s), len(s)
assert list(s) == [] and len(s) == 0
#############################################################################
# Show the IndexableSkiplist's internal structure and describe its invariants
def iter_levels(skiplist, level):
'Iterate through nodes on a given level.'
node = skiplist.head
while node is not NIL:
yield node
node = node.next[level]
demo = list('HOPSCOTCH') # create a sample skiplist
s = IndexableSkiplist(len(demo))
for char in demo:
s.insert(char)
print '\nStructure of an indexable skiplist containing %r:' % ''.join(demo)
for i in reversed(range(s.maxlevels)): # show the link structure on each level
print 'Level %d: ' % i,
line = ''
for n in iter_levels(s, i):
line += '%s%d %s' % (n.value, n.width[i], ' ' * (n.width[i] - 1))
print line + 'NIL'
print '\nNote the following structure invariants:'
print '* Level 0 has widths all equal to one.'
print '* The sum of widths on each level == len(skiplist)+1.'
print '* Entries on higher levels are subsets of the lower levels.'
print '* Widths of entries on higer levels sum to the widths below.'
print ' For example, a P --> T link of two steps on one level'
print ' corresponds to P --> S --> T links of one step each on level 0.'