Welcome, guest | Sign In | My Account | Store | Cart
from random import *
from heapq import *

cmps = 0

class Int(int):
def __lt__(self, other):
global cmps
cmps += 1
return int.__lt__(self, other)

def count_cmps(f, data, k):
'Count comparisons in a call to f(k, data)'
global cmps
data = data[:]
shuffle(data)
cmps = 0
result = f(k, data)
print(cmps, f.__name__, result[:10])

def current_smallest(k, data):
return nsmallest(k, data)

def heapifying_smallest(k, data):
heapify(data)
result = [heappop(data) for j in range(k)]
data.extend(result)
return result

def select_nth(data, n):
if len(data) == 1:
assert n == 0
return data[0]
pivot = choice(data)
lhs, rhs = [], []
for elem in data:
(lhs if elem < pivot else rhs).append(elem)
if len(lhs) >= n+1:
return select_nth(lhs, n)
else:
return select_nth(rhs, n - len(lhs))

def selecting_smallest(k, data):
assert 0 <= k < len(data)
pivot = select_nth(data, k)
return sorted(elem for elem in data if elem <= pivot)[:k]

if __name__ == '__main__':
# show that select_nth works
data = list(range(20))
shuffle(data)
print([select_nth(data, k) for k in range(len(data))])

# compare nsmallest implementations
n = 100000
k = 100
data = list(map(Int, range(n)))
for f in current_smallest, heapifying_smallest, selecting_smallest:
for i in range(5):
count_cmps(f, data, k)