Welcome, guest | Sign In | My Account | Store | Cart
from random import *
from heapq import *

cmps = 0

class Int(int):
    def __lt__(self, other):
        global cmps
        cmps += 1
        return int.__lt__(self, other)

def count_cmps(f, data, k):
    'Count comparisons in a call to f(k, data)'
    global cmps
    data = data[:]
    shuffle(data)
    cmps = 0
    result = f(k, data)
    print(cmps, f.__name__, result[:10])

def current_smallest(k, data):
    return nsmallest(k, data)

def heapifying_smallest(k, data):
    heapify(data)
    result = [heappop(data) for j in range(k)]
    data.extend(result)
    return result

def select_nth(data, n):
    if len(data) == 1:
        assert n == 0
        return data[0]
    pivot = choice(data)
    lhs, rhs = [], []
    for elem in data:
        (lhs if elem < pivot else rhs).append(elem)
    if len(lhs) >= n+1:
        return select_nth(lhs, n)
    else:
        return select_nth(rhs, n - len(lhs))

def selecting_smallest(k, data):
    assert 0 <= k < len(data)
    pivot = select_nth(data, k)
    return sorted(elem for elem in data if elem <= pivot)[:k]

if __name__ == '__main__':
    # show that select_nth works
    data = list(range(20))
    shuffle(data)
    print([select_nth(data, k) for k in range(len(data))])

    # compare nsmallest implementations
    n = 100000
    k = 100
    data = list(map(Int, range(n)))
    for f in current_smallest, heapifying_smallest, selecting_smallest:
        for i in range(5):
            count_cmps(f, data, k)


    
        
    

History