Welcome, guest | Sign In | My Account | Store | Cart
from random import *
from heapq import *

cmps = 0

class Int(int):
    def __lt__(self, other):
        global cmps
        cmps += 1
        return int.__lt__(self, other)
    def __le__(self, other):
        global cmps
        cmps += 1
        return int.__le__(self, other)

def count_cmps(f, data, k):
    'Count comparisons in a call to f(k, data)'
    global cmps
    data = data[:]
    shuffle(data)
    cmps = 0
    result = f(k, data)
    assert result[:10] == list(range(10))
    return cmps

# -------- variants of nsmallest -------

def heapifying_smallest(k, data):
    heapify(data)
    result = [heappop(data) for j in range(k)]
    data.extend(result)
    return result

def select_nth(data, n):
    if len(data) == 1:
        return data[0]
    pivot = choice(data)
    lhs, rhs = [], []
    for elem in data:
        (lhs if elem < pivot else rhs).append(elem)
    if len(lhs) >= n+1:
        return select_nth(lhs, n)
    else:
        return select_nth(rhs, n - len(lhs))

def selecting_smallest(k, data):
    pivot = select_nth(data, k)
    return sorted(elem for elem in data if elem <= pivot)[:k]

def partitioning_smallest(n, data):
    if len(data) <= 1:
        return data
    pivot = choice(data)
    lhs, rhs = [], []
    for elem in data:
        (lhs if elem <= pivot else rhs).append(elem)
    if n < len(lhs):
        return partitioning_smallest(n, lhs)
    else:
        return sorted(lhs) + partitioning_smallest(n - len(lhs), rhs)


if __name__ == '__main__':
    # compare nsmallest implementations
    n, k = 100000, 100
    print('n: %d\tk: %d' % (n, k))
    data = list(map(Int, range(n)))
    for f in [nsmallest, heapifying_smallest,
             selecting_smallest, partitioning_smallest]:
        counts = sorted(count_cmps(f, data, k) for i in range(5))
        print(counts, f.__name__)

Diff to Previous Revision

--- revision 2 2011-02-12 06:59:55
+++ revision 3 2011-12-25 23:41:23
@@ -8,6 +8,10 @@
         global cmps
         cmps += 1
         return int.__lt__(self, other)
+    def __le__(self, other):
+        global cmps
+        cmps += 1
+        return int.__le__(self, other)
 
 def count_cmps(f, data, k):
     'Count comparisons in a call to f(k, data)'
@@ -16,10 +20,10 @@
     shuffle(data)
     cmps = 0
     result = f(k, data)
-    print(cmps, f.__name__, result[:10])
+    assert result[:10] == list(range(10))
+    return cmps
 
-def current_smallest(k, data):
-    return nsmallest(k, data)
+# -------- variants of nsmallest -------
 
 def heapifying_smallest(k, data):
     heapify(data)
@@ -29,7 +33,6 @@
 
 def select_nth(data, n):
     if len(data) == 1:
-        assert n == 0
         return data[0]
     pivot = choice(data)
     lhs, rhs = [], []
@@ -41,20 +44,28 @@
         return select_nth(rhs, n - len(lhs))
 
 def selecting_smallest(k, data):
-    assert 0 <= k < len(data)
     pivot = select_nth(data, k)
     return sorted(elem for elem in data if elem <= pivot)[:k]
 
+def partitioning_smallest(n, data):
+    if len(data) <= 1:
+        return data
+    pivot = choice(data)
+    lhs, rhs = [], []
+    for elem in data:
+        (lhs if elem <= pivot else rhs).append(elem)
+    if n < len(lhs):
+        return partitioning_smallest(n, lhs)
+    else:
+        return sorted(lhs) + partitioning_smallest(n - len(lhs), rhs)
+
+
 if __name__ == '__main__':
-    # show that select_nth works
-    data = list(range(20))
-    shuffle(data)
-    print([select_nth(data, k) for k in range(len(data))])
-
     # compare nsmallest implementations
-    n = 100000
-    k = 100
+    n, k = 100000, 100
+    print('n: %d\tk: %d' % (n, k))
     data = list(map(Int, range(n)))
-    for f in current_smallest, heapifying_smallest, selecting_smallest:
-        for i in range(5):
-            count_cmps(f, data, k)
+    for f in [nsmallest, heapifying_smallest,
+             selecting_smallest, partitioning_smallest]:
+        counts = sorted(count_cmps(f, data, k) for i in range(5))
+        print(counts, f.__name__)

History