A Heap wrapper class around the standard heapq, it helps avoid some common bugs with a useful OOP, and it manages a key parameter too.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 | """
Heap.py -- Heap queue algorithm, class version.
V.1.5, Jan 18 2007, by bearophile
This class provides an implementation of the heap queue algorithm, also known as the
priority queue algorithm, using the heapq standard module.
Heaps are arrays for which heap[k] <= heap[2*k+1] and heap[k] <= heap[2*k+2] for all k,
counting elements from zero. For the sake of comparison, non-existing elements are
considered to be infinite. The interesting property of a heap is that heap[0] is always its
smallest element.
The API below differs from textbook heap algorithms in two aspects: (a) We use zero-based
indexing. This makes the relationship between the index for a node and the indexes for its
children slightly less obvious, but is more suitable since Python uses zero-based indexing.
(b) Our pop method returns the smallest item, not the largest (called a "min heap" in
textbooks; a "max heap" is more common in texts because of its suitability for in-place
sorting).
To create a Heap, you can give it a sequence, or you can create an empty Heap and
popultate it later.
Some doctests:
>>> data = [1, 3, 5, 7, 9, 2, 4, 6, 8, 0]
>>> h = Heap()
>>> bool(h)
False
>>> for item in data:
... h.push(item)
>>> bool(h)
True
>>> h
Heap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> len(h)
10
>>> h.smallest()
0
>>> 7 in h
True
>>> 10 in h
False
>>> set((h,))
Traceback (most recent call last):
...
TypeError: Heap objects are unhashable.
>>> h
Heap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> h.replace(-5)
0
>>> h
Heap([-5, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> h.replace(6)
-5
>>> print h
[1, 2, 3, 4, 5, 6, 6, 7, 8, 9]
>>> h == Heap([1, 3, 2, 6, 6, 5, 4, 7, 8, 9])
True
>>> sorted_data = []
>>> list(h)
[1, 3, 2, 6, 6, 5, 4, 7, 8, 9]
>>> while h:
... sorted_data.append(h.pop())
...
>>> sorted_data
[1, 2, 3, 4, 5, 6, 6, 7, 8, 9]
>>> h2 = Heap(h)
>>> id(h._heap) == (h2._heap)
False
>>> h == h2
True
>>> h != h2
False
>>> h.clear()
>>> h
Heap()
>>> h = Heap([1, 3, 5, 7, 9, 2, 4, 6, 8, 0])
>>> h.popn(4)
[0, 1, 2, 3]
>>> from random import choice
>>> l1 = [choice([0, 0.0]) for _ in xrange(100)]
>>> h1 = Heap(l1)
>>> l2 = [choice([0, 0.0]) for _ in xrange(100)]
>>> h2 = Heap(l2)
>>> h1 == h2
True
>>> h1 > h2
Traceback (most recent call last):
...
TypeError: no ordering relation is defined for Heap
>>> h = Heap()
>>> h.push(5)
>>> h.push(1)
>>> h.push(3)
>>> print h, repr(h)
[1, 3, 5] Heap([1, 3, 5])
>>> for el in h: print el
1
5
3
>>> print 2 in h, 3 in h
False True
>>> h = Heap(key=abs)
>>> h.push(-5)
>>> h.push(-1)
>>> h.push(-3)
>>> print h, repr(h)
[-1, -3, -5] Heap([-1, -3, -5])
>>> for el in h: print el
-1
-5
-3
>>> print -2 in h, -3 in h
False True
>>> l = [-1, -3, -5]
>>> h3 = Heap(l, inplace=True)
>>> id(l) == id(h3._heap)
True
>>> l = [-1, -3, -5]
>>> h4 = Heap(l)
>>> id(l) == id(h4._heap)
False
"""
from operator import itemgetter
from heapq import heapify, heappush, heappop, heapreplace, nlargest, nsmallest
from itertools import izip
class Heap(object):
"""Heap(sequence=None, key=None, inplace=False): this class provides an
implementation of the heap queue algorithm, also known as the priority queue
algorithm, using the standard heapq module.
key specifies a function of one argument that is used to extract a comparison key
from each list element, like the key argument of sort/sorted, ex: key=str.lower
Use inplace=True if sequence is a list and you want to heapify it in place."""
def __init__(self, sequence=None, key=None, inplace=False):
self._key = key
if key is None:
if sequence is None:
self._heap = []
elif isinstance(sequence, self.__class__) and sequence._key is None:
self._heap = sequence._heap[:]
else:
if inplace and isinstance(sequence, list):
self._heap = sequence
heapify(self._heap)
else:
self._heap = list(sequence)
heapify(self._heap)
# Replacing them for speed. Is this a problem for the GC?
self.smallest = self.__smallest_normal
self.push = self.__push_normal
self.pop = self.__pop_normal
self.popn = self.__popn_normal
self.replace = self.__replace_normal
else:
if sequence is None:
self._heap = []
self._itemid = 0
else:
self._heap = [(key(el), pos, el) for pos, el in enumerate(sequence)]
heapify(self._heap)
self._itemid = len(self._heap)
# Replacing them for speed. Is this a problem for the GC?
self.smallest = self.__smallest_key
self.push = self.__push_key
self.pop = self.__pop_key
self.popn = self.__popn_key
self.replace = self.__replace_key
def __smallest_key(self):
"""smallest(self): return the smalled item of the Heap."""
return self._heap[0][2]
def __smallest_normal(self):
"""smallest(self): return the smalled item of the Heap."""
return self._heap[0]
def __push_key(self, item):
"""push(item): push the value item onto the heap, maintaining the heap
invariant."""
# self._itemid helps to avoid using two times the same id, so they are all
# different, so item is never accessed inside __le__
heappush(self._heap, (self._key(item), self._itemid, item) )
self._itemid += 1
def __push_normal(self, item):
"""push(item): push the value item onto the heap, maintaining the heap
invariant."""
heappush(self._heap, item)
def __pop_key(self):
"""pop(): pops and return the smallest item from the heap, maintaining
the heap invariant. If the heap is empty, IndexError is raised."""
return heappop(self._heap)[2]
def __pop_normal(self):
"""pop(): pops and return the smallest item from the heap, maintaining
the heap invariant. If the heap is empty, IndexError is raised."""
return heappop(self._heap)
def __popn_key(self, n):
"""popn(n): pops and return the n smallest items from the heap, maintaining
the heap invariant. If the heap becomes empty, IndexError is raised."""
self__heap = self._heap
return [heappop(self__heap)[2] for _ in xrange(n)]
def __popn_normal(self, n):
"""popn(n): pops and return the n smallest items from the heap, maintaining
the heap invariant. If the heap becomes empty, IndexError is raised."""
self__heap = self._heap
return [heappop(self__heap) for _ in xrange(n)]
def __replace_key(self, item):
"""replace(item): pop and return the smallest item from the heap, and also
push the new item. The heap size doesn't change. If the heap is empty,
IndexError is raised. This is more efficient than heappop() followed by
heappush(), and can be more appropriate when using a fixed-size heap. Note
that the value returned may be larger than item. That constrains reasonable
uses of this routine unless written as part of a conditional replacement:
if item > heap[0]:
item = heapreplace(heap, item)
"""
return heapreplace(self._heap, (self._key(item), self._itemid, item) )[2]
self._itemid += 1
def __replace_normal(self, item):
"""replace(item): pop and return the smallest item from the heap, and also
push the new item. The heap size doesn't change. If the heap is empty,
IndexError is raised. This is more efficient than heappop() followed by
heappush(), and can be more appropriate when using a fixed-size heap. Note
that the value returned may be larger than item. That constrains reasonable
uses of this routine unless written as part of a conditional replacement:
if item > heap[0]:
item = heapreplace(heap, item)
"""
return heapreplace(self._heap, item)
def clear(self):
"""clear(): clears the items of the heap."""
del self._heap[:]
if self._key is not None:
self._itemid = 0
def __hash__(self):
raise TypeError("Heap objects are unhashable.")
def __iter__(self):
if self._key is None:
return self._heap.__iter__()
else:
return (triple[2] for triple in self._heap)
def __eq__(self, other):
if self._key is None:
return isinstance(other, self.__class__) and \
sorted(self._heap) == sorted(other._heap)
else:
if not isinstance(other, self.__class__) or len(self._heap) != len(other._heap):
return False
sorted_self = sorted(self._heap, key=itemgetter(0))
sorted_other = sorted(other._heap, key=itemgetter(0))
for (kel1,_,el1), (kel2,_,el2) in izip(sorted_self, sorted_other):
if kel1 != kel2 or el1 != el2:
return False
return True
def __ne__(self, other):
return not (self == other)
def __lt__(self, other):
raise TypeError('no ordering relation is defined for %s' %
self.__class__.__name__)
__gt__ = __le__ = __ge__ = __lt__
def __len__(self):
return len(self._heap)
def __nonzero__(self):
return bool(self._heap)
def __str__(self):
if self._key is None:
return str(sorted(self._heap))
else:
return str( [el[2] for el in sorted(self._heap, key=itemgetter(0))] )
def __repr__(self):
if self._heap:
return "%s(%s)" % (self.__class__.__name__, self)
else:
return self.__class__.__name__ + "()"
if __name__ == "__main__":
import doctest
doctest.testmod()
print "Doctests finished."
|
I think the comparison between a Heap with a key and a keyless Heap isn't tested enough, there may be some tricky corner cases. An improved and very well tested version of this code may be fit for the standard library too.
Tags: algorithms