Linked list implementation based on Python 2.7 collections.MutableSequence with a few benchmarks comparing the linked list with the built-in list type.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | import collections
import time
# Requires Python 2.7
class Node(object):
def __init__(self, value):
self.value = value
self.next = None
self.prev = None
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, self.value)
class LinkedList(collections.MutableSequence):
def __init__(self, iterable=None):
self.sentinel = Node(None)
self.sentinel.next = self.sentinel
self.sentinel.prev = self.sentinel
self.__len = 0
if iterable is not None:
self += iterable
def get_node(self, index):
node = sentinel = self.sentinel
i = 0
while i <= index:
node = node.next
if node == sentinel:
break
i += 1
if node == sentinel:
node = None
return node
def __getitem__(self, index):
node = self.__get_node(index)
return node.value
def __len__(self):
return self.__len
def __setitem__(self, index, value):
node = self.get_node(index)
node.value = value
def __delitem__(self, index):
node = self.get_node(index)
if node:
node.prev.next = node.next
if node.next:
node.next.prev = node.prev
node.prev = None
node.next = None
node.value = None
self.__len -= 1
def __repr__(self):
if not self:
return '%s()' % (self.__class__.__name__,)
list_ = [self.__get_node(i).value for i in range(len(self))]
return '%s(%r)' % (self.__class__.__name__, list_)
def append(self, value):
sentinel = self.sentinel
node = Node(value)
self.insert_between(node, sentinel.prev, sentinel)
def insert(self, index, value):
sentinel = self.sentinel
new_node = Node(value)
len_ = len(self)
if len_ == 0:
self.insert_between(new_node, sentinel, sentinel)
elif index >= 0 and index < len_:
node = self.get_node(index)
self.insert_between(new_node, node.prev, node)
elif index == len_:
self.insert_between(new_node, sentinel.prev, sentinel)
else:
raise IndexError
self.__len += 1
def insert_between(self, node, left_node, right_node):
if node and left_node and right_node:
node.prev = left_node
node.next = right_node
left_node.next = node
right_node.prev = node
else:
raise IndexError
class Stopwatch(object):
def __init__(self):
self.__start = 0.0
self.__stop = 0.0
self.__duration = 0.0
def start(self):
self.__start = time.time()
return self
def stop(self):
self.__stop = time.time()
self.__duration = self.__stop - self.__start
return self.__duration
def duration(self):
return self.__duration
class Profiler(object):
def __init__(self, size):
self.size = size
self.list = None
self.linked_list = None
self.sw_create_list = Stopwatch()
self.sw_create_linked_list = Stopwatch()
self.sw_pop_list = Stopwatch()
self.sw_pop_linked_list = Stopwatch()
def create_list(self):
self.sw_create_list.start()
self.list = [i for i in range(self.size)]
self.sw_create_list.stop()
def create_linked_list(self):
self.sw_create_linked_list.start()
self.linked_list = LinkedList()
for value in self.list:
self.linked_list.append(value)
self.sw_create_linked_list.stop()
def pop_list(self):
self.sw_pop_list.start()
for i in range(self.size):
del self.list[0]
self.sw_pop_list.stop()
def pop_linked_list(self):
self.sw_pop_linked_list.start()
for i in range(self.size):
del self.linked_list[0]
self.sw_pop_linked_list.stop()
def report(self):
print("%6s %10d" % ("Size", self.size))
print("%6s %10s %10s %10s" % ("Type", "Create", "Pop", "Total"))
print("%6s %10.2f %10.2f %10.2f" % ("List", self.sw_create_list.duration(), \
self.sw_pop_list.duration(), self.sw_create_list.duration() + self.sw_pop_list.duration()))
print("%6s %10.2f %10.2f %10.2f" % ("Linked", self.sw_create_linked_list.duration(), \
self.sw_pop_linked_list.duration(), self.sw_create_linked_list.duration() + \
self.sw_pop_linked_list.duration()))
print
def run(self):
self.create_list()
self.pop_list()
self.create_linked_list()
self.pop_linked_list()
self.report()
if __name__ == '__main__':
Profiler(1000).run()
Profiler(2000).run()
Profiler(5000).run()
Profiler(10000).run()
Profiler(20000).run()
Profiler(50000).run()
Profiler(100000).run()
print "Complete."
|
The conventional Python wisdom is to not to bother with linked lists and use the built-in list type or possibly a deque if one is popping items from both ends. Well, the conventional wisdom is usually right and so it is in this case.
Nonetheless I had an unusual call for a list in which I would often be popping left as well as inserting into and deleting from the middle, while traversing the list item by item, without requiring random access. This is a textbook case for a linked list. I was also curious to extend the new collections library, which is quite well-organized for subclassing .
So I decided to write a linked list and compare its performance with the regular list in deleting the list by popping off the first item repeatedly. The deletion results were as expected: the linked list was O(n) and the list O(n**2). On my machine the crossover point was around 5,000 items, becoming an order of magnitude different at 50,000.
Of course, deleting a list an item at a time from the beginning is about the worst-case scenario for using a list, and ignores the list's supreme strength for random access, so I hope readers understand that I offer this recipe for instructional purposes.
I found myself impressed with the performance of the Python list. Between the optimizations in C and those that occur further down in silicon for block moves, the Python list is a formidable object, even when misused.
The handcrafted linked list still has its place in embedded applications, or in applications lacking the container support found in today's frameworks, or in freshman computer science classes, but in Python the list rules unless one is dealing with huge lists. In the latter case one is better off searching out a specialized library for the occasion rather than using a basic linked list as in this recipe.
Yes, the Python structures are impressive but for special cases they can be beaten. Good work. Have you tried comparing your list with the blist at http://pypi.python.org/pypi/blist/ ? It is a dynamic list built on a B-Tree IIRC. Should be an interesting comparison.
Thanks, Muhammad! I added benchmarks for blist and it handles the popleft deletes like a champ, better than my little linked list:
Size 100000 Type Create Pop Total List 0.02 6.25 6.27 Linked 0.00 0.16 0.16 Blist 0.00 0.08 0.08
This is what I expected -- that a specialized library would be the right choice for huge lists.
Hmm...let's try that table again:
Why do you say 2.7 is required? I just ran this on 2.6 and it worked just fine.
What about fetch by index? That's one of the cases where python list is different from other list-based ADTs. The get and set item operations are constant time in a python list. The same thing can't be said about a linked list.