Welcome, guest | Sign In | My Account | Store | Cart

An ordered set using insertion order. It can also be seen as a linked-list with a uniqueness constraint and O(1) lookups.

Python, 204 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env python
import weakref

class OrderedSet(object):
    """
    A linked-list with a uniqueness constraint and O(1) lookups/removal.

    Modification during iteration is partially supported.  If you
    remove the just yielded element, it will go on to what was the
    next element.  If you remove the next element, it will use the
    new next element.  If you remove both, you get an error.
    """
    def __init__(self, iterable=(), allow_move=True):
        self._map = {}
        self._start = _SentinalNode()
        self._end = _SentinalNode()
        self._start.next = self._end
        self._end.prev = self._start
        self._allow_move = allow_move
        self.extend(iterable)

    def __contains__(self, element):
        return element in self._map

    def __eq__(self, other):
        raise TypeError("OrderedSet does not support comparisons")

    def __hash__(self):
        raise TypeError("OrderedSet is not hashable")

    def __iter__(self):
        curnode = self._start
        nextnode = curnode.next

        while True:
            if hasattr(curnode, 'next'):
                curnode = curnode.next
            elif hasattr(nextnode, 'next'):
                curnode = nextnode
            else:
                raise RuntimeError("OrderedSet modified inappropriately "
                    "during iteration")

            if type(curnode) is _SentinalNode:
                return

            nextnode = curnode.next
            yield curnode.content

    def __reversed__(self):
        curnode = self._end
        prevnode = curnode.prev

        while True:
            if hasattr(curnode, 'prev'):
                curnode = curnode.prev
            elif hasattr(prevnode, 'prev'):
                curnode = prevnode
            else:
                raise RuntimeError("OrderedSet modified inappropriately "
                    "during iteration")

            if type(curnode) is _SentinalNode:
                return

            prevnode = curnode.prev
            yield curnode.content

    def __len__(self):
        return len(self._map)

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def append(self, element):
        """Add an element to the right side of the OrderedSet."""
        self._insertatnode(self._end.prev, element)

    def appendleft(self, element):
        """Add an element to the left side of the OrderedSet."""
        self._insertatnode(self._start, element)

    def clear(self):
        """Remove all elements from the OrderedSet."""
        while self:
            self.pop()

    def extend(self, iterable):
        """Extend the right side of the OrderedSet with elements from the iterable."""
        for element in iterable:
            self.append(element)

    def extendleft(self, iterable):
        """Extend the left side of the OrderedSet with elements from the iterable."""
        for element in iterable:
            self.appendleft(element)

    def insertleft(self, poselement, element):
        """Inserts element immediately left of poselement's position."""
        self._insertatnode(self._map[poselement].prev, element)

    def insertright(self, poselement, element):
        """Inserts element immediately right of poselement's position."""
        self._insertatnode(self._map[poselement], element)

    def _insertatnode(self, node, element):
        left = node
        right = node.next
        if element in self._map:
            if self._allow_move:
                self.remove(element)
            else:
                raise ValueError("element already exists")

        newnode = _Node()
        newnode.content = element
        newnode.prev = right.prev
        newnode.next = right
        right.prev = newnode
        left.next = newnode
        self._map[element] = newnode

    def pop(self):
        """Remove and return the rightmost element."""
        element = self._end.prev.content
        self.remove(element)
        return element

    def popleft(self):
        """Remove and return the leftmost element."""
        element = self._start.next.content
        self.remove(element)
        return element

    def remove(self, element):
        """Remove element from the OrderedSet."""
        node = self._map.pop(element)
        assert type(node) is not _SentinalNode
        left = node.prev
        right = node.next
        left.next = right
        right.prev = node.prev
        del node.prev
        del node.next


class _Node(object):
    __slots__ = '_prev', 'next', 'content', '__weakref__'
    # A weakref is used for prev so as to avoid creating cycles.

    def _prev_get(self):
        return self._prev()
    def _prev_set(self, value):
        self._prev = weakref.ref(value)
    def _prev_del(self):
        del self._prev
    prev = property(_prev_get, _prev_set, _prev_del)


class _SentinalNode(_Node):
    __slots__ = []


__test__ = {
    '__foo__': """
        >>> OrderedSet(range(10))
        OrderedSet([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        >>> list(reversed(OrderedSet(range(10))))
        [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
        >>> stuff = OrderedSet()
        >>> stuff.extendleft(range(20, 25))
        >>> stuff.pop()
        20
        >>> stuff
        OrderedSet([24, 23, 22, 21])
        >>> stuff.insertleft(23, 99)
        >>> stuff
        OrderedSet([24, 99, 23, 22, 21])
        >>> stuff.remove(21)
        >>> stuff
        OrderedSet([24, 99, 23, 22])
        >>> len(stuff)
        4
        >>> 23 in stuff
        True
        >>> 44 in stuff
        False

        >>> OrderedSet([1, 2, 3, 2])
        OrderedSet([1, 3, 2])
        >>> OrderedSet([1, 2, 3, 2], allow_move=False)
        Traceback (most recent call last):
            ...
        ValueError: element already exists
    """,
}


def _test():
    import doctest
    doctest.testmod()

if __name__ == '__main__':
    _test()

I originally conceived this as a linked list with O(1) removal that I could use for a scheduler. The use case for such things seemed a little rare though. One name change later (LinkedList -> OrderedSet) and it seems much broader.

The API (and their docstrings) is mostly taken from deque. There's many operations for sets that aren't supported; most of them don't imply positioning, so they wouldn't make sense. __eq__ could be supported, but then I'd have to figure out how Python uses it. ;)

This is a possible answer to the classic question, "How do I remove duplicates from my list without losing the order?"

Update: Despite the name, this recipe does not support the set API. A similar recipe that does support the set API (although without the ability to insert items at specific points) is http://code.activestate.com/recipes/576694/.

5 comments

Mark Robson 15 years, 8 months ago  # | flag

There appears to be a bug in _insertatnode which can be illustrated by

>>> stuff = OrderedSet()
>>> stuff.append(1)
>>> stuff
OrderedSet([1])
>>> stuff.append(1)
>>> stuff
OrderedSet([])

I think a change along the lines below fixes it

def _insertatnode(self, node, element):
    left = node
    right = node.next
    #start by determining if element exists already. Need to be careful
    #if node or node.next contains the element to be added
    existingNode = self._map.get(element)
    if existingNode:
        if not self._allow_move:
            raise ValueError("element already exists")
        if existingNode == left or existingNode == right:
            return #nothing to do. NB more than just optimisation
        #not optimal. element removed from map only to be added again
        self.remove(element)

    newnode = _Node()
    newnode.content = element
    newnode.prev = right.prev
    newnode.next = right
    right.prev = newnode
    left.next = newnode
    self._map[element] = newnode
Lei You 15 years, 3 months ago  # | flag

The above mentioned fix does not take into account the purpose of allow_move (basically to allow newly added element to be moved to the end)

so really the fix is to update the pointer left or right to make sure that it's not the existingNode.

def _insertatnode(self, node, element):
    left = node
    right = node.next

    if element in self._map:
        if self._allow_move:
            removed = self.remove(element)
            if removed == left:
              left = right.prev
            elif removed == right:
              right = left.next
        elif not self._ignore_duplicate:
            raise ValueError("element already exists")
        else:
            return

    newnode = _Node()
    newnode.content  = element
    newnode.prev = left
    newnode.next = right
    right.prev = newnode
    left.next = newnode
    self._map[element] = newnode

def remove(self, element):
    """Remove element from the OrderedSet."""
    node = self._map.pop(element)
    assert type(node) is not _SentinalNode
    left = node.prev
    right = node.next
    left.next = right
    right.prev = node.prev
    del node.prev
    del node.next
    return node
Lei You 15 years, 3 months ago  # | flag

Actually, I guess I was just being stupid. Sorry guys.

Nagappan Alagappan 14 years, 6 months ago  # | flag

Hello Olsen,

I'm planing to use this module in LDTPv2 - http://ldtp.freedesktop.org under LGPLv2 license. If you have any objections, please write me to nagappan (at) gmail (dot) com. I have just mentioned your name in the file. I could not find your email id to specify in ofderedset.py. You can email me that as well, if you are interested with my proposal :)

You have done a wonderful job.

Thanks Nagappan

Nagappan Alagappan 14 years, 6 months ago  # | flag

Hello Olsen,

While running this, I keep getting exception, so I dropped your OrderedSet class and picked up http://code.activestate.com/recipes/576694/ from your comments.

Thanks Nagappan

Created by Adam Olsen on Thu, 16 Aug 2007 (PSF)
Python recipes (4591)
Adam Olsen's recipes (2)

Required Modules

Other Information and Tasks