Welcome, guest | Sign In | My Account | Store | Cart

This structure is a kind of dictionary which allows you to map data intervals to values. You can then query the structure for a given point, and it returns the value associated to the interval which contains the point. Boundary values don't need to be an integer ; indeed in the unit test I use a datetime object.

Python, 217 lines
 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 from bisect import bisect_left, bisect_right from itertools import izip class intervalmap(object): """ This class maps a set of intervals to a set of values. >>> i = intervalmap() >>> i[0:5] = '0-5' >>> i[8:12] = '8-12' >>> print i 0-5 >>> print i 8-12 >>> print repr(i[-1]) None >>> print repr(i) None >>> i[4:9] = '4-9' >>> print [(j,i[j]) for j in range(6)] [(0, '0-5'), (1, '0-5'), (2, '0-5'), (3, '0-5'), (4, '4-9'), (5, '4-9')] >>> print list(i.items()) [((0, 4), '0-5'), ((4, 9), '4-9'), ((9, 12), '8-12')] >>> i[:0] = 'less than 0' >>> i[-5] 'less than 0' >>> i '0-5' >>> print list(i.items()) [((None, 0), 'less than 0'), ((0, 4), '0-5'), ((4, 9), '4-9'), ((9, 12), '8-12')] >>> i[21:] = 'more than twenty' >>> i 'more than twenty' >>> i[10.5:15.5] = '10.5-15.5' >>> i[11.5] '10.5-15.5' >>> i[0.5] '0-5' >>> print list(i.items()) [((None, 0),... ((9, 10.5), '8-12'), ((10.5, 15.5), '10.5-15.5'), ((21, None),... >>> i = intervalmap() >>> i[0:2] = 1 >>> i[2:8] = 2 >>> i[4:] = 3 >>> i[5:6] = 4 >>> i {[0, 2] => 1, [2, 4] => 2, [4, 5] => 3, [5, 6] => 4, [6, None] => 3} """ def __init__(self): """ Initializes an empty intervalmap. """ self._bounds = [] self._items = [] self._upperitem = None def __setitem__(self,_slice,_value): """ Sets an interval mapping. """ assert isinstance(_slice,slice), 'The key must be a slice object' if _slice.start is None: start_point = -1 else: start_point = bisect_left(self._bounds,_slice.start) if _slice.stop is None: end_point = -1 else: end_point = bisect_left(self._bounds,_slice.stop) if start_point>=0: if start_point < len(self._bounds) and self._bounds[start_point]<_slice.start: start_point += 1 if end_point>=0: self._bounds[start_point:end_point] = [_slice.start,_slice.stop] if start_point < len(self._items): self._items[start_point:end_point] = [self._items[start_point],_value] else: self._items[start_point:end_point] = [self._upperitem,_value] else: self._bounds[start_point:] = [_slice.start] if start_point < len(self._items): self._items[start_point:] = [self._items[start_point],_value] else: self._items[start_point:] = [self._upperitem] self._upperitem = _value else: if end_point>=0: self._bounds[:end_point] = [_slice.stop] self._items[:end_point] = [_value] else: self._bounds[:] = [] self._items[:] = [] self._upperitem = _value def __getitem__(self,_point): """ Gets a value from the mapping. """ assert not isinstance(_point,slice), 'The key cannot be a slice object' index = bisect_right(self._bounds,_point) if index < len(self._bounds): return self._items[index] else: return self._upperitem def items(self): """ Returns an iterator with each item being ((low_bound,high_bound), value). The items are returned in order. """ previous_bound = None for b,v in izip(self._bounds,self._items): if v is not None: yield (previous_bound,b), v previous_bound = b if self._upperitem is not None: yield (previous_bound,None), self._upperitem def values(self): """ Returns an iterator with each item being a stored value. The items are returned in order. """ for v in self._items: if v is not None: yield v if self._upperitem is not None: yield self._upperitem def __repr__(self): s = [] for b,v in self.items(): if v is not None: s.append('[%r, %r] => %r'%( b, b, v )) return '{'+', '.join(s)+'}' if __name__ == "__main__": # Test 1 i = intervalmap() i[9:] = "!" assert repr(i) == "{[9, None] => '!'}" i[:5] = "Hello" i[6:7] = "World" assert repr(i) == "{[None, 5] => 'Hello', [6, 7] => 'World', [9, None] => '!'}" i[8:10] = "(Test)" assert repr(i) == "{[None, 5] => 'Hello', [6, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}" i[:3] = 'My,' assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [6, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}" i[5.5:6] = "Cruel" assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [5.5, 6] => 'Cruel', [6, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}" i[6:6.5] = "And Harsh" assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [5.5, 6] => 'Cruel', [6, 6.5] => 'And Harsh', [6.5, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}" i[5.9:6.6] = None assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [5.5, 5.9000000000000004] => 'Cruel', [6.5999999999999996, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}" assert ' '.join(i.values()) == "My, Hello Cruel World (Test) !" print 'Test 1 OK' # Test 2 i = intervalmap() i[:0] = 'A' i[2:5] = 'B' i[8:10] = 'C' i[12:] = 'D' assert repr(i) == "{[None, 0] => 'A', [2, 5] => 'B', [8, 10] => 'C', [12, None] => 'D'}" i[:] = 'K' assert repr(i) == "{[None, None] => 'K'}" assert i == 'K' i[0:10] = 'L' i[6:8] = 'M' i[20:] = 'J' assert i[-1] == 'K' assert i == 'L' assert i == 'M' assert i == 'L' assert i == 'K' assert i == 'J' print 'Test 2 OK' # Test 3 try: from datetime import datetime except: print 'Test 3 skipped' else: i = intervalmap() i[:datetime(2005,10,24)] = 'A' i[datetime(2005,11,11):datetime(2005,11,17)] = 'B' i[datetime(2005,11,30):] = 'C' assert i[datetime(2005,9,25)] == 'A' assert i[datetime(2005,10,23)] == 'A' assert i[datetime(2005,10,26)] == None assert i[datetime(2005,11,9)] == None assert i[datetime(2005,11,16)] == 'B' assert i[datetime(2005,11,23)] == None assert i[datetime(2005,11,29)] == None assert i[datetime(2005,11,30)] == 'C' assert i[datetime(2005,12,3)] == 'C' print 'Test 3 OK' try: import doctest except: print 'Skipping the doctests' else: print 'And now, the doctests' doctest.testmod(optionflags=doctest.ELLIPSIS)

This class uses bisect to ensure a O(log2 n) insert and lookup time.

The insert algorithm tries to do "the right thing" when overlapping intervals are inserted, see the unit tests. As a general rule, an inserted interval override every other mapping which was defined in its range. Other intervals are splitted if required.

To define a default value for the whole space, use i[:] = default_value (see the second unit test). David Leuschner 16 years, 1 month ago

bug + fix. I think I have found a bug when setting open ended intervals:

>>> i = intervalmap()
>>> i[0:2] = 1
>>> i[2:8] = 2
>>> i[4:] = 3
>>> i
{[0, 2] => 1, [2, 4] => 2, [4, None] => 3}

The representation is right but the internal state seems wrong:

>>> i._items
[None, 1, 2, 2]

The last element shouldn't be 2 but 3. When setting another interval the error also shows up in the representation:

>>> i[5:6] = 4
>>> i
{[0, 2] => 1, [2, 4] => 2, [4, 5] => 2, [5, 6] => 4, [6, None] => 3}

This is wrong. The interval [4, 5] should be mapped to 3 instead of 2. The following patch fixes this:

--- old.py      2005-11-28 14:29:52.000000000 +0100
+++ new.py      2005-11-28 14:30:45.000000000 +0100
@@ -77,9 +77,9 @@     def __setitem__(self,_slice,_value):
else:
self._bounds[start_point:] = [_slice.start]
if start_point &lt; len(self._items):
-                    self._items[start_point:end_point] = [self._items[start_point]]
+                    self._items[start_point:] = [self._items[start_point],_value]
else:
-                    self._items[start_point:end_point] = [self._upperitem]
+                    self._items[start_point:] = [self._upperitem]
self._upperitem = _value
else:
if end_point>=0:

All the other tests still succeed. Thanks. I've integrated your fix and added a unit test. Thanks a lot ! Kevin Jacobs 15 years, 11 months ago

Asymptotic analysis. A neat little recipe, but I am concerned that the asymptotic time complexity of interval insertion. I believe that the current version is not O(lg n) due to the O(n) step to update the bounds and items lists.

Can you comment, Nicolas? Nicolas Lehuen (author) 15 years, 9 months ago

You're right for insertion time, not lookup time. You're right, a seemingly innocuous statement like self._bounds[start_point:end_point] = [_slice.start,_slice.stop] is indeed O(n). So insertion time is not O(log n).

However, and in my usage pattern it's the most important thing, the lookup remains O(log n). This is the kind of data structure that you will write to once in a while but lookup a lot.

Solving the insertion time asymptotic limit would require using a tree structure with appropriate algorithm so that replacing a range of keys could be done in O(n log n). That is certainly feasable but a bit too much for this recipe.

Regards,

Nicolas

P.S. I'm sorry for the delay in answering your question, but the cookbook doesn't notify the recipe authors about new posts. It's about time this cookbook gets updated with recent innovations in mind, like e-mailing or even crazy technologies like RSS. Arnd Zapletal 15 years, 2 months ago

Unifying intervals. Useful recipe, thanks a lot! I easily managed to retrieve the interval bounds for an arbitrary given point. Therefore now my question: Is there a simple way to glue neighbouring intervals with identical values? У F 13 years, 2 months ago

Hi!

Thank you very much for your code!

I have a little idea to improve it: deletion of slices and optimizations after such delete. Here's the patch:

77a78,80
>             if end_point >= 0 and end_point < len(self._bounds) and self._bounds[end_point]<=_slice.stop:
>                 end_point += 1
>
99c102,107
<
---
>         self._optimize()
>
>     def __delitem__(self, _slice):
>         assert isinstance(_slice,slice), 'The key must be a slice object'
>         self.__setitem__(_slice, None)
>
110a119,128
>
>     def _optimize(self):
>         i = 0
>         while i < len(self._items)-1:
>             if self._items[i] == self._items[i+1]:
>                 #del items[i]
>                 del self._items[i]
>                 del self._bounds[i]
>             else:
>                 i += 1 Created by Nicolas Lehuen on Tue, 22 Nov 2005 (PSF)