An interval mapping data structure « Python recipes

This structure is a kind of dictionary which allows you to map data intervals to values. You can then query the structure for a given point, and it returns the value associated to the interval which contains the point. Boundary values don't need to be an integer ; indeed in the unit test I use a datetime object.

      from bisect import bisect_left, bisect_right
from itertools import izip

class intervalmap(object):
    """
        This class maps a set of intervals to a set of values.
        
        >>> i = intervalmap()
        >>> i[0:5] = '0-5'
        >>> i[8:12] = '8-12'
        >>> print i[2]
        0-5
        >>> print i[10]
        8-12
        >>> print repr(i[-1])
        None
        >>> print repr(i[17])
        None
        >>> i[4:9] = '4-9'
        >>> print [(j,i[j]) for j in range(6)]
        [(0, '0-5'), (1, '0-5'), (2, '0-5'), (3, '0-5'), (4, '4-9'), (5, '4-9')]
        >>> print list(i.items())
        [((0, 4), '0-5'), ((4, 9), '4-9'), ((9, 12), '8-12')]
        >>> i[:0] = 'less than 0'
        >>> i[-5]
        'less than 0'
        >>> i[0]
        '0-5'
        >>> print list(i.items())
        [((None, 0), 'less than 0'), ((0, 4), '0-5'), ((4, 9), '4-9'), ((9, 12), '8-12')]
        >>> i[21:] = 'more than twenty'
        >>> i[42]
        'more than twenty'
        >>> i[10.5:15.5] = '10.5-15.5'
        >>> i[11.5]
        '10.5-15.5'
        >>> i[0.5]
        '0-5'
        >>> print list(i.items())
        [((None, 0),... ((9, 10.5), '8-12'), ((10.5, 15.5), '10.5-15.5'), ((21, None),...
        >>> i = intervalmap()
        >>> i[0:2] = 1
        >>> i[2:8] = 2
        >>> i[4:] = 3
        >>> i[5:6] = 4  
        >>> i
        {[0, 2] => 1, [2, 4] => 2, [4, 5] => 3, [5, 6] => 4, [6, None] => 3}
    """

    def __init__(self):
        """
            Initializes an empty intervalmap.
        """
        self._bounds = []
        self._items = []
        self._upperitem = None
        
    def __setitem__(self,_slice,_value):
        """
            Sets an interval mapping.
        """
        assert isinstance(_slice,slice), 'The key must be a slice object'
    
        if _slice.start is None:
            start_point = -1
        else:
            start_point = bisect_left(self._bounds,_slice.start)
        
        if _slice.stop is None:
            end_point = -1
        else:
            end_point = bisect_left(self._bounds,_slice.stop)
        
        if start_point>=0:
            if start_point < len(self._bounds) and self._bounds[start_point]<_slice.start:
                start_point += 1 

            if end_point>=0:        
                self._bounds[start_point:end_point] = [_slice.start,_slice.stop]
                if start_point < len(self._items):
                    self._items[start_point:end_point] = [self._items[start_point],_value]
                else:
                    self._items[start_point:end_point] = [self._upperitem,_value]
            else:
                self._bounds[start_point:] = [_slice.start]
                if start_point < len(self._items):
                    self._items[start_point:] = [self._items[start_point],_value]
                else:
                    self._items[start_point:] = [self._upperitem]
                self._upperitem = _value
        else:
            if end_point>=0:
                self._bounds[:end_point] = [_slice.stop]
                self._items[:end_point] = [_value]
            else:
                self._bounds[:] = []
                self._items[:] = []
                self._upperitem = _value
    
    def __getitem__(self,_point):
        """
            Gets a value from the mapping.
        """
        assert not isinstance(_point,slice), 'The key cannot be a slice object'  
            
        index = bisect_right(self._bounds,_point)
        if index < len(self._bounds):
            return self._items[index]
        else:
            return self._upperitem

    def items(self):
        """
            Returns an iterator with each item being
            ((low_bound,high_bound), value). The items are returned
            in order.
        """
        previous_bound = None
        for b,v in izip(self._bounds,self._items):
            if v is not None:
                yield (previous_bound,b), v
            previous_bound = b
        if self._upperitem is not None:
            yield (previous_bound,None), self._upperitem

    def values(self):
        """
            Returns an iterator with each item being a stored value. The items
            are returned in order.
        """
        for v in self._items:
            if v is not None:
                yield v
        if self._upperitem is not None:
            yield self._upperitem

    def __repr__(self):
        s = []
        for b,v in self.items():
            if v is not None:
                s.append('[%r, %r] => %r'%(
                    b[0],
                    b[1],
                    v
                ))
        return '{'+', '.join(s)+'}'
        
if __name__ == "__main__":
    # Test 1
    i = intervalmap()
    i[9:] = "!"
    assert repr(i) == "{[9, None] => '!'}"
    i[:5] = "Hello"
    i[6:7] = "World"
    assert repr(i) == "{[None, 5] => 'Hello', [6, 7] => 'World', [9, None] => '!'}"
    i[8:10] = "(Test)"
    assert repr(i) == "{[None, 5] => 'Hello', [6, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}"
    i[:3] = 'My,'
    assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [6, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}"
    i[5.5:6] = "Cruel"
    assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [5.5, 6] => 'Cruel', [6, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}"
    i[6:6.5] = "And Harsh"
    assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [5.5, 6] => 'Cruel', [6, 6.5] => 'And Harsh', [6.5, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}"
    i[5.9:6.6] = None
    assert repr(i) == "{[None, 3] => 'My,', [3, 5] => 'Hello', [5.5, 5.9000000000000004] => 'Cruel', [6.5999999999999996, 7] => 'World', [8, 10] => '(Test)', [10, None] => '!'}"
    assert ' '.join(i.values()) == "My, Hello Cruel World (Test) !"
    print 'Test 1 OK'

    # Test 2
    i = intervalmap()
    i[:0] = 'A'
    i[2:5] = 'B'
    i[8:10] = 'C'
    i[12:] = 'D'
    assert repr(i) == "{[None, 0] => 'A', [2, 5] => 'B', [8, 10] => 'C', [12, None] => 'D'}"
    i[:] = 'K'
    assert repr(i) == "{[None, None] => 'K'}"
    assert i[5] == 'K'
    i[0:10] = 'L'
    i[6:8] = 'M'
    i[20:] = 'J'
    assert i[-1] == 'K'
    assert i[5] == 'L'
    assert i[7] == 'M'
    assert i[9] == 'L'
    assert i[15] == 'K'
    assert i[42] == 'J'
    print 'Test 2 OK'
    
    # Test 3
    try:
        from datetime import datetime
    except:
        print 'Test 3 skipped'
    else:
        i = intervalmap()
        i[:datetime(2005,10,24)] = 'A'
        i[datetime(2005,11,11):datetime(2005,11,17)] = 'B'
        i[datetime(2005,11,30):] = 'C'
        assert i[datetime(2005,9,25)] == 'A'
        assert i[datetime(2005,10,23)] == 'A'
        assert i[datetime(2005,10,26)] == None
        assert i[datetime(2005,11,9)] == None
        assert i[datetime(2005,11,16)] == 'B'
        assert i[datetime(2005,11,23)] == None
        assert i[datetime(2005,11,29)] == None
        assert i[datetime(2005,11,30)] == 'C'
        assert i[datetime(2005,12,3)] == 'C'
        print 'Test 3 OK'

    try:
        import doctest
    except:
        print 'Skipping the doctests'
    else:
        print 'And now, the doctests'    
        doctest.testmod(optionflags=doctest.ELLIPSIS)

      

This class uses bisect to ensure a O(log2 n) insert and lookup time.

The insert algorithm tries to do "the right thing" when overlapping intervals are inserted, see the unit tests. As a general rule, an inserted interval override every other mapping which was defined in its range. Other intervals are splitted if required.

To define a default value for the whole space, use i[:] = default_value (see the second unit test).

Tags: algorithms

6 comments

David Leuschner 18 years, 5 months ago # | flag

bug + fix. I think I have found a bug when setting open ended intervals:

>>> i = intervalmap()
>>> i[0:2] = 1
>>> i[2:8] = 2
>>> i[4:] = 3
>>> i
{[0, 2] => 1, [2, 4] => 2, [4, None] => 3}

The representation is right but the internal state seems wrong:

>>> i._items
[None, 1, 2, 2]

The last element shouldn't be 2 but 3. When setting another interval the error also shows up in the representation:

>>> i[5:6] = 4
>>> i
{[0, 2] => 1, [2, 4] => 2, [4, 5] => 2, [5, 6] => 4, [6, None] => 3}

This is wrong. The interval [4, 5] should be mapped to 3 instead of 2. The following patch fixes this:

--- old.py      2005-11-28 14:29:52.000000000 +0100
+++ new.py      2005-11-28 14:30:45.000000000 +0100
@@ -77,9 +77,9 @@     def __setitem__(self,_slice,_value):
             else:
                 self._bounds[start_point:] = [_slice.start]
                 if start_point &lt; len(self._items):
-                    self._items[start_point:end_point] = [self._items[start_point]]
+                    self._items[start_point:] = [self._items[start_point],_value]
                 else:
-                    self._items[start_point:end_point] = [self._upperitem]
+                    self._items[start_point:] = [self._upperitem]
                 self._upperitem = _value
         else:
             if end_point>=0:

All the other tests still succeed.

Nicolas Lehuen (author) 18 years, 3 months ago # | flag

Thanks. I've integrated your fix and added a unit test. Thanks a lot !

Kevin Jacobs 18 years, 2 months ago # | flag

Asymptotic analysis. A neat little recipe, but I am concerned that the asymptotic time complexity of interval insertion. I believe that the current version is not O(lg n) due to the O(n) step to update the bounds and items lists.

Can you comment, Nicolas?

Nicolas Lehuen (author) 18 years, 1 month ago # | flag

You're right for insertion time, not lookup time. You're right, a seemingly innocuous statement like self._bounds[start_point:end_point] = [_slice.start,_slice.stop] is indeed O(n). So insertion time is not O(log n).

However, and in my usage pattern it's the most important thing, the lookup remains O(log n). This is the kind of data structure that you will write to once in a while but lookup a lot.

Solving the insertion time asymptotic limit would require using a tree structure with appropriate algorithm so that replacing a range of keys could be done in O(n log n). That is certainly feasable but a bit too much for this recipe.

Regards,

Nicolas

P.S. I'm sorry for the delay in answering your question, but the cookbook doesn't notify the recipe authors about new posts. It's about time this cookbook gets updated with recent innovations in mind, like e-mailing or even crazy technologies like RSS.

Arnd Zapletal 17 years, 6 months ago # | flag

Unifying intervals. Useful recipe, thanks a lot! I easily managed to retrieve the interval bounds for an arbitrary given point. Therefore now my question: Is there a simple way to glue neighbouring intervals with identical values?

У F 15 years, 6 months ago # | flag

Hi!

Thank you very much for your code!

I have a little idea to improve it: deletion of slices and optimizations after such delete. Here's the patch:

77a78,80
>             if end_point >= 0 and end_point < len(self._bounds) and self._bounds[end_point]<=_slice.stop:
>                 end_point += 1
> 
99c102,107
<     
---
>         self._optimize()
> 
>     def __delitem__(self, _slice):
>         assert isinstance(_slice,slice), 'The key must be a slice object'
>         self.__setitem__(_slice, None)
> 
110a119,128
> 
>     def _optimize(self):
>         i = 0
>         while i < len(self._items)-1:
>             if self._items[i] == self._items[i+1]:
>                 #del items[i]
>                 del self._items[i]
>                 del self._bounds[i]
>             else:
>                 i += 1

◄	Python recipes (4591)	►
◄	Nicolas Lehuen's recipes (7)	►

An interval mapping data structure (Python recipe) by Nicolas Lehuen
ActiveState Code (http://code.activestate.com/recipes/457411/)

6 comments

Forks

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

An interval mapping data structure (Python recipe) by Nicolas Lehuen ActiveState Code (http://code.activestate.com/recipes/457411/)

6 comments

Forks

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

An interval mapping data structure (Python recipe) by Nicolas Lehuen
ActiveState Code (http://code.activestate.com/recipes/457411/)