Welcome, guest | Sign In | My Account | Store | Cart
def itermerge( *iters ):
    '''Merge multiple sorted inputs into a single sorted output.

    Equivalent to:  sorted(itertools.chain(*iterables))

    >>> list(imerge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]

    '''

    def merge( i1, i2 ):
        next1 = iter( i1 ).next
        next2 = iter( i2 ).next
        try:        
            v1 = next1()
        except StopIteration:
            while True:
                yield next2()
        try:
            v2 = next2()
        except StopIteration:
            while True:
                yield next1()
        while True:
            if v1 < v2:
                yield v1
                try:        
                    v1 = next1()
                except StopIteration:
                    yield v2
                    while True:
                        yield next2()
            else:
                yield v2
                try:
                    v2 = next2()
                except StopIteration:
                    yield v1
                    while True:
                        yield next1()
    iters_cnt = len( iters )
    if iters_cnt == 1:
        return iter( iters[0] )
    if iters_cnt == 2:
        return merge( iters[0], iters[1] )
    bisect = iters_cnt / 2
    return merge( itermerge( *iters[:bisect] ), itermerge( *iters[bisect:] ) )
if __name__ == '__main__':
    import doctest
    import itertools
    import heapq
    import random
    from timeit import Timer
    def test_data( dataset_cnt=5, sizerange=(100, 100) ):
        print ( "%s sets between %s and %s"
                    % ( dataset_cnt, sizerange[0], sizerange[1] ) )
        datasets = []
        for li in xrange( dataset_cnt ):
            d = []
            for i in xrange( int( random.uniform( *sizerange ) ) ):
                d.append( int( random.uniform( 0, 99 ) ) )
            d.sort()
            datasets.append( d )
        return tuple( datasets )

    def isort( *iters ):
        return iter( sorted( itertools.chain( *iters ) ) )


    #
    # From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/491285
    #  for comparison
    #


    def imerge(*iterables):
        '''Merge multiple sorted inputs into a single sorted output.

        Equivalent to:  sorted(itertools.chain(*iterables))

        >>> list(imerge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
        [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]

        '''
        heappop, siftup, _StopIteration = heapq.heappop, heapq._siftup, StopIteration

        h = []
        h_append = h.append
        for it in map(iter, iterables):
            try:
                next = it.next
                h_append([next(), next])
            except _StopIteration:
                pass
        heapq.heapify(h)

        while 1:
            try:
                while 1:
                    v, next = s = h[0]      # raises IndexError when h is empty
                    yield v
                    s[0] = next()           # raises StopIteration when exhausted
                    siftup(h, 0)            # restore heap condition
            except _StopIteration:
                heappop(h)                  # remove empty iterator
            except IndexError:
                return
    #
    #  End
    #
    datasets = test_data( 10, ( 1, 10000 ) )
    verify_result = list( itertools.chain( *datasets ) )
    verify_result.sort()
    test_cnt = 3
    print "%s tests per timing run" % test_cnt

    def test( what ):
        name = what.__name__
        print name," "
        if list( what( *datasets ) ) == verify_result:
            print "Success!!"
        else:
            print "failure"
            return
        set_up = "from __main__ import %s, datasets; gc.enable()" % name
        test_it = "list( %s( *datasets ) )" % name
        print Timer( test_it, set_up ).timeit( test_cnt )

    test( isort )
    test( itermerge )
    test( imerge )

if __name__ == '__main__':
    import doctest
    doctest.testmod()

History

  • revision 2 (16 years ago)
  • previous revisions are not available