ActiveState Code

Recipe 213027: Splitting iterators


Implementation of isplit, a function that splits one iterator into two (or more) independent iterators, each of which yields the same sequence of values as the original; the values handed to the different iterators are copies of one another.

Python
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import copy
from collections import deque

class _IDup(object):
        """Internal class used only to keep a reference on the actual iterator,
        and to do housekeeping."""

        def __init__(self,iterin):
                self.__iter = iterin
                self.__iterno = 0
                self.__iteritems = []
                self.__hasstopped = None

        def registerIter(self,oldno=-1):
                iterno = self.__iterno
                self.__iterno += 1
                if oldno == -1:
                        self.__iteritems.append([])
                else:
                        self.__iteritems.append(
                                copy.deepcopy(self.__iteritems[oldno])
                                )
                return iterno

        def getNext(self,iterno):
                if self.__iteritems[iterno]:
                        iteritem = self.__iteritems[iterno].pop(0)
                elif self.__hasstopped is not None:
                        raise self.__hasstopped
                else:
                        try:
                                iteritem = self.__iter.next()
                        except StopIteration, e:
                                self.__hasstopped = e
                                raise
                        for id, i in enumerate(self.__iteritems):
                                if id <> iterno:
                                        i.append(copy.deepcopy(iteritem))
                return iteritem

class _IDupped(object):
        """Duplicated Iterator class. Each iterator you get by calling isplit
        or split on a splitted iterator will be of this type."""

        def __init__(self,idup,oldno=-1):
                self.__idup = idup
                self.__iterno = idup.registerIter(oldno)

        def next(self):
                return self.__idup.getNext(self.__iterno)

        def split(self):
                """Split this iterator into two pieces. The original iterator
                is still callable, as is the sub-iterator."""

                return _IDupped(self.__idup,self.__iterno)

        def __iter__(self):
                return self

def isplit(iterin,splitno=2):
        """Split the iterator iterin into splitno iterators.

        A single _IDup is created around iterin, and splitno _IDupped
        iterators are registered with it in turn.  They are returned as a
        tuple.
        """
        shared = _IDup(iterin)
        return tuple(_IDupped(shared) for _ in range(splitno))

# Demonstration of isplit() and split().  print is called with a single
# parenthesised argument so the output is the same whether print is a
# statement (Python 2) or a function (Python 3).

# Create first few iterators.
test = ["hello","how","are","you?"]
x, y = isplit(iter(test))

# Test print of iterator y.
print("First item of y.")
print(y.next())

# Create new iterator z after first element of y.
z = y.split()

# Print rest of the elements.
print("Rest in x.")
for i in x:
        print(i)
print("Rest in y.")
for i in y:
        print(i)
print("Rest in z.")
for i in z:
        print(i)

Discussion

The need to split an iterator occurred to me when I had one iterator that returned lines from a source file, and I wanted two different functions to process those lines simultaneously. Both functions ran in the same context, but I thought it was cleaner to pass each function its own iterator than to call both functions from a loop that read the lines one by one from the file.

Comments

  1. 1. At 8:32 p.m. on 30 aug 2003, Raymond Hettinger said:

    Here's a simpler and faster implementation.

    import itertools
    
    def isplit(iterable):
        """Return two generators that each yield every item of iterable.

        Items are fetched lazily from a single underlying iterator.  An item
        fetched by one generator is held in a shared dict until the other
        generator has yielded it too.
        """
        it = iter(iterable)
        data = {}
        # Number of items fetched from `it` so far.  Kept per call (in a list
        # so the nested generator can update it) rather than on the isplit
        # function object, where a second isplit() call would reset it and
        # break the generators returned by earlier calls.
        cnt = [0]
        def f():
            for i in itertools.count():
                if i == cnt[0]:
                    # This generator is at the front: fetch a new item.
                    try:
                        item = next(it)
                    except StopIteration:
                        # End the generator explicitly; letting StopIteration
                        # escape is a RuntimeError under PEP 479.
                        return
                    data[i] = item
                    cnt[0] += 1
                else:
                    # The other generator already fetched item i.
                    item = data.pop(i)
                yield item
        return f(), f()
    
    
    # Example: both iterators yield the complete list.  The explicit format
    # string prints the same text whether print is a statement or a function.
    test = ["hello","how","are","you?"]
    x, y = isplit(test)
    print("%s %s" % (list(x), list(y)))
    
  2. 2. At 4:10 p.m. on 8 sep 2003, Raymond Hettinger said:

    Version that can return more than two iterators.

    def multi_iter(iterable, n=2):
        """Return multiple iterators (default is 2) from a single iterable.

        The result is a list of n generators, each yielding every item of
        iterable in order.  Items are fetched lazily from one underlying
        iterator and buffered in a shared dict until all n generators have
        yielded them.
        """

        def f(source, data, n, cnt):
            # i is this generator's own position in the stream; cnt[0] is
            # the number of items fetched from source so far.
            i = 0
            while True:
                if i == cnt[0]:
                    try:
                        item = next(source)
                    except StopIteration:
                        # End the generator explicitly; letting StopIteration
                        # escape is a RuntimeError under PEP 479.
                        return
                    cnt[0] += 1
                    if n > 1:
                        # Buffer the item with the number of generators that
                        # still have to yield it.  With a single generator
                        # nobody else will read it, so nothing is stored.
                        data[i] = [item, n - 1]
                else:
                    entry = data[i]
                    item, refcnt = entry
                    if refcnt == 1:
                        # Last reader: drop the buffered item.
                        del data[i]
                    else:
                        entry[1] = refcnt - 1
                yield item
                i += 1

        data, cnt, source = {}, [0], iter(iterable)
        return [f(source, data, n, cnt) for j in range(n)]
    
    # Example: four iterators over the same string.  zip() is wrapped in
    # list() so the pairs are printed on Python 3 as well, and the explicit
    # format string prints the same text whether print is a statement or a
    # function.
    w, x, y, z = multi_iter("shrubbery", 4)
    print("%s %s %s" % (list(zip(w, x)), list(y), tuple(z)))
    

Sign in to comment