# Welcome, guest | Sign In | My Account | Store | Cart
from operator import itemgetter
from itertools import groupby,imap

# Sentinel default for ``end``: lets callers pass any value (including None)
# as a real end delimiter while still being able to omit the argument.
_undefined = object()

def iterblocks(iterable, start, end=_undefined, skip_delim=True):
    '''Create an iterator over consecutive items (I{blocks}) of the given iterable.

    Without an C{end} delimiter, every start delimiter opens a new block and
    the items seen before the first start delimiter form a leading block of
    their own. With an C{end} delimiter, only the items between a start
    delimiter and the next end delimiter are produced; while a block is open,
    further start delimiters are treated as ordinary items.

    A block that would contain no items (for example two adjacent delimiters
    with C{skip_delim} true) is not produced at all.

    @param iterable: The items to split into blocks.
    @param start: The delimiter denoting the start of a block. It can be:
        1. a predicate C{p(item)} that returns true if the item is a
        delimiter and false otherwise, or
        2. a non-callable object C{obj}: equivalent to C{lambda item: item==start}
    @param end: If given, the delimiter denoting the end of a block. Items
        after an end delimiter but before the next start delimiter are skipped.
        Takes the same values as C{start}; C{None} is a valid delimiter value
        and is compared against the items like any other object.

    @param skip_delim: True if the delimiter(s) are to be skipped, false otherwise.
    @type skip_delim: C{bool}

    @return: An iterator yielding one lazy iterator per block. The blocks are
        backed by a single C{itertools.groupby} pass over C{iterable}, so each
        block has to be consumed before advancing to the next one.
    '''
    def get_predicate(arg):
        if callable(arg):
            return arg
        # Compare with ``==`` instead of returning the bound ``arg.__eq__``:
        # the bound method returns the (truthy) ``NotImplemented`` for operand
        # types it does not handle, e.g. ``(0).__eq__(1.5)``, which would turn
        # every such item into a delimiter.
        return lambda item: item == arg

    def stamped_items(items):
        # Pair every item that is kept with the ordinal of the block it
        # belongs to, so that groupby() can cut the stream into blocks.
        count = 0
        startsblock = get_predicate(start)
        if end is _undefined:
            for item in items:
                if startsblock(item):
                    count += 1
                    if skip_delim:
                        continue
                yield count, item
        else:
            endsblock = get_predicate(end)
            inblock = False
            for item in items:
                if inblock:
                    if endsblock(item):
                        inblock = False
                        if skip_delim:
                            continue
                elif startsblock(item):
                    count += 1
                    inblock = True
                    if skip_delim:
                        continue
                else:
                    # Outside of any block: drop the item.
                    continue
                yield count, item

    for _, block in groupby(stamped_items(iterable), itemgetter(0)):
        # A generator expression strips the block ordinal lazily and, unlike
        # itertools.imap, is available on both Python 2 and Python 3.
        yield (item for _, item in block)

if __name__ == '__main__':
    # Small demonstration of iterblocks(); runs only when executed as a script.
    import re

    # a slow version of str.split: whitespace characters are the (skipped)
    # start delimiters, so each block is the characters of one word.
    # The parenthesised single-argument print form behaves the same as the
    # print statement on Python 2 and is also valid Python 3 syntax.
    for chars in iterblocks('Hello World', re.compile(r'\s').match):
        print(''.join(chars))

    # Records that open with a line starting with '>' and close with a line
    # equal to '...'; both delimiter lines are skipped by default.
    source = """\
> name1....

line_11
line_12
line_13
...
> name2 ...

line_21
line_22
...""".splitlines()
    for lines in iterblocks(source, start=re.compile('>').match, end='...'):
        print(list(lines))

# History
#
#   • revision 4 (14 years ago)
#   • previous revisions are not available