# Welcome, guest | Sign In | My Account | Store | Cart
class Paragraphs:
    """Read-once, in-order view of a line source as a series of paragraphs.

    A paragraph is a run of one or more consecutive non-separator lines,
    returned as a single string with the line endings kept.  Runs of
    separator lines between paragraphs are skipped.

    Only sequential indexing is supported (obj[0], obj[1], ...); that is
    enough for a plain ``for`` loop, which stops at the first IndexError.
    """

    def __init__(self, fileobj, separator=None):
        """Wrap fileobj, an open file or any other iterable of lines.

        separator, if given, must be a callable taking one line and
        returning true when that line separates paragraphs; the default
        treats a line equal to '\\n' as the separator.

        Raises TypeError if separator is given but is not callable.
        """
        # self.seq: iterator over the underlying lines
        # self.line_num: count of lines consumed up to and including the
        #                separator that closed the last returned paragraph
        # self.para_num: count of paragraphs returned so far, which is
        #                also the only index __getitem__ will accept next
        self.seq = iter(fileobj)
        self.line_num = 0
        self.para_num = 0
        # allow for optional passing of separator-function
        if separator is None:
            def separator(line): return line == '\n'
        elif not callable(separator):
            raise TypeError("separator argument must be callable")
        self.separator = separator

    def __getitem__(self, index):
        """Return the next paragraph; index must equal self.para_num.

        Raises TypeError on out-of-order access, and IndexError once no
        further paragraph is available.  self.line_num and self.para_num
        are advanced only when a paragraph is actually returned.
        """
        if index != self.para_num:
            raise TypeError("Only sequential access supported")
        para = []
        consumed = 0
        for line in self.seq:
            consumed += 1
            if not self.separator(line):
                para.append(line)
            elif para:
                # a separator after some content ends the paragraph; the
                # separator itself stays consumed and is counted
                break
            # otherwise: a leading separator before any content, skip it
        if not para:
            # nothing but separators (or nothing at all) was left: we are
            # a finished sequence
            raise IndexError("no more paragraphs")
        # update self state, return string result
        self.line_num += consumed
        self.para_num += 1
        return ''.join(para)

# here's an example function, showing off usage:
def show_paragraphs(filename, numpars=5):
    """Print up to numpars leading paragraphs of the text file filename.

    Each paragraph is printed on one line as its 1-based paragraph
    number, the Paragraphs line counter after reading it, and the repr
    of its text.  The file is closed before returning, even on error.
    """
    fileobj = open(filename)
    try:
        pp = Paragraphs(fileobj)
        for p in pp:
            # para_num already counts p here, so this stops before
            # printing paragraph number numpars+1 (and prints nothing
            # at all when numpars is 0)
            if pp.para_num > numpars: break
            print("Par#%d, line# %d: %s" % (
                pp.para_num, pp.line_num, repr(p)))
    finally:
        fileobj.close()


# - revision 2 (22 years ago)
# - previous revisions are not available