class Paragraphs:
    """Present a stream of lines as a forward-only sequence of paragraphs.

    A paragraph is a run of one or more consecutive non-separator lines,
    returned as a single string (the lines joined together, line endings
    included).  Runs of separator lines between paragraphs are skipped.

    Only sequential access is supported: paragraphs must be requested as
    ``p[0]``, ``p[1]``, ... in order.  That is exactly what a ``for`` loop
    does through the ``__getitem__``/``IndexError`` sequence protocol, so
    ``for para in Paragraphs(f):`` works.

    Attributes:
        seq: iterator over the underlying lines.
        line_num: number of lines consumed up to and including the
            separator that ended the most recently returned paragraph.
        para_num: index the next ``__getitem__`` call must be given.
        separator: callable taking a line and returning true when that
            line separates paragraphs.
    """

    def __init__(self, fileobj, separator=None):
        """Wrap *fileobj* so its lines can be read paragraph by paragraph.

        Args:
            fileobj: an open file, or any iterable yielding lines.  An
                object that is not iterable but has a ``readline()``
                method returning ``''`` at end of input is also accepted.
            separator: optional callable ``separator(line)`` returning
                true for lines that separate paragraphs.  Defaults to
                treating lines equal to ``'\\n'`` as separators.

        Raises:
            TypeError: if *separator* is given but is not callable.
        """
        # The old xreadlines module/method is gone from later Pythons and
        # file objects are not indexable, so consume lines through the
        # iterator protocol instead of indexing a pseudo-sequence.
        try:
            self.seq = iter(fileobj)
        except TypeError:
            # Not iterable: fall back on repeated readline() calls,
            # stopping at the empty string that signals end of input.
            self.seq = iter(fileobj.readline, '')
        self.line_num = 0
        self.para_num = 0
        # allow for optional passing of separator-function
        if separator is None:
            def separator(line):
                return line == '\n'
        elif not callable(separator):
            raise TypeError("separator argument must be callable")
        self.separator = separator

    def __getitem__(self, index):
        """Return the next paragraph; *index* must equal ``self.para_num``.

        Raises:
            TypeError: if *index* is not the next sequential index.
            IndexError: if no further paragraph is available (this is
                what terminates a ``for`` loop over the instance).
        """
        if index != self.para_num:
            raise TypeError("Only sequential access supported")
        # Bumped before reading, so it also advances on the final,
        # failing lookup that signals exhaustion.
        self.para_num += 1
        consumed = self.line_num
        result = []
        for line in self.seq:
            consumed += 1
            if self.separator(line):
                if result:
                    # the separator that terminates this paragraph
                    break
                # otherwise: a leading separator line, just skip it
            else:
                result.append(line)
        if not result:
            # Input ran out before any paragraph line was found, so this
            # sequence is finished too.  line_num is left untouched.
            raise IndexError("no more paragraphs")
        # update self state, return string result
        self.line_num = consumed
        return ''.join(result)
# here's an example function, showing off usage:
def show_paragraphs(filename,numpars=5):
    """Print the leading paragraphs of the file named *filename*.

    For each paragraph, prints the paragraph counter and line counter of
    the ``Paragraphs`` reader followed by the ``repr`` of the paragraph
    text.  The loop stops once the paragraph counter exceeds *numpars*
    (checked after printing, so up to ``numpars + 1`` paragraphs are
    shown) or when the file runs out of paragraphs.

    Args:
        filename: path of the text file to read.
        numpars: paragraph-count threshold at which to stop (default 5).
    """
    fileobj = open(filename)
    # try/finally guarantees the file is closed even if printing or
    # reading raises part-way through.
    try:
        pp = Paragraphs(fileobj)
        for p in pp:
            # Single pre-formatted argument: prints identically whether
            # print is a statement or a function.
            print("Par#%d, line# %d: %s" % (
                pp.para_num, pp.line_num, repr(p)))
            if pp.para_num > numpars:
                break
    finally:
        fileobj.close()