# Welcome, guest | Sign In | My Account | Store | Cart
# A slice is the simplest tool, but each slice expression yields one field:
afield = theline[3:8]

# When the layout is easier to describe as a run of field widths,
# struct.unpack can pull out every field in a single call:
import struct

# Layout: one 5-byte string, 3 bytes skipped, two 8-byte strings, and then
# whatever is left over as a single trailing string.
baseformat = "5s 3x 8s 8s"
numremain = len(theline) - struct.calcsize(baseformat)
format = baseformat + " %ds" % numremain
leading, s1, s2, trailing = struct.unpack(format, theline)

# of course, the computation of the last field's length is best
# encapsulated in a function:
def fields(baseformat, theline, lastfield=None):
    """Unpack theline with baseformat, sizing one extra final field to fit.

    The bytes of theline beyond struct.calcsize(baseformat) form a final
    field: it is returned as a string ("s") when lastfield is true and
    skipped as padding ("x") otherwise.  Returns the tuple produced by
    struct.unpack.
    """
    leftover = len(theline) - struct.calcsize(baseformat)
    if lastfield:
        tail_code = "s"
    else:
        tail_code = "x"
    layout = "%s %d%s" % (baseformat, leftover, tail_code)
    return struct.unpack(layout, theline)
# note that caching/memoizing on (baseformat, len(theline), lastfield) may
# well be useful here if this is called in a loop -- an easy speedup

# Cut the line into 5-byte pieces; leftover bytes at the end are skipped
# as padding rather than returned:
numfives, therest = divmod(len(theline), 5)
form5 = "5s " * numfives + " %dx" % therest
fivers = struct.unpack(form5, theline)

# again, this is no doubt best encapsulated:
def split_by(theline, n, lastfield=None):
    """Split theline into consecutive n-byte pieces using struct.unpack.

    Args:
        theline: the byte string to cut up (struct needs a bytes-like
            object on Python 3).
        n: width of each piece, in bytes.
        lastfield: when true, a trailing piece shorter than n is returned
            as the final element; otherwise those leftover bytes are
            skipped.

    Returns:
        A tuple holding the full n-byte pieces in order, followed by the
        short remainder when lastfield is true and len(theline) is not a
        multiple of n.

    Raises:
        ZeroDivisionError: if n is 0 (from divmod).
    """
    numblocks, therest = divmod(len(theline), n)
    # Full blocks are always extracted ("s"); lastfield only decides whether
    # the leftover bytes are kept ("s") or treated as padding ("x").
    codes = ["%ds" % n] * numblocks
    if therest:
        codes.append("%d%s" % (therest, "s" if lastfield else "x"))
    return struct.unpack(" ".join(codes), theline)

# Chopping a string into its individual items needs no format at all:
chars = list(theline)

# If the layout is given as column positions rather than widths, pair each
# cut with the next one and slice.  The final pair ends in None, which as a
# slice bound means "through the end of the line".
cuts = [8, 14, 20, 26, 30]
pieces = [theline[i:j] for i, j in zip([0] + cuts, cuts + [None])]

# once more, encapsulation is advisable:
def split_at(theline, cuts, lastfield=None):
    """Split theline at the column positions listed in cuts.

    Args:
        theline: the sliceable sequence (e.g. a string) to cut up.
        cuts: positions at which to cut; each piece runs from the previous
            cut (0 for the first piece) up to the next one.
        lastfield: when true, everything from the last cut to the end of
            theline is appended as a final piece.

    Returns:
        A list of slices of theline, one per cut, plus the trailing piece
        when lastfield is true.
    """
    cuts = list(cuts)  # accept any iterable of positions, not only a list
    starts = [0] + cuts
    pieces = [theline[i:j] for i, j in zip(starts, cuts)]
    if lastfield:
        # starts[-1] is the last cut, or 0 when cuts is empty, so an empty
        # cut list yields the whole line instead of raising IndexError.
        pieces.append(theline[starts[-1]:])
    return pieces

# History