Welcome, guest | Sign In | My Account | Store | Cart
import struct, datetime, decimal, itertools
from collections import namedtuple

# Layout of one field inside a DBF record, as built by dbfreader:
#   name, typ, size, deci -- taken from the 32-byte field descriptor
#   fmt, fmtsiz           -- struct format for the raw field and its byte size
#   keep                  -- true when the caller asked for this field
#   seekme                -- byte offset of the field from the record start
FI = namedtuple('FieldInfo', ('name', 'typ', 'size', 'deci',
                              'fmt', 'fmtsiz', 'keep', 'seekme'))


def _dbf_native(raw):
    """Return raw bytes read from the file as the interpreter's native str.

    On Python 2 bytes already is str and is returned unchanged; on Python 3
    the bytes are decoded as Latin-1, which maps every byte to one character.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode('latin-1')


def dbfreader(f, names, nullreplace=None):
    """Return an iterator over the records of an Xbase DBF file.

    The first row yielded contains the names of the selected fields, in
    file order. The second row contains their specs as
    (type, size, decimal places) tuples. Every following row is one data
    record holding only the selected fields. Records marked as deleted
    are skipped.

    f is a file object opened for binary reads; it is read sequentially.
    names is a container of the field names to extract.
    nullreplace is the value returned for a type 'N' field that is empty
    once NUL bytes and padding blanks are removed.

    Field values are converted by type:
        N -> int, or decimal.Decimal when the field declares decimals
        D -> datetime.date, or None when the date is blank
        L -> 'T', 'F' or '?'
        F -> float
        anything else -> the raw field text, padding included

    Raises struct.error if the file ends in the middle of the header or
    before the number of records announced in the header has been read.

    """
    # See DBF format spec at:
    # http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    numfields = (lenheader - 33) // 32

    # Pseudo-field for the one-byte deletion flag that starts every record;
    # it anchors the offsets of the real fields and is never returned.
    fields = [FI('DeletionFlag', 'C', 1, 0,
                 '1s', struct.calcsize('1s'), True, 0)]

    for fieldno in range(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = _dbf_native(name).replace('\0', '')   # drop NUL padding
        typ = _dbf_native(typ)
        fmt = str(size) + 's'
        prev = fields[fieldno]
        fields.append(FI(name, typ, size, deci, fmt, struct.calcsize(fmt),
                         name in names, prev.seekme + prev.size))

    selected = [field for field in fields[1:] if field.keep]
    yield [field.name for field in selected]
    yield [tuple(field[1:4]) for field in selected]

    terminator = f.read(1)
    assert terminator == b'\r', 'missing DBF header terminator'

    # Read each record whole. This keeps the stream aligned on the next
    # record no matter which fields are selected and whether the record
    # turns out to be deleted.
    lenrecord = fields[-1].seekme + fields[-1].fmtsiz
    for _ in itertools.repeat(None, numrec):
        raw = f.read(lenrecord)
        if len(raw) < lenrecord:
            raise struct.error('truncated DBF record: expected %d bytes, '
                               'got %d' % (lenrecord, len(raw)))
        if raw[:1] != b' ':
            continue                        # deleted record

        result = []
        for field in selected:
            start = field.seekme
            value = _dbf_native(raw[start:start + field.fmtsiz])
            if field.typ == 'N':
                value = value.replace('\0', '').strip()
                if value == '':
                    value = nullreplace
                elif field.deci:
                    value = decimal.Decimal(value)
                else:
                    value = int(value)
            elif field.typ == 'D':
                if value.replace('\0', ' ').strip():
                    value = datetime.date(int(value[:4]), int(value[4:6]),
                                          int(value[6:8]))
                else:
                    value = None            # blank date
            elif field.typ == 'L':
                if value in 'YyTt':
                    value = 'T'
                elif value in 'NnFf':
                    value = 'F'
                else:
                    value = '?'
            elif field.typ == 'F':
                value = float(value)
            result.append(value)
        yield result

def _dbf_bytes(text):
    """Return text as bytes ready for a binary file.

    Byte strings pass through unchanged (this covers Python 2 str); text
    strings are encoded as Latin-1, one byte per character.
    """
    if isinstance(text, bytes):
        return text
    return text.encode('latin-1')


def dbfwriter(f, fieldnames, fieldspecs, records, nullreplace=None):
    """Write a complete dbf file (header, records, EOF marker) to f.

    File f should be open for writing in a binary mode. Nothing is
    returned.

    Fieldnames should be no longer than ten characters and must not
    contain NUL characters.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime objects (None is written as a blank date)
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records can be any iterable over the records (sequences of field
    values). An iterable without a length is read into a list first,
    because the record count has to be written in the header.

    The value of nullreplace is compared with values of type N and, if
    equal, a NUL byte right-justified in blanks is written instead.

    An AssertionError is raised when a formatted value does not occupy
    exactly the declared field size, e.g. a number too wide for its field.

    """
    try:
        numrec = len(records)
    except TypeError:                       # generator or other iterator
        records = list(records)
        numrec = len(records)

    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year - 1900, now.month, now.day
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1
    f.write(struct.pack('<BBBBLHH20x', ver, yr, mon, day,
                        numrec, lenheader, lenrecord))

    # field specs
    for name, (typ, size, deci) in zip(fieldnames, fieldspecs):
        name = _dbf_bytes(name).ljust(11, b'\x00')
        f.write(struct.pack('<11sc4xBB14x', name, _dbf_bytes(typ), size, deci))

    # terminator
    f.write(b'\r')

    # records
    for record in records:
        parts = [b' ']                      # deletion flag
        for (typ, size, deci), value in zip(fieldspecs, record):
            if typ == 'N':
                if value != nullreplace:
                    text = str(value).rjust(size, ' ')
                else:
                    text = '\0'.rjust(size, ' ')
            elif typ == 'D':
                if value is None:
                    text = ' ' * size       # blank date
                else:
                    text = value.strftime('%Y%m%d')
            elif typ == 'L':
                text = str(value)[0].upper()
            else:
                if isinstance(value, bytes) and not isinstance(value, str):
                    # Python 3 bytes: str() would give "b'...'"
                    value = value.decode('latin-1')
                text = str(value)[:size].ljust(size, ' ')
            data = _dbf_bytes(text)
            assert len(data) == size, (
                'value %r does not fit field of size %d' % (value, size))
            parts.append(data)
        f.write(b''.join(parts))            # one write per record

    # End of file
    f.write(b'\x1A')

History