import datetime
import decimal
import itertools
import struct
from collections import namedtuple

# Python 2/3 compatibility shims: keep the lazy Python 2 iterators where they
# exist, fall back to the (already lazy) Python 3 builtins otherwise.
try:
    _range = xrange  # Python 2
except NameError:
    _range = range  # Python 3
_zip = getattr(itertools, 'izip', zip)

# Description of one column of the file.
#   name, typ, size, deci -- as stored in the DBF field descriptor
#   fmt, fmtsiz           -- struct format for the raw field and its byte size
#   keep                  -- True when the caller asked for this field
#   seekme                -- byte offset of the field inside a record
FI = namedtuple('FieldInfo', ('name', 'typ', 'size', 'deci', 'fmt', 'fmtsiz',
                              'keep', 'seekme'))


class DBFFormatError(ValueError, AssertionError):
    """Raised for a malformed DBF header or a value that does not fit its field.

    It also derives from AssertionError because these checks used to be
    ``assert`` statements; callers that caught AssertionError keep working.
    """


def _to_text(value):
    """Decode *value* to text if it is a bytes object distinct from ``str``.

    On Python 2 ``bytes is str``, so everything is returned unchanged.
    Latin-1 is used because it maps every byte value to one character, so
    field contents survive a read/write round trip whatever their code page.
    """
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode('latin-1')
    return value


def _to_bytes(text):
    """Encode *text* as Latin-1 unless it already is a bytes object."""
    if isinstance(text, bytes):
        return text
    return text.encode('latin-1')


def _decode_value(field, raw, nullreplace):
    """Convert the raw content of one field to a Python value."""
    value = _to_text(raw)
    if field.typ == 'N':
        value = value.replace('\0', '').lstrip()
        if value == '':
            return nullreplace
        if field.deci:
            return decimal.Decimal(value)
        return int(value)
    if field.typ == 'D':
        return datetime.date(int(value[:4]), int(value[4:6]), int(value[6:8]))
    if field.typ == 'L':
        if value in 'YyTt':
            return 'T'
        if value in 'NnFf':
            return 'F'
        return '?'
    if field.typ == 'F':
        return float(value)
    return value


def _encode_value(typ, size, value, nullreplace):
    """Render one field value as exactly *size* bytes for a record."""
    if typ == 'N':
        if value != nullreplace:
            text = str(_to_text(value)).rjust(size, ' ')
        else:
            text = '\0'.rjust(size, ' ')
    elif typ == 'D':
        text = value.strftime('%Y%m%d')
    elif typ == 'L':
        text = str(value)[0].upper()
    else:
        text = str(_to_text(value))[:size].ljust(size, ' ')
    # Only the default branch truncates; a number, date or logical value that
    # does not match its declared width would corrupt every following record.
    if len(text) != size:
        raise DBFFormatError('value %r does not fit field of type %r and '
                             'size %d' % (text, typ, size))
    return _to_bytes(text)


def dbfreader(f, names, nullreplace=None):
    """Return an iterator over the records of an Xbase DBF file.

    The first row yielded contains the names of the selected fields.
    The second row contains their specs: (type, size, decimal places).
    Subsequent rows contain the data records, restricted to the selected
    fields, in file order. Records marked as deleted are skipped.

    f           -- file object opened for binary reads
    names       -- the field names to extract (tested with ``in``)
    nullreplace -- value returned for fields of type 'N' that are empty once
                   NUL bytes and leading blanks are removed

    Type 'N' fields become int or decimal.Decimal (when the field declares
    decimal places), 'D' fields datetime.date, 'L' fields one of 'T', 'F' or
    '?', 'F' fields float; everything else is returned as text, padding
    included.

    Raises DBFFormatError if the header terminator is missing, and
    struct.error if the file ends before the announced records.
    """
    # See DBF format spec at:
    #     http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
    numfields = (lenheader - 33) // 32

    # Pseudo field for the one-byte deletion flag that starts every record;
    # it only serves to anchor the offsets of the real fields.
    fields = [FI('DeletionFlag', 'C', 1, 0, '1s', struct.calcsize('1s'),
                 True, 0)]
    for fieldno in _range(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = _to_text(name).replace('\0', '')  # eliminate NULs from string
        typ = _to_text(typ)
        fmt = str(size) + 's'
        prev = fields[fieldno]
        fields.append(FI(name, typ, size, deci, fmt, struct.calcsize(fmt),
                         name in names, prev.seekme + prev.size))

    wanted = [field for field in fields[1:] if field.keep]
    yield [field.name for field in wanted]
    yield [tuple(field[1:4]) for field in wanted]

    if f.read(1) != b'\r':
        raise DBFFormatError('DBF header terminator (0x0D) not found')

    flagfmt = fields[0].fmt
    lenrecord = fields[-1].seekme + fields[-1].fmtsiz
    for _ in _range(numrec):
        # Reading the whole fixed-length record keeps the file positioned on
        # the next record even when this one is skipped as deleted.
        raw = f.read(lenrecord)
        if struct.unpack_from(flagfmt, raw, 0)[0] != b' ':
            continue  # deleted record
        yield [_decode_value(field,
                             struct.unpack_from(field.fmt, raw,
                                                field.seekme)[0],
                             nullreplace)
               for field in wanted]


def dbfwriter(f, fieldnames, fieldspecs, records, nullreplace=None):
    """Write a complete DBF file to f. Returns None.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \\x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records can be any iterable over the records (sequences of field values);
    an iterable without a length is first collected into a list because the
    record count is stored in the header.
    The value of nullreplace is compared with values of type N and, if equal,
    replaced with '\\0' in the output.

    Raises DBFFormatError when a rendered value does not have exactly the
    width declared for its field.
    """
    try:
        numrec = len(records)
    except TypeError:  # generator or other unsized iterable
        records = list(records)
        numrec = len(records)

    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year - 1900, now.month, now.day
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1
    f.write(struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader,
                        lenrecord))

    # field specs
    for name, (typ, size, deci) in _zip(fieldnames, fieldspecs):
        name = _to_bytes(name).ljust(11, b'\x00')
        f.write(struct.pack('<11sc4xBB14x', name, _to_bytes(typ), size, deci))

    # terminator
    f.write(b'\r')

    # records
    for record in records:
        parts = [b' ']  # deletion flag
        for (typ, size, deci), value in _zip(fieldspecs, record):
            parts.append(_encode_value(_to_text(typ), size, value,
                                       nullreplace))
        f.write(b''.join(parts))

    # End of file
    f.write(b'\x1A')