1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219 | import struct, datetime, decimal, itertools
def dbfreader(f):
"""Returns an iterator over records in a Xbase DBF file.
The first row returned contains the field names.
The second row contains field specs: (type, size, decimal places).
Subsequent rows contain the data records.
If a record is marked as deleted, it is skipped.
File should be opened for binary reads.
"""
# See DBF format spec at:
# http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT
numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))
numfields = (lenheader - 33) // 32
fields = []
for fieldno in xrange(numfields):
name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
name = name.replace('\0', '') # eliminate NULs from string
fields.append((name, typ, size, deci))
yield [field[0] for field in fields]
yield [tuple(field[1:]) for field in fields]
terminator = f.read(1)
assert terminator == '\r'
fields.insert(0, ('DeletionFlag', 'C', 1, 0))
fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
fmtsiz = struct.calcsize(fmt)
for i in xrange(numrec):
record = struct.unpack(fmt, f.read(fmtsiz))
if record[0] != ' ':
continue # deleted record
result = []
for (name, typ, size, deci), value in itertools.izip(fields, record):
if name == 'DeletionFlag':
continue
if typ == "N":
value = value.replace('\0', '').lstrip()
if value == '':
value = 0
elif deci:
value = decimal.Decimal(value)
else:
value = int(value)
elif typ == 'D':
y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
value = datetime.date(y, m, d)
elif typ == 'L':
value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
elif typ == 'F':
value = float(value)
result.append(value)
yield result
def dbfwriter(f, fieldnames, fieldspecs, records):
""" Return a string suitable for writing directly to a binary dbf file.
File f should be open for writing in a binary mode.
Fieldnames should be no longer than ten characters and not include \x00.
Fieldspecs are in the form (type, size, deci) where
type is one of:
C for ascii character data
M for ascii character memo data (real memo fields not supported)
D for datetime objects
N for ints or decimal objects
L for logical values 'T', 'F', or '?'
size is the field width
deci is the number of decimal places in the provided decimal object
Records can be an iterable over the records (sequences of field values).
"""
# header info
ver = 3
now = datetime.datetime.now()
yr, mon, day = now.year-1900, now.month, now.day
numrec = len(records)
numfields = len(fieldspecs)
lenheader = numfields * 32 + 33
lenrecord = sum(field[1] for field in fieldspecs) + 1
hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
f.write(hdr)
# field specs
for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
name = name.ljust(11, '\x00')
fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
f.write(fld)
# terminator
f.write('\r')
# records
for record in records:
f.write(' ') # deletion flag
for (typ, size, deci), value in itertools.izip(fieldspecs, record):
if typ == "N":
value = str(value).rjust(size, ' ')
elif typ == 'D':
value = value.strftime('%Y%m%d')
elif typ == 'L':
value = str(value)[0].upper()
else:
value = str(value)[:size].ljust(size, ' ')
assert len(value) == size
f.write(value)
# End of file
f.write('\x1A')
# -------------------------------------------------------
# Example calls
if __name__ == '__main__':
import sys, csv
from cStringIO import StringIO
from operator import itemgetter
# Read a database
filename = '/pydev/databases/orders.dbf'
if len(sys.argv) == 2:
filename = sys.argv[1]
f = open(filename, 'rb')
db = list(dbfreader(f))
f.close()
for record in db:
print record
fieldnames, fieldspecs, records = db[0], db[1], db[2:]
# Alter the database
del records[4]
records.sort(key=itemgetter(4))
# Remove a field
del fieldnames[0]
del fieldspecs[0]
records = [rec[1:] for rec in records]
# Create a new DBF
f = StringIO()
dbfwriter(f, fieldnames, fieldspecs, records)
# Read the data back from the new DBF
print '-' * 20
f.seek(0)
for line in dbfreader(f):
print line
f.close()
# Convert to CSV
print '.' * 20
f = StringIO()
csv.writer(f).writerow(fieldnames)
csv.writer(f).writerows(records)
print f.getvalue()
f.close()
# Example Output
"""
['ORDER_ID', 'CUSTMR_ID', 'EMPLOY_ID', 'ORDER_DATE', 'ORDER_AMT']
[('C', 10, 0), ('C', 11, 0), ('C', 11, 0), ('D', 8, 0), ('N', 12, 2)]
['10005 ', 'WALNG ', '555 ', datetime.date(1995, 5, 22), Decimal("173.40")]
['10004 ', 'BMARK ', '777 ', datetime.date(1995, 5, 18), Decimal("3194.20")]
['10029 ', 'SAWYH ', '777 ', datetime.date(1995, 6, 29), Decimal("97.30")]
['10013 ', 'RITEB ', '777 ', datetime.date(1995, 6, 2), Decimal("560.40")]
['10024 ', 'RATTC ', '444 ', datetime.date(1995, 6, 21), Decimal("2223.50")]
['10018 ', 'RATTC ', '444 ', datetime.date(1995, 6, 12), Decimal("1076.05")]
['10025 ', 'RATTC ', '444 ', datetime.date(1995, 6, 23), Decimal("185.80")]
['10038 ', 'OLDWO ', '111 ', datetime.date(1995, 7, 14), Decimal("863.96")]
['10002 ', 'MTIME ', '333 ', datetime.date(1995, 5, 16), Decimal("731.80")]
['10007 ', 'MORNS ', '444 ', datetime.date(1995, 5, 24), Decimal("1405.00")]
['10026 ', 'MORNS ', '555 ', datetime.date(1995, 6, 26), Decimal("17.40")]
['10030 ', 'LILLO ', '111 ', datetime.date(1995, 7, 3), Decimal("909.91")]
['10022 ', 'LAPLA ', '111 ', datetime.date(1995, 6, 19), Decimal("671.50")]
['10035 ', 'HIGHG ', '111 ', datetime.date(1995, 7, 11), Decimal("1984.83")]
['10033 ', 'FOODG ', '333 ', datetime.date(1995, 7, 6), Decimal("3401.32")]
--------------------
['CUSTMR_ID', 'EMPLOY_ID', 'ORDER_DATE', 'ORDER_AMT']
[('C', 11, 0), ('C', 11, 0), ('D', 8, 0), ('N', 12, 2)]
['MORNS ', '555 ', datetime.date(1995, 6, 26), Decimal("17.40")]
['SAWYH ', '777 ', datetime.date(1995, 6, 29), Decimal("97.30")]
['WALNG ', '555 ', datetime.date(1995, 5, 22), Decimal("173.40")]
['RATTC ', '444 ', datetime.date(1995, 6, 23), Decimal("185.80")]
['RITEB ', '777 ', datetime.date(1995, 6, 2), Decimal("560.40")]
['LAPLA ', '111 ', datetime.date(1995, 6, 19), Decimal("671.50")]
['MTIME ', '333 ', datetime.date(1995, 5, 16), Decimal("731.80")]
['OLDWO ', '111 ', datetime.date(1995, 7, 14), Decimal("863.96")]
['LILLO ', '111 ', datetime.date(1995, 7, 3), Decimal("909.91")]
['RATTC ', '444 ', datetime.date(1995, 6, 12), Decimal("1076.05")]
['MORNS ', '444 ', datetime.date(1995, 5, 24), Decimal("1405.00")]
['HIGHG ', '111 ', datetime.date(1995, 7, 11), Decimal("1984.83")]
['BMARK ', '777 ', datetime.date(1995, 5, 18), Decimal("3194.20")]
['FOODG ', '333 ', datetime.date(1995, 7, 6), Decimal("3401.32")]
....................
CUSTMR_ID,EMPLOY_ID,ORDER_DATE,ORDER_AMT
MORNS ,555 ,1995-06-26,17.40
SAWYH ,777 ,1995-06-29,97.30
WALNG ,555 ,1995-05-22,173.40
RATTC ,444 ,1995-06-23,185.80
RITEB ,777 ,1995-06-02,560.40
LAPLA ,111 ,1995-06-19,671.50
MTIME ,333 ,1995-05-16,731.80
OLDWO ,111 ,1995-07-14,863.96
LILLO ,111 ,1995-07-03,909.91
RATTC ,444 ,1995-06-12,1076.05
MORNS ,444 ,1995-05-24,1405.00
HIGHG ,111 ,1995-07-11,1984.83
BMARK ,777 ,1995-05-18,3194.20
FOODG ,333 ,1995-07-06,3401.32
"""
|
Visual FoxPro. With a visual foxpro dbf file I get an AssertionError regarding the terminator.
Can your recipe be modified easily to work with Visual FoxPro?
small typo. there is a small typo in the example code: main function is written twice and the second "version" has a bug in it (a missing argument in dbfwriter()); the first version works though
yield statement syntax error using ActiveState Python V2.2. Attempting to run the DBF reader portion of the code results in the following invalid syntax error ...
yield [field[0] for field in fields]
an accompanying warning message is issued indicates that 'yield' will become a reserved word.
Generators in Python 2.2. Generators were still an optional feature in Python 2.2.
Just add "from __future__ import generators" to the top of the module.
1 byte out.... Great code, as a newbie I would love to write code this tight. When I ran this on a DBF file it was one byte out , adding a new line to grab an extra byte sorted this out.
Then I had no problems. I dont know if this is due to an extra byte after the terminator '\r' ???. Anyway, that helped.
The other problem I had was empy dates. If the value was ' ' then I would get errors ie a=int(' ') gives an error
Changeing the code for the Date record to this
worked and gave me the date in a dmy format. If the string was ' ' then I get ' / / ' which is ok.
Hamish
Note that. File handlers for dbfwriter must be open in 'wb' mode, especially on Windows platform, because write method will convert 0xA to 0x0A 0x0D (CR to CRLF) and you'll get corrupted DBF.
problem writing dbf file to disk. I am butting my head against a wall trying to do something very simple with this: write a dbf file to disk (on XP). E.g modifying the sample script:
Access doesnt like the resultant file. And DBFReader function also complains about it.
Any insight would be appreciated.
Dennis Christopher
Python code for reading Visual FoxPro DBF. This is a Python module that read Visual FoxPro DBF file with memo:
http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html
Hope this can be useful.
I have made a simple lib to read DBF files, that looks like the django db/model api. It supports searching into the databases, creating new databases from a model definition and creating a model from an existing database. There is also a lower-level interface, if you don't like the "django like" syntax. It's still in the early stages of development, I hope to have the time to finish it.
How come I can not update or delete a record?
Get you make it generic?
I recently found this code, and i am using it to read arcmap shapefile dbf files, when i read in the data using the the dbfreader function some of the fields are being read in as various length string of '*'. I dunno why the code is doing this, i think it maybe the data, but excel, and arcmap read the data perfectly fine, some of the data columns are over the character length, which may be the problem. Any help would be greatly appreciated also i can provide the data that is giving issues too.
Thanks, Andrew
To Andrew Leonard: I have a non-published DBF reader module which may be able to help you. You can find my e-mail address by google_search("John Machin xlrd").
New link to the spec: http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT
We are using the code above with a few mods for dumping CodeBase files, which has been a HUGE help for our development. However, CodeBase can create character fields with lengths greater than 255, and the above code chokes to death on that. This may be the solution to the problem Andrew Leonard mentions above, since he mentions over-length data columns.
As it turns out, the following line:
should look like:
as fieldinfo[3] is the high order byte of the field length according to the spec at clicketyclick.dk, see comment above for URL. Once the above change is in place, files with long character fields read/dump properly.
CAVEAT: the spec mentions that this is the format of the high order byte for FoxPro and Clipper files for NON-NUMERIC data; the original posted code (comment, line 7) indicates that for some numeric types that fieldinfo[3] may be the number of decimal places. You may have to write some additional code if you have BOTH numeric and long character fields in your table. I didn't have any tables with decimal data to verify whether or not this is the case.
Good luck!
Ken