import csv
import io
import json
import os
import pickle
import shutil
class DictDB(dict):
    '''Persistent dictionary stored in a single pickle, JSON, or CSV file.

    Runs like gdbm's fast mode: all writes are delayed until sync() or
    close().  While open, the whole dict is kept in memory, so start-up
    and close times are potentially long because the whole dict must be
    read from or written to disk.

    The input file format is discovered automatically.  The output file
    format is selectable between 'pickle', 'json', and 'csv'.

    This implementation targets Python 3: files are opened in binary mode
    and the text formats (JSON, CSV) are encoded as UTF-8.
    '''

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwds):
        '''Open *filename* and populate the dict from it when possible.

        filename -- path of the backing file.
        flag     -- 'r' = read-only (sync() is a no-op), 'c' = create if
                    missing, 'n' = always start with a new, empty dict.
        mode     -- None, or permission bits such as 0o666 that are applied
                    to the file after each write.
        format   -- output format: 'csv', 'json', or 'pickle'.
        *args, **kwds -- forwarded to dict(); applied after the file is
                    loaded, so they override entries read from disk.
        '''
        self.flag = flag                    # r=readonly, c=create, or n=new
        self.mode = mode                    # None or an octal triple like 0o666
        self.format = format                # 'csv', 'json', or 'pickle'
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            with open(filename, 'rb') as fileobj:
                self.load(fileobj)
        dict.__init__(self, *args, **kwds)

    def sync(self):
        '''Write dict to disk (does nothing when opened with flag 'r').

        The data is first written to "<filename>.tmp" and then renamed over
        the real file, so a failed dump never truncates existing data.
        Any exception raised by dump() is re-raised after the temporary
        file has been cleaned up.
        '''
        if self.flag == 'r':
            return
        tempname = self.filename + '.tmp'
        try:
            # The with-block closes the file *before* any clean-up below;
            # removing a still-open file fails on Windows.
            with open(tempname, 'wb') as fileobj:
                self.dump(fileobj)
        except Exception:
            if os.path.exists(tempname):
                os.remove(tempname)
            raise
        os.replace(tempname, self.filename)     # atomic commit
        if self.mode is not None:
            os.chmod(self.filename, self.mode)

    def close(self):
        'Flush the dict to disk; see sync().'
        self.sync()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Data is written even when the with-body raised.
        self.close()

    def dump(self, fileobj):
        '''Serialize the dict to the binary file object *fileobj*.

        Uses the format named by self.format.  Raises NotImplementedError
        for an unknown format, before anything is written.
        '''
        if self.format == 'pickle':
            # A dict view cannot be pickled, so materialize the pairs.
            pickle.dump(list(self.items()), fileobj, -1)
            return
        if self.format not in ('csv', 'json'):
            raise NotImplementedError('Unknown format: %r' % self.format)
        # json and csv produce str, so put a text layer over the binary file.
        text = io.TextIOWrapper(fileobj, encoding='utf-8', newline='')
        try:
            if self.format == 'csv':
                csv.writer(text).writerows(self.items())
            else:
                json.dump(self, text, separators=(',', ':'))
            text.flush()
        finally:
            # Detach so that discarding the wrapper does not close fileobj.
            text.detach()

    @staticmethod
    def _load_pickle(fileobj):
        'Read a pickled mapping or list of pairs from a binary file.'
        # NOTE: unpickling can execute arbitrary code; only open files that
        # come from a trusted source.
        return pickle.load(fileobj)

    @staticmethod
    def _load_json(fileobj):
        'Read a JSON document from a binary file holding UTF-8 text.'
        text = io.TextIOWrapper(fileobj, encoding='utf-8', newline='')
        try:
            return json.load(text)
        finally:
            text.detach()

    @staticmethod
    def _load_csv(fileobj):
        'Read all CSV rows from a binary file holding UTF-8 text.'
        text = io.TextIOWrapper(fileobj, encoding='utf-8', newline='')
        try:
            return list(csv.reader(text))
        finally:
            text.detach()

    def load(self, fileobj):
        '''Update the dict from the seekable binary file object *fileobj*.

        Each known format is tried in turn.  Raises ValueError when none of
        them can parse the file into key/value pairs; in that case the dict
        is left unchanged.
        '''
        # try formats from most restrictive to least restrictive
        for loader in (self._load_pickle, self._load_json, self._load_csv):
            fileobj.seek(0)
            try:
                # Build a complete dict first so a loader that fails midway
                # cannot leave self partially updated.
                data = dict(loader(fileobj))
            except Exception:
                # Any failure just means "not this format"; try the next.
                continue
            self.update(data)
            return
        raise ValueError('File not in recognized format')
def dbopen(filename, flag='c', mode=None, format='pickle'):
    '''Open a DictDB, emulating the API of shelve.open() or anydbm.open().

    All four arguments are handed to DictDB unchanged and the resulting
    DictDB instance is returned.
    '''
    database = DictDB(filename, flag=flag, mode=mode, format=format)
    return database
if __name__ == '__main__':
    import random

    demo_path = '/tmp/tmp.shl'

    # Create (or reopen) a persistent dictionary and modify it; the
    # contents are written to disk when the with-block exits.
    with dbopen(demo_path, 'c', format='json') as db:
        print(db, 'start')
        db['abc'] = '123'
        db['rand'] = random.randrange(10000)
        print(db, 'updated')

    # Dump the raw bytes of the file to show the on-disk representation.
    with open(demo_path, 'rb') as stream:
        print(stream.read())
# Diff to Previous Revision
# --- revision 6 2011-09-06 09:49:45
# +++ revision 7 2011-09-06 10:27:14
# @@ -1,5 +1,4 @@
# -import pickle, json, csv
# -import os, shutil
# +import pickle, json, csv, os, shutil
# class DictDB(dict):
# '''Alternate DB based on a dict subclass
# @@ -82,11 +81,13 @@
# if __name__ == '__main__':
# import random
# - with dbopen('/tmp/tmp.shl', 'c', format='json') as s:
# - print(s, 'start')
# - s['abc'] = '123'
# - s['rand'] = random.randrange(10000)
# - print(s, 'updated')
# + # Make and use a persistent dictionary
# + with dbopen('/tmp/tmp.shl', 'c', format='json') as d:
# + print(d, 'start')
# + d['abc'] = '123'
# + d['rand'] = random.randrange(10000)
# + print(d, 'updated')
# + # Show what the file looks like on disk
# with open('/tmp/tmp.shl', 'rb') as f:
# - print (f.read())
# + print(f.read())