import pickle, json, csv, os, shutil
class PersistentDict(dict):
    ''' Persistent dictionary with an API compatible with shelve and anydbm.

    The dict is kept in memory, so the dictionary operations run as fast as
    a regular dictionary.

    Write to disk is delayed until close or sync (similar to gdbm's fast mode).

    Input file format is automatically discovered.
    Output file format is selectable between pickle, json, and csv.
    All three serialization formats are backed by fast C implementations.

    The csv format stores every key and value as text, so they come back
    as strings when the file is loaded again.

    SECURITY: format discovery tries pickle.load() first, and unpickling can
    execute arbitrary code.  Only open files that come from a trusted source.
    '''

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwds):
        '''Attach the dict to filename and load any existing contents.

        filename -- path of the backing file
        flag     -- 'n' ignores an existing file and starts empty;
                    'r' loads the file but turns sync() into a no-op;
                    any other value (normally 'c') loads the file if it is
                    readable and writes it back on sync()
        mode     -- None, or permission bits such as 0o644 that are
                    applied to the file after each sync()
        format   -- output format: 'csv', 'json', or 'pickle'

        Remaining positional and keyword arguments are handed to dict and
        applied on top of whatever was loaded from disk.

        Raises ValueError if an existing file is in none of the formats.
        '''
        self.flag = flag                    # r=readonly, c=create, or n=new
        self.mode = mode                    # None or an octal triple like 0o644
        self.format = format                # 'csv', 'json', or 'pickle'
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            self._load_file(filename)
        # dict.__init__ on an existing dict updates it without clearing,
        # so the loaded entries are kept.
        dict.__init__(self, *args, **kwds)

    def _load_file(self, filename):
        'Load filename, whatever its format, independent of self.format.'
        # pickle needs a binary stream while csv needs a text stream, so no
        # single open mode can serve every loader.  Probe in binary first
        # (pickle, UTF-encoded json), then in text mode (json in the default
        # text encoding, csv).  newline='' is what the csv module requires.
        for open_kwds in ({'mode': 'rb'}, {'mode': 'r', 'newline': ''}):
            with open(filename, **open_kwds) as fileobj:
                try:
                    return self.load(fileobj)
                except ValueError:
                    continue
        raise ValueError('File not in a supported format')

    def sync(self):
        'Write dict to disk'
        if self.flag == 'r':
            return
        filename = self.filename
        tempname = filename + '.tmp'
        if self.format == 'pickle':
            fileobj = open(tempname, 'wb')
        else:
            # newline='' stops the text layer from translating the line
            # endings that csv.writer emits itself.
            fileobj = open(tempname, 'w', newline='')
        try:
            with fileobj:
                self.dump(fileobj)
        except Exception:
            # The with-block has already closed the handle, so the partial
            # temp file can be removed on every platform.
            os.remove(tempname)
            raise
        os.replace(tempname, filename)      # atomic commit
        if self.mode is not None:
            os.chmod(filename, self.mode)

    def close(self):
        'Flush the dict to disk; same as sync().'
        self.sync()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Runs even when the with-body raised, so changes made before the
        # error are still written out.
        self.close()

    def dump(self, fileobj):
        '''Serialize the dict to fileobj using self.format.

        fileobj must be a binary stream for 'pickle' and a text stream for
        'csv' and 'json'.  Raises NotImplementedError for any other format.
        '''
        if self.format == 'csv':
            csv.writer(fileobj).writerows(self.items())
        elif self.format == 'json':
            json.dump(self, fileobj, separators=(',', ':'))
        elif self.format == 'pickle':
            # Pickle a plain dict so the file does not depend on this class.
            pickle.dump(dict(self), fileobj, 2)
        else:
            raise NotImplementedError('Unknown format: ' + repr(self.format))

    def load(self, fileobj):
        '''Update the dict from fileobj, guessing the serialization format.

        fileobj must be seekable.  Loaders that cannot handle the stream
        type (pickle on a text stream, csv on a binary stream) simply fail
        and are skipped.  Raises ValueError when no loader succeeds; in that
        case the dict is left unchanged.

        SECURITY: pickle.load() is tried first and can execute arbitrary
        code, so only pass streams that come from a trusted source.
        '''
        # try formats from most restrictive to least restrictive
        for loader in (pickle.load, json.load, csv.reader):
            fileobj.seek(0)
            staged = {}
            try:
                # Stage into a scratch dict so that a failure halfway
                # through (e.g. a malformed csv row) cannot leave self
                # partially updated.
                staged.update(loader(fileobj))
            except Exception:
                # Deliberate: any failure just means "not this format".
                continue
            self.update(staged)
            return
        raise ValueError('File not in a supported format')
if __name__ == '__main__':
    import random

    demo_path = '/tmp/demo.json'

    # Open (or create) a JSON-backed store, show what was loaded, then add
    # two entries.  Leaving the with-block closes the store, which writes it.
    with PersistentDict(demo_path, 'c', format='json') as store:
        print(store, 'start')
        store['abc'] = '123'
        store['rand'] = random.randrange(10000)
        print(store, 'updated')

    # Print the raw bytes that ended up in the file
    with open(demo_path, 'rb') as raw:
        print(raw.read())
Diff to Previous Revision
--- revision 9 2011-09-06 12:23:23
+++ revision 10 2011-09-06 20:01:46
@@ -1,16 +1,16 @@
import pickle, json, csv, os, shutil
-class DictDB(dict):
- '''Alternate DB based on a dict subclass
+class PersistentDict(dict):
+ ''' Persistent dictionary with an API compatible with shelve and anydbm.
- Runs like gdbm's fast mode (all writes all delayed until close).
- While open, the whole dict is kept in memory. Start-up and close
- time's are potentially long because the whole dict must be read
- from or written to disk.
+ The dict is kept in memory, so the dictionary operations run as fast as
+ a regular dictionary.
+
+ Write to disk is delayed until close or sync (similar to gdbm's fast mode).
Input file format is automatically discovered.
Output file format is selectable between pickle, json, and csv.
- All three are backed by fast C implementations.
+ All three serialization formats are backed by fast C implementations.
'''
@@ -60,7 +60,7 @@
elif self.format == 'pickle':
pickle.dump(dict(self), fileobj, 2)
else:
- raise NotImplementedError('Unknown format: %r' % self.format)
+ raise NotImplementedError('Unknown format: ' + repr(self.format))
def load(self, fileobj):
# try formats from most restrictive to least restrictive
@@ -70,12 +70,7 @@
return self.update(loader(fileobj))
except Exception:
pass
- raise ValueError('File not in recognized format')
-
-
-def dbopen(filename, flag='c', mode=None, format='pickle'):
- # Function to emulate API of shelve.open() or anydbm.open()
- return DictDB(filename, flag, mode, format)
+ raise ValueError('File not in a supported format')
@@ -83,7 +78,7 @@
import random
# Make and use a persistent dictionary
- with dbopen('/tmp/demo.json', 'c', format='json') as d:
+ with PersistentDict('/tmp/demo.json', 'c', format='json') as d:
print(d, 'start')
d['abc'] = '123'
d['rand'] = random.randrange(10000)