import csv
import io
import json
import os
import pickle
import shutil
class DictDB(dict):
    """Dict subclass persisted to a single file, as an alternative to shelve.

    Runs like gdbm's fast mode: all writes are delayed until ``sync()`` or
    ``close()``.  While open, the whole dict is kept in memory.  Start-up
    and close times are potentially long because the whole dict must be
    read from or written to disk.

    The input file format is discovered automatically.  The output file
    format is selectable between ``'pickle'``, ``'json'`` and ``'csv'``.

    Round-trip caveats: JSON stores every key as a string, and CSV stores
    both keys and values as strings.

    Security: ``load()`` always tries ``pickle`` first, and unpickling can
    execute arbitrary code.  Only open files that come from a trusted
    source.
    """

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwds):
        """Open *filename* and populate the dict from it when possible.

        filename -- path of the backing file.
        flag     -- 'r' = read-only (``sync()`` is a no-op), 'c' = create,
                    'n' = new (existing file contents are ignored).
        mode     -- None, or permission bits such as 0o666 applied to the
                    file on every ``sync()``.
        format   -- output format: 'csv', 'json', or 'pickle'.
        *args, **kwds -- extra initial items, applied after loading, with
                    the same meaning as for ``dict()``.

        Raises ValueError if an existing readable file is in none of the
        recognized formats.
        """
        self.flag = flag                    # r=readonly, c=create, or n=new
        self.mode = mode                    # None or permission bits like 0o666
        self.format = format                # 'csv', 'json', or 'pickle'
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            with open(filename, 'rb') as fileobj:
                self.load(fileobj)
        # dict.__init__ merges into the already loaded items; it does not clear.
        dict.__init__(self, *args, **kwds)

    def sync(self):
        """Write the dict to disk (no-op when opened with flag 'r').

        The data is first written to ``<filename>.tmp`` and then renamed
        over the real file, so a failed dump never damages the existing
        file.  Any exception from the dump, chmod or rename is re-raised
        after the temporary file has been removed.
        """
        if self.flag == 'r':
            return
        tempname = self.filename + '.tmp'
        try:
            with open(tempname, 'wb') as fileobj:
                self.dump(fileobj)
            # Set permissions before the rename so the final file never
            # exists with the wrong mode.
            if self.mode is not None:
                os.chmod(tempname, self.mode)
            # os.replace overwrites the destination atomically, also on
            # Windows where a plain rename fails if the target exists.
            os.replace(tempname, self.filename)
        except Exception:
            # The temp file is closed by now, so removing it is safe on
            # every platform.  Never let cleanup mask the original error.
            try:
                os.remove(tempname)
            except OSError:
                pass
            raise

    def close(self):
        """Flush the dict to disk; equivalent to ``sync()``."""
        self.sync()

    def __enter__(self):
        """Return the database itself for use in a ``with`` statement."""
        return self

    def __exit__(self, *exc_info):
        """Close (and therefore sync) on leaving the ``with`` block.

        The dict is written even when the block exits with an exception;
        the exception itself is not suppressed.
        """
        self.close()

    def dump(self, fileobj):
        """Serialize the dict to the binary file object *fileobj*.

        The format is chosen by ``self.format``.  Raises
        NotImplementedError for an unknown format.
        """
        if self.format == 'csv':
            # The csv module works on text; build the text in memory with
            # newline='' so csv controls the line endings, then encode.
            buffer = io.StringIO(newline='')
            csv.writer(buffer).writerows(self.items())
            fileobj.write(buffer.getvalue().encode('utf-8'))
        elif self.format == 'json':
            text = json.dumps(self, separators=(',', ':'))
            fileobj.write(text.encode('utf-8'))
        elif self.format == 'pickle':
            # A dict view cannot be pickled; store a list of (key, value)
            # pairs, which dict.update() accepts directly on load.
            pickle.dump(list(self.items()), fileobj, pickle.HIGHEST_PROTOCOL)
        else:
            raise NotImplementedError('Unknown format: %r' % self.format)

    def load(self, fileobj):
        """Update the dict from the binary file object *fileobj*.

        Formats are tried from most restrictive to least restrictive:
        pickle, then json, then csv.  The dict is only modified once a
        format has parsed completely, so a failed attempt leaves no
        partial data behind.  Raises ValueError when no format matches.
        """
        data = fileobj.read()
        parsers = (self._parse_pickle, self._parse_json, self._parse_csv)
        for parse in parsers:
            try:
                items = dict(parse(data))
            except Exception:
                # Deliberately broad: this is format sniffing, and
                # unpickling foreign data can raise almost any exception.
                continue
            self.update(items)
            return
        raise ValueError('File not in recognized format')

    @staticmethod
    def _parse_pickle(data):
        """Return the object pickled in the bytes *data*."""
        # SECURITY: pickle can run arbitrary code; trusted files only.
        return pickle.loads(data)

    @staticmethod
    def _parse_json(data):
        """Return the object encoded as UTF-8 JSON in the bytes *data*."""
        return json.loads(data.decode('utf-8'))

    @staticmethod
    def _parse_csv(data):
        """Return the rows of the UTF-8 CSV text in the bytes *data*."""
        text = io.StringIO(data.decode('utf-8'), newline='')
        return list(csv.reader(text))
def dbopen(filename, flag='c', mode=None, format='pickle'):
    """Open *filename* as a DictDB.

    Convenience function that emulates the API of shelve.open() or
    anydbm.open(); all arguments are passed straight to DictDB.
    """
    database = DictDB(filename, flag, mode, format)
    return database
if __name__ == '__main__':
    # Demo: open a JSON-backed database, add two entries, then show the
    # raw bytes that were written when the ``with`` block closed it.
    import random

    demo_path = '/tmp/tmp.shl'

    with dbopen(demo_path, 'c', format='json') as db:
        print(db, 'start')
        db['abc'] = '123'
        db['rand'] = random.randrange(10000)
        print(db, 'updated')

    with open(demo_path, 'rb') as raw:
        print(raw.read())
Diff to Previous Revision
--- revision 5 2009-02-05 17:12:01
+++ revision 6 2011-09-06 09:49:45
@@ -1,47 +1,44 @@
-'''Alternate DB based on a dict subclass
-
-Runs like gdbm's fast mode (all writes all delayed until close).
-While open, the whole dict is kept in memory. Start-up and
-close time's are potentially long because the whole dict must be
-read or written to disk.
-
-Input file format is automatically discovered.
-Output file format is selectable between pickle, json, and csv.
-All three are backed by fast C implementations.
-
-'''
-
import pickle, json, csv
import os, shutil
class DictDB(dict):
+ '''Alternate DB based on a dict subclass
- def __init__(self, filename, flag=None, mode=None, format=None, *args, **kwds):
- self.flag = flag or 'c' # r=readonly, c=create, or n=new
- self.mode = mode # None or octal triple like 0x666
- self.format = format or 'csv' # csv, json, or pickle
+ Runs like gdbm's fast mode (all writes all delayed until close).
+ While open, the whole dict is kept in memory. Start-up and close
+ time's are potentially long because the whole dict must be read
+ from or written to disk.
+
+ Input file format is automatically discovered.
+ Output file format is selectable between pickle, json, and csv.
+ All three are backed by fast C implementations.
+
+ '''
+
+ def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwds):
+ self.flag = flag # r=readonly, c=create, or n=new
+ self.mode = mode # None or an octal triple like 0666
+ self.format = format # 'csv', 'json', or 'pickle'
self.filename = filename
if flag != 'n' and os.access(filename, os.R_OK):
- file = __builtins__.open(filename, 'rb')
- try:
- self.load(file)
- finally:
- file.close()
- self.update(*args, **kwds)
+ with open(filename, 'rb') as fileobj:
+ self.load(fileobj)
+ dict.__init__(self, *args, **kwds)
def sync(self):
+ 'Write dict to disk'
if self.flag == 'r':
return
filename = self.filename
tempname = filename + '.tmp'
- file = __builtins__.open(tempname, 'wb')
+ fileobj = open(tempname, 'wb')
try:
- self.dump(file)
+ self.dump(fileobj)
except Exception:
- file.close()
os.remove(tempname)
raise
- file.close()
+ finally:
+ fileobj.close()
shutil.move(tempname, self.filename) # atomic commit
if self.mode is not None:
os.chmod(self.filename, self.mode)
@@ -49,41 +46,47 @@
def close(self):
self.sync()
- def dump(self, file):
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *exc_info):
+ self.close()
+
+ def dump(self, fileobj):
if self.format == 'csv':
- csv.writer(file).writerows(self.iteritems())
+ csv.writer(fileobj).writerows(self.iteritems())
elif self.format == 'json':
- json.dump(self, file, separators=(',', ':'))
+ json.dump(self, fileobj, separators=(',', ':'))
elif self.format == 'pickle':
- pickle.dump(self.items(), file, -1)
+ pickle.dump(self.items(), fileobj, -1)
else:
raise NotImplementedError('Unknown format: %r' % self.format)
- def load(self, file):
+ def load(self, fileobj):
# try formats from most restrictive to least restrictive
for loader in (pickle.load, json.load, csv.reader):
- file.seek(0)
+ fileobj.seek(0)
try:
- return self.update(loader(file))
+ return self.update(loader(fileobj))
except Exception:
pass
raise ValueError('File not in recognized format')
-def dbopen(filename, flag=None, mode=None, format=None):
+def dbopen(filename, flag='c', mode=None, format='pickle'):
+ # Function to emulate API of shelve.open() or anydbm.open()
return DictDB(filename, flag, mode, format)
if __name__ == '__main__':
import random
- os.chdir('/dbm_sqlite/alt')
- print(os.getcwd())
- s = dbopen('tmp.shl', 'c', format='json')
- print(s, 'start')
- s['abc'] = '123'
- s['rand'] = random.randrange(10000)
- s.close()
- f = __builtins__.open('tmp.shl', 'rb')
- print (f.read())
- f.close()
+
+ with dbopen('/tmp/tmp.shl', 'c', format='json') as s:
+ print(s, 'start')
+ s['abc'] = '123'
+ s['rand'] = random.randrange(10000)
+ print(s, 'updated')
+
+ with open('/tmp/tmp.shl', 'rb') as f:
+ print (f.read())