import pickle, json, csv
import os, shutil

class DictDB(dict):
    '''Alternate DB based on a dict subclass

    Runs like gdbm's fast mode (all writes are delayed until close).
    While open, the whole dict is kept in memory.  Start-up and close
    times are potentially long because the whole dict must be read
    from or written to disk.

    Input file format is automatically discovered.
    Output file format is selectable among pickle, json, and csv.
    All three are backed by fast C implementations.

    '''

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwds):
        self.flag = flag                    # r=readonly, c=create, or n=new
        self.mode = mode                    # None or an octal triple like 0666
        self.format = format                # 'csv', 'json', or 'pickle'
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            with open(filename, 'rb') as fileobj:
                self.load(fileobj)
        dict.__init__(self, *args, **kwds)

    def sync(self):
        'Write dict to disk'
        if self.flag == 'r':
            return
        filename = self.filename
        tempname = filename + '.tmp'
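        # dump to a temp file first so a failed or partial write never
        # clobbers the existing database; the move below is the commit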
        fileobj = open(tempname, 'wb')
        try:
            self.dump(fileobj)
        except Exception:
            fileobj.close()                 # an open file cannot be removed on Windows
            os.remove(tempname)
            raise
        finally:
            fileobj.close()                 # no-op if already closed on the error path
        shutil.move(tempname, self.filename)    # atomic commit
        if self.mode is not None:
            os.chmod(self.filename, self.mode)

    def close(self):
        self.sync()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def dump(self, fileobj):
        if self.format == 'csv':
            csv.writer(fileobj).writerows(self.iteritems())
        elif self.format == 'json':
            json.dump(self, fileobj, separators=(',', ':'))
        elif self.format == 'pickle':
            pickle.dump(self.items(), fileobj, -1)
        else:
            raise NotImplementedError('Unknown format: %r' % self.format)

    def load(self, fileobj):
        # try formats from most restrictive to least restrictive
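        # pickle and json raise on data they did not write, while csv.reader
        # accepts almost any text, so csv is tried last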
        for loader in (pickle.load, json.load, csv.reader):
            fileobj.seek(0)
            try:
                return self.update(loader(fileobj))
            except Exception:
                pass
        raise ValueError('File not in recognized format')


def dbopen(filename, flag='c', mode=None, format='pickle'):
    # Function to emulate API of shelve.open() or anydbm.open()
    return DictDB(filename, flag, mode, format)



if __name__ == '__main__':
    import random

    with dbopen('/tmp/tmp.shl', 'c', format='json') as s:
        print(s, 'start')
        s['abc'] = '123'
        s['rand'] = random.randrange(10000)
        print(s, 'updated')

    with open('/tmp/tmp.shl', 'rb') as f:
        print(f.read())
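
The automatic format discovery can be checked by reopening the file the demo just wrote as JSON. Even with the default format of 'pickle', load() falls back to json and recovers the same mapping, and with flag 'r' nothing is written back on close. A small follow-up sketch, assuming it runs in the same session as the demo above (so dbopen is defined and /tmp/tmp.shl exists):

with dbopen('/tmp/tmp.shl', 'r') as s:      # read-only: sync() is a no-op on close
    print(s, 'reloaded')                    # same 'abc' and 'rand' entries, recovered from JSON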
