Welcome, guest | Sign In | My Account | Store | Cart
import pickle, json, csv, os, shutil

class DictDB(dict):
    '''Alternate DB based on a dict subclass

    Runs like gdbm's fast mode (all writes are delayed until close).
    While open, the whole dict is kept in memory.  Start-up and close
    times are potentially long because the whole dict must be read
    from or written to disk.

    Input file format is automatically discovered.
    Output file format is selectable between pickle, json, and csv.
    All three are backed by fast C implementations.

    Instances work as context managers: leaving the ``with`` block
    calls close(), which writes the dict back to disk.

    SECURITY: format discovery tries pickle.load() on the file, and
    unpickling can execute arbitrary code.  Only open trusted files.

    '''

    def __init__(self, filename, flag='c', mode=None, format='pickle', *args, **kwds):
        '''Open *filename* and preload whatever it already contains.

        filename -- path of the backing file
        flag     -- 'r' (read-only: sync() writes nothing),
                    'c' (load the file when it is readable), or
                    'n' (start empty, ignoring any existing file)
        mode     -- None, or permission bits handed to os.chmod()
                    after every write
        format   -- output format used by dump(): 'csv', 'json', or
                    'pickle'

        Extra positional and keyword arguments are forwarded to dict()
        after the file has been loaded, so they override stored entries.

        Raises ValueError when a readable file exists but is in none of
        the recognized formats.
        '''
        self.flag = flag                    # r=readonly, c=create, or n=new
        self.mode = mode                    # None or an octal triple like 0o644
        self.format = format                # 'csv', 'json', or 'pickle'
        self.filename = filename
        if flag != 'n' and os.access(filename, os.R_OK):
            self._load_from_path(filename)
        dict.__init__(self, *args, **kwds)

    def _load_from_path(self, filename):
        'Load filename, opening it in whichever mode lets load() recognize it'
        # pickle.load needs a binary stream and csv.reader needs a text
        # stream, so no single open mode can sniff every format.  Start with
        # the mode matching the configured output format (the common case,
        # costing nothing extra) and fall back to the other mode when the
        # file turns out to be in a different format.
        if self.format == 'pickle':
            first_mode, second_mode = 'rb', 'r'
        else:
            first_mode, second_mode = 'r', 'rb'
        try:
            with open(filename, first_mode) as fileobj:
                self.load(fileobj)
            return
        except ValueError:
            pass
        with open(filename, second_mode) as fileobj:
            self.load(fileobj)

    def sync(self):
        '''Write dict to disk

        Does nothing when the database was opened read-only (flag 'r').
        The data is first written to ``<filename>.tmp`` and then moved
        over the real file, so a failed dump never damages the existing
        file.  Exceptions raised by dump() propagate after the temporary
        file has been removed.
        '''
        if self.flag == 'r':
            return
        tempname = self.filename + '.tmp'
        fileobj = open(tempname, 'wb' if self.format == 'pickle' else 'w')
        try:
            # The with-block closes the file *before* the except clause
            # runs; removing a still-open file fails on Windows and would
            # mask the original error.
            with fileobj:
                self.dump(fileobj)
        except Exception:
            os.remove(tempname)
            raise
        shutil.move(tempname, self.filename)    # commit the new contents
        if self.mode is not None:
            os.chmod(self.filename, self.mode)

    def close(self):
        'Flush the dict to disk (same as sync())'
        self.sync()

    def __enter__(self):
        'Return the database itself for use inside a with-block'
        return self

    def __exit__(self, *exc_info):
        'Write the dict to disk on leaving the with-block, even after an error'
        self.close()

    def dump(self, fileobj):
        '''Serialize the dict to *fileobj* in the configured format.

        *fileobj* must be a binary stream for 'pickle' and a text stream
        for 'csv' and 'json'.  Raises NotImplementedError for any other
        value of self.format.
        '''
        if self.format == 'csv':
            csv.writer(fileobj).writerows(self.items())
        elif self.format == 'json':
            json.dump(self, fileobj, separators=(',', ':'))
        elif self.format == 'pickle':
            pickle.dump(dict(self), fileobj, 2)
        else:
            raise NotImplementedError('Unknown format: %r' % self.format)

    def load(self, fileobj):
        '''Merge the contents of *fileobj* into the dict.

        The format is sniffed by trying pickle, then json, then csv.
        Raises ValueError when none of them yields a mapping or a
        sequence of key/value pairs; in that case the dict is left
        exactly as it was.
        '''
        # try formats from most restrictive to least restrictive
        for loader in (pickle.load, json.load, csv.reader):
            fileobj.seek(0)
            try:
                # Build a plain dict first so a loader that fails part-way
                # (e.g. a malformed csv row) cannot leave partial entries.
                entries = dict(loader(fileobj))
            except Exception:
                # Deliberately broad: any failure only means "not this
                # format", so move on to the next candidate.
                continue
            self.update(entries)
            return
        raise ValueError('File not in recognized format')


def dbopen(filename, flag='c', mode=None, format='pickle'):
    '''Open a DictDB, emulating the API of shelve.open() or anydbm.open().

    All four arguments are handed to DictDB unchanged.
    '''
    return DictDB(filename, flag=flag, mode=mode, format=format)



if __name__ == '__main__':
    import random

    demo_path = '/tmp/demo.json'

    # Open (or create) a JSON-backed persistent dictionary and modify it;
    # leaving the with-block writes the changes to disk.
    with dbopen(demo_path, 'c', format='json') as db:
        print(db, 'start')
        db['abc'] = '123'
        db['rand'] = random.randrange(10000)
        print(db, 'updated')

    # Dump the raw bytes of the file that was just written
    with open(demo_path, 'rb') as raw:
        contents = raw.read()
    print(contents)

Diff to Previous Revision

--- revision 8 2011-09-06 10:57:29
+++ revision 9 2011-09-06 12:23:23
@@ -58,7 +58,7 @@
         elif self.format == 'json':
             json.dump(self, fileobj, separators=(',', ':'))
         elif self.format == 'pickle':
-            pickle.dump(list(self.items()), fileobj, 2)
+            pickle.dump(dict(self), fileobj, 2)
         else:
             raise NotImplementedError('Unknown format: %r' % self.format)
 

History