# -*- coding: UTF-8 -*-
# Created by Florian Leitner on 2007-10-15.
# Copyright (c) 2007 Florian Leitner.
# All rights reserved.
# GNU GPL LICENSE
#
# This module is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; latest version thereof,
# available at: <http://www.gnu.org/licenses/gpl.html>.
#
# This module is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this module; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
"""gcache module
Provides wrappers to allow generators to be accessed by subscript indices.
The generator is only iterated as far as required to return the requested
items.
Determining its length or using negative indices will automatically lead to
the complete consumption of the generator, as well as using any special syntax
like:
if elt in glist:
...
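
Example usage (an illustrative sketch; it assumes this file is importable
as gcache and that the bsddb3 package is installed):

    from gcache import dbcache

    squares = dbcache(x * x for x in xrange(10))
    print squares[3]    # iterates the generator only up to index 3 -> 9
    print len(squares)  # consumes the rest of the generator -> 10
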
Copyright (c) 2007 Florian Leitner. All rights reserved.
"""
import os
from bsddb3 import db, dbshelve
__version__ = "1.0"
__author__ = "Florian Leitner"
class dbcache(list):
"""Extends the full functionality of lists to a generator.
The generator items are stored in an external database. The storages is
inteded to be as small as possible by storing only one copy of each
generator item (uniqueness being defined as id(item)).
Note the cache will restore items although if you extend this list beyond
the generator, then delete items in your code, fetching them again from
the cache (the item now has a new id!) and the again put them back into
the cache. This does not break anything, but it is not very efficient.
Using id() should be thread safe (the number id() returns is the address
in memory), additionally the temporary storage created by this mechanism
in /tmp uses id(self) and os.getpid() to create its own, thread- and
process-safe cache.
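
    For illustration (an illustrative sketch of the identity semantics),
    values read back from the cache are unpickled copies, not the original
    objects:

        data = ["x" * 1000]       # a non-interned object
        glist = dbcache(iter(data))
        glist[0] == data[0]       # True: equal values
        glist[0] is data[0]       # False: the cached copy is a new object
                                  # with a new id()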
"""
dbfile_directory = "/tmp"
dbfile_prefix = "bdbcache-"
dbfile_suffix = ".bdb"
def __init__(self, generator, directory=None, prefix=None, suffix=None):
"""Create a new generator list cache.
generator - the generator to cache
directory - optional directory to create the cache
(defaults to dbfile_directory)
prefix - filename prefix for the cache (defaults to dbfile_prefix)
suffix - filename suffix for the cache (defaults to dbfile_suffix)
Following attributes are bound to the instance:
generator - the generator itself
uid - the unique ID of this cache ("%s-%s" % (os.getpid(), id(self))
path - the path to the cachefile
cache - the cache object
Note that using negative indices automatically leads to consuming
the whole generator.
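
        For example (an illustrative call; the argument values shown here
        are hypothetical):

            glist = dbcache(gen, directory="/var/tmp", prefix="mylist-")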
"""
assert hasattr(generator, "next") and \
hasattr(generator, "__iter__")
super(dbcache, self).__init__()
if directory is None:
directory = dbcache.dbfile_directory
if prefix is None:
prefix = dbcache.dbfile_prefix
if suffix is None:
suffix = dbcache.dbfile_suffix
self.generator = generator
self.uid = "%i-%i" % (os.getpid(), id(self))
self.path = "%s/%s%s%s" % \
(directory, suffix, self.uid, prefix)
try:
os.unlink(self.path)
except OSError:
pass
self.__idx = None
self.__len = None
self.cache = dbshelve.DBShelf()
self.cache.open(self.path, None, db.DB_HASH, db.DB_CREATE)
def __del__(self):
"""Delete the instance and destroy the cache file (if it still
exists).
"""
self.cache.close()
del self.cache
try:
os.unlink(self.path)
except OSError:
pass
def __contains__(self, elm):
"""Checks if id(elt) == id(s[i]) for any i already cached."""
key = self.__key(elm)
return self.cache.has_key(key)
    def __getitem__(self, k):
        """Using a negative index will consume the generator."""
        if isinstance(k, slice):
            # slice attributes may be None; resolving them with indices()
            # consumes the generator (cf. the negative-index behavior)
            start, stop, step = k.indices(len(self))
            return [self.__getitem__(i)
                    for i in xrange(start, stop, step)]
        if k < 0:
            # a negative index refers to the complete list, so the
            # generator has to be consumed first
            len(self)
        size = self.__size()
        if size <= k:
            try:
                self.__cache(k - size + 1)
            except StopIteration:
                raise IndexError("list index out of range")
        key = super(dbcache, self).__getitem__(k)
        return self.cache.get(key)
def __getslice__(self, i, j):
"""Using negative indices will consume the generator."""
size = self.__size()
if size <= j:
try:
self.__cache(j - size + 1)
except StopIteration:
                # note: Python does not raise anything if j is too large!
pass
return [self.cache.get(key)
for key in super(dbcache, self).__getslice__(i, j)]
def __iter__(self):
"""Return an iterator over all items - consumes the generator."""
self.__idx = 0
self.__len = self.__size()
return self
def __len__(self):
"""Calling len(s) will consume the generator."""
try:
while True:
self.__next()
except StopIteration:
pass
return self.__size()
def __setitem__(self, k, v):
"""Replace s[k] with v and returns the original element."""
old_key = super(dbcache, self).__getitem__(k)
new_key = self.__key(v)
old_val = self.cache.get(old_key)
super(dbcache, self).__setitem__(k, new_key)
if super(dbcache, self).count(old_key) == 0:
self.cache.delete(old_key)
if not self.cache.has_key(new_key):
self.cache.put(new_key, v)
return old_val
    def __cache(self, n):
        """Cache the next n elements or raise StopIteration."""
        for _ in xrange(n):
            self.__next()
def __key(self, elt):
"""Return the key for an element."""
return str(id(elt))
def __next(self):
"""Get the next element..."""
elm = self.generator.next()
# ...which raised a StopIteration - or now add it to the mapping
key = self.__key(elm)
super(dbcache, self).append(key)
if not self.cache.has_key(key):
self.cache.put(key, elm)
return elm
def __size(self):
"""Get the current size of the cache list."""
return super(dbcache, self).__len__()
def append(self, elm):
key = self.__key(elm)
super(dbcache, self).append(key)
if not self.cache.has_key(key):
self.cache.put(key, elm)
def extend(self, iterable):
for elm in iterable:
self.append(elm)
def count(self, elm):
"""Return number of i's for which id(s[i]) == id(elm)."""
key = self.__key(elm)
return super(dbcache, self).count(key)
def index(self, elm, *args):
"""Return smallest i such that id(s[i]) == id(elm)."""
key = self.__key(elm)
return super(dbcache, self).index(key, *args)
def insert(self, i, elm):
key = self.__key(elm)
super(dbcache, self).insert(i, key)
if not self.cache.has_key(key):
self.cache.put(key, elm)
def next(self):
"""Next element - can consume the generator."""
if self.__idx < self.__len:
key = super(dbcache, self).__getitem__(self.__idx)
self.__idx += 1
return self.cache.get(key)
else:
return self.__next()
def remove(self, elm):
key = self.__key(elm)
super(dbcache, self).remove(key)
if super(dbcache, self).count(key) == 0:
self.cache.delete(key)
def pop(self, *i):
key = super(dbcache, self).pop(*i)
val = self.cache.get(key)
if super(dbcache, self).count(key) == 0:
self.cache.delete(key)
return val
def sort(self, *comp, **kws):
keys = [super(dbcache, self).__getitem__(i)
for i in xrange(self.__size())]
values = [self.cache.get(k) for k in keys]
values.sort(*comp, **kws)
new_keys = [self.__key(v) for v in values]
for i, k in enumerate(new_keys):
super(dbcache, self).__setitem__(i, k)