Welcome, guest | Sign In | My Account | Store | Cart

Full-featured O(1) LRU cache backported from Python3.3. The full Py3.3 API is supported (thread safety, maxsize, keyword args, type checking, __wrapped__, and cache_info). Includes Py3.3 optimizations for better memory utilization, fewer dependencies, and fewer dict lookups.

Python, 164 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from collections import namedtuple
from functools import update_wrapper
from threading import RLock

# Record type handed back by cache_info(): hit and miss counters followed by
# the configured maximum size and the current number of cached entries.
_CacheInfo = namedtuple("CacheInfo", "hits misses maxsize currsize")

class _HashedSeq(list):
    """List that remembers the hash of the tuple it was built from.

    The hash is computed a single time, at construction, and stored in
    ``hashvalue``; ``__hash__`` simply hands that stored value back, so
    hashing the object again never re-hashes its elements.
    """

    __slots__ = 'hashvalue'

    def __init__(self, tup, hash=hash):
        # Populate the list with the items of *tup*, then record the hash
        # of the original tuple.
        list.__init__(self, tup)
        self.hashvalue = hash(tup)

    def __hash__(self):
        return self.hashvalue

def _make_key(args, kwds, typed,
             kwd_mark = (object(),),
             fasttypes = set([int, str, frozenset, type(None)]),
             sorted=sorted, tuple=tuple, type=type, len=len):
    """Make a cache key from optionally typed positional and keyword arguments.

    The key starts as the positional ``args`` tuple.  When ``kwds`` is
    non-empty, ``kwd_mark`` is appended followed by the keyword items
    flattened in sorted order, so the same keywords always produce the
    same key regardless of call order.  When ``typed`` is true, the type
    of every positional value and every keyword value is appended too.

    If ``typed`` is false and the key consists of exactly one element whose
    exact type is in ``fasttypes``, that element itself is returned as the
    key.  Otherwise the key tuple is wrapped in a ``_HashedSeq``.

    ``kwd_mark``, ``fasttypes`` and the builtins passed as defaults are
    implementation details bound at definition time; callers normally
    supply only ``args``, ``kwds`` and ``typed``.
    """
    # NOTE: fasttypes is built with set([...]) rather than a set literal so
    # that the module still parses on Python 2.6, which has no {a, b} syntax.
    key = args
    if kwds:
        sorted_items = sorted(kwds.items())
        # The marker separates positional values from keyword items.
        key += kwd_mark
        for item in sorted_items:
            key += item
    if typed:
        key += tuple(type(v) for v in args)
        if kwds:
            key += tuple(type(v) for k, v in sorted_items)
    elif len(key) == 1 and type(key[0]) in fasttypes:
        # Single argument of a "fast" type: use the value itself as the key.
        return key[0]
    return _HashedSeq(key)

def lru_cache(maxsize=100, typed=False):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    If *maxsize* is zero, nothing is cached and every call is counted as a
    miss.  A negative *maxsize* is treated as zero.

    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
    f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    Returns a decorator; apply it to the function whose results should be
    cached.

    See:  http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    """

    # Users should only access the lru_cache through its public API:
    #       cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    # Treat a negative maxsize as 0 (no caching).  Without this, the
    # size-limited wrapper below would see the empty cache as already "full",
    # try to evict the root sentinel and fail with a KeyError on first use.
    if maxsize is not None and maxsize < 0:
        maxsize = 0

    def decorating_function(user_function):

        cache = dict()
        stats = [0, 0]                  # make statistics updateable non-locally
        HITS, MISSES = 0, 1             # names for the stats fields
        make_key = _make_key
        cache_get = cache.get           # bound method to lookup key or return None
        _len = len                      # localize the global len() function
        lock = RLock()                  # because linkedlist updates aren't threadsafe
        root = []                       # root of the circular doubly linked list
        root[:] = [root, root, None, None]      # initialize by pointing to self
        nonlocal_root = [root]                  # make updateable non-locally
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3    # names for the link fields

        if maxsize == 0:

            def wrapper(*args, **kwds):
                # no caching, just do a statistics update after a successful call
                result = user_function(*args, **kwds)
                stats[MISSES] += 1
                return result

        elif maxsize is None:

            def wrapper(*args, **kwds):
                # simple caching without ordering or size limit
                key = make_key(args, kwds, typed)
                result = cache_get(key, root)   # root used here as a unique not-found sentinel
                if result is not root:
                    stats[HITS] += 1
                    return result
                result = user_function(*args, **kwds)
                cache[key] = result
                stats[MISSES] += 1
                return result

        else:

            def wrapper(*args, **kwds):
                # size limited caching that tracks accesses by recency
                # (the plain args tuple is already a usable key when there
                # are no keywords and no type information to add)
                key = make_key(args, kwds, typed) if kwds or typed else args
                with lock:
                    link = cache_get(key)
                    if link is not None:
                        # record recent use of the key by moving it to the front of the list
                        root, = nonlocal_root
                        link_prev, link_next, key, result = link
                        # unlink from the current position ...
                        link_prev[NEXT] = link_next
                        link_next[PREV] = link_prev
                        # ... and re-insert just before root (most recent end)
                        last = root[PREV]
                        last[NEXT] = root[PREV] = link
                        link[PREV] = last
                        link[NEXT] = root
                        stats[HITS] += 1
                        return result
                # The user function runs with the lock released, so another
                # thread may populate the same key in the meantime.
                result = user_function(*args, **kwds)
                with lock:
                    root, = nonlocal_root
                    if key in cache:
                        # getting here means that this same key was added to the
                        # cache while the lock was released.  since the link
                        # update is already done, we need only return the
                        # computed result and update the count of misses.
                        pass
                    elif _len(cache) >= maxsize:
                        # use the old root to store the new key and result
                        oldroot = root
                        oldroot[KEY] = key
                        oldroot[RESULT] = result
                        # empty the oldest link and make it the new root
                        root = nonlocal_root[0] = oldroot[NEXT]
                        oldkey = root[KEY]
                        # NOTE(review): oldvalue is never read; presumably it
                        # is kept (as in the CPython original) so the evicted
                        # result stays referenced while the links and the
                        # dictionary are being updated.
                        oldvalue = root[RESULT]
                        root[KEY] = root[RESULT] = None
                        # now update the cache dictionary for the new links
                        del cache[oldkey]
                        cache[key] = oldroot
                    else:
                        # put result in a new link at the front of the list
                        last = root[PREV]
                        link = [last, root, key, result]
                        last[NEXT] = root[PREV] = cache[key] = link
                    stats[MISSES] += 1
                return result

        def cache_info():
            """Report cache statistics"""
            with lock:
                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics"""
            with lock:
                cache.clear()
                root = nonlocal_root[0]
                root[:] = [root, root, None, None]
                stats[:] = [0, 0]

        # Set explicitly because older update_wrapper versions do not add it.
        wrapper.__wrapped__ = user_function
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function

Documentation and examples: http://docs.python.org/dev/library/functools.html#functools.lru_cache

The Python3.3 update added the typed option for differentiating between equal arguments of different types. This makes it possible to cache different results for f(1) and for f(1.0).

Python3.3 also added a __wrapped__ attribute that can be used for bypassing or replacing the cache.

The Python 3.3 version was also refactored, beautified, and optimized. It consumes less memory than the 3.2 version. It makes half as many hash calls. And, it inlines the relevant parts of OrderedDict while eliminating unused functionality. It relies exclusively on Python basics such as 1) plain python lists to implement a circular doubly linked list, 2) a plain python dictionary, and 3) a plain tuple used for storing the positional arguments, sorted keyword arguments, and optional type information.

The backport works on Python2.6 or later, Python3.0 or later, Python with Psyco, and with PyPy.

The backport entailed replacing Python 3's nonlocal keyword with nested scope lookup of a mutable list (for the hit/miss statistics). Also, the __wrapped__ attribute was added manually (because older versions of functools.wraps didn't support it). The original 3.3 is at: http://hg.python.org/cpython/file/97b0cf9df420/Lib/functools.py#l139

To backport to even earlier versions of Python, replace the named tuple with an equivalent handmade class and replace the with-statement with an equivalent try/finally.

No print-statements were harmed during this backport ;-)

5 comments

Fabio Zadrozny 11 years, 3 months ago  # | flag

Just in case someone wants a space-based version (i.e.: keys are not pruned based on the number of keys, but based on the total space held in the cache given a function which calculates the space), there's a version below (and note that I did remove some things I didn't want for speed and simplicity: it has no statistics, it's not thread-safe, must have a max size, doesn't accept kwargs)

def lru_cache(maxsize=100, get_space=lambda obj:1):
    """Space-bounded least-recently-used cache decorator.

    Instead of limiting the number of entries, the cache limits the total
    "space" of the stored results.  *get_space* is called with each newly
    computed result and returns the space it occupies; the default counts
    every result as 1.  After a result is stored, the least recently used
    entries are purged until the accumulated space no longer exceeds
    *maxsize*.

    The decorated function accepts positional arguments only, and they must
    be hashable.  There are no statistics and no locking.  Clear the cache
    with f.cache_clear() and access the underlying function with
    f.__wrapped__.
    """
    assert maxsize > 0

    def decorating_function(user_function):
        cache = dict()
        cache_get = cache.get  # bound method to lookup key or return None
        _get_space = get_space  # make it local
        total_space = [0]  # one-element list so nested functions can update it
        root = []  # root of the circular doubly linked list
        nonlocal_root = [root]  # make updateable non-locally
        root[:] = [root, root, None, None, 0]  # initialize by pointing to self
        PREV, NEXT = 0, 1  # names for the link fields

        def wrapper(*args):
            # size limited caching that tracks accesses by recency
            key = args
            link = cache_get(key)
            if link is not None:
                # record recent use of the key by moving it to the front of the list
                # HIT
                root, = nonlocal_root
                link_prev, link_next, key, result, _space = link
                link_prev[NEXT] = link_next
                link_next[PREV] = link_prev
                last = root[PREV]
                last[NEXT] = root[PREV] = link
                link[PREV] = last
                link[NEXT] = root
                return result

            # MISS
            result = user_function(*args)
            space = _get_space(result)
            total_space[0] += space

            root = nonlocal_root[0]

            # append the new link just before root (most recently used end)
            last = root[PREV]
            link = [last, root, key, result, space]
            cache[key] = last[NEXT] = root[PREV] = link

            while total_space[0] > maxsize:
                # purge least recently used cache entry
                _old_prev, old_next, old_key, _old_result, space = root[NEXT]
                total_space[0] -= space
                root[NEXT] = old_next
                old_next[PREV] = root
                del cache[old_key]

            return result

        def cache_clear():
            """Clear the cache and reset the accumulated space"""
            # Update the shared counter in place; rebinding the name here
            # would only create a local and leave the old total behind.
            total_space[0] = 0
            cache.clear()
            root = nonlocal_root[0]
            root[:] = [root, root, None, None, 0]

        wrapper.__wrapped__ = user_function
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function
Brian Kearns 11 years ago  # | flag

line 133: "oldvalue = root[RESULT]" seems unnecessary?

Joseph Martinot-Lagarde 10 years, 6 months ago  # | flag

To keep compatibility with python 2.6, you need to replace

fasttypes = {int, str, frozenset, type(None)},

with

fasttypes = set(int, str, frozenset, type(None)),
John Stanford 10 years ago  # | flag

I think the statement for 2.6 compatibility is:

fasttypes=set([int, str, frozenset, type(None)]),

Jason R. Coombs 9 years, 11 months ago  # | flag

I've packaged the above code, unchanged, as backports.functools_lru_cache: https://pypi.python.org/pypi/backports.functools_lru_cache/1.0