Compare speeds of different kinds of access to variables « Python recipes

Compare speeds of locals, nested scopes, global, builtins, instance variables, and class variables.

      # Show relative speeds of local, nonlocal, global, and built-in access.

# Shared loop iterable: a list of 500 items, so each benchmark's loop body
# runs 500 times per call when the default ``trials`` argument is used.
trials = [1] * 500

def read_local(trials=trials):
    """Benchmark reading a function-local variable.

    Each pass of the loop evaluates ``v_local`` 25 times and discards the
    result.  With the default ``trials`` (500 items) that is 12,500 reads
    per call.

    trials -- iterable that drives the loop; its items are not used.
    """
    v_local = 1
    for t in trials:
        # A bare name is an expression statement: the name is looked up
        # and the value is thrown away.  5 rows x 5 reads = 25 lookups.
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local

def make_nonlocal_reader():
    """Build and return the ``read_nonlocal`` benchmark function.

    The returned function reads ``v_nonlocal`` -- a variable owned by this
    enclosing function, not by the returned function itself -- 25 times
    per loop iteration.
    """
    v_nonlocal = 1
    def inner(trials=trials):
        for t in trials:
            # v_nonlocal is neither local to inner() nor a global; it is
            # resolved in the enclosing make_nonlocal_reader() scope.
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
    # The timing report prints each function's __name__, so relabel the
    # closure; otherwise it would be reported as 'inner'.
    inner.__name__ = 'read_nonlocal'
    return inner

# The actual benchmark function: a closure over v_nonlocal.
read_nonlocal = make_nonlocal_reader()

# Module-level variable that read_global() reads and write_global() rebinds.
v_global = 1
def read_global(trials=trials):
    """Benchmark reading a module-level (global) variable.

    Each pass of the loop evaluates ``v_global`` 25 times and discards the
    result.

    trials -- iterable that drives the loop; its items are not used.
    """
    for t in trials:
        # v_global is not assigned in this function, so every reference is
        # a global lookup.
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global

def read_builtin(trials=trials):
    """Benchmark reading a built-in name.

    Each pass of the loop evaluates the name ``oct`` 25 times and discards
    the result.  The function is only looked up, never called.

    trials -- iterable that drives the loop; its items are not used.
    """
    for t in trials:
        # ``oct`` is not bound locally or (in the visible source) at module
        # level, so the lookup has to fall through to the builtins.
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct

class A(object):
    """Minimal class used as the target of the attribute benchmarks.

    It derives explicitly from ``object`` so that it is a new-style class
    under Python 2 as well as Python 3.
    """

    def m(self):
        """Do nothing and return None; exists so method lookups can be timed."""

def read_classvar(trials=trials, A=A):
    """Benchmark reading a class attribute through the class object.

    Each pass of the loop evaluates ``A.x`` 25 times and discards the
    result.

    trials -- iterable that drives the loop; its items are not used.
    A      -- class whose attribute ``x`` is read; passed as a default
              argument so the name ``A`` itself is a local inside the loop.

    Side effect: sets ``A.x = 1`` on the class at the start of every call.
    """
    # Make sure the attribute exists.  Note this one assignment is part of
    # the timed call and the attribute stays on the class afterwards.
    A.x = 1
    for t in trials:
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x

def read_instancevar(trials=trials, a=A()):
    """Benchmark reading an instance attribute.

    Each pass of the loop evaluates ``a.x`` 25 times and discards the
    result.

    trials -- iterable that drives the loop; its items are not used.
    a      -- instance whose attribute ``x`` is read.  The default instance
              is created once, when this function is defined, and is
              reused on every call.

    Side effect: sets ``a.x = 1`` on the instance at the start of every call.
    """
    # Make sure the attribute exists on the instance before the timed loop.
    a.x = 1
    for t in trials:
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x

def read_unboundmethod(trials=trials, A=A):
    """Benchmark looking up a method through its class (``A.m``).

    Each pass of the loop evaluates ``A.m`` 25 times and discards the
    result.  The method is only looked up, never called.

    trials -- iterable that drives the loop; its items are not used.
    A      -- class whose method ``m`` is looked up.
    """
    # real unbound methods are only in Py2.x
    for t in trials:
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m

def read_boundmethod(trials=trials, a=A()):
    """Benchmark looking up a method through an instance (``a.m``).

    Each pass of the loop evaluates ``a.m`` 25 times and discards the
    result.  The method is only looked up, never called.

    trials -- iterable that drives the loop; its items are not used.
    a      -- instance whose method ``m`` is looked up.  The default
              instance is created once, when this function is defined.
    """
    for t in trials:
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m

def write_local(trials=trials):
    """Benchmark assigning to a function-local variable.

    Each pass of the loop binds ``v_local`` to the constant 1 a total of
    25 times.

    trials -- iterable that drives the loop; its items are not used.
    """
    v_local = 1
    for t in trials:
        # 5 rows x 5 assignments = 25 local stores per iteration.
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1

def make_nonlocal_writer():
    """Build and return the ``write_nonlocal`` benchmark function.

    The returned function rebinds ``v_nonlocal`` -- a variable owned by
    this enclosing function -- 25 times per loop iteration.  It relies on
    the ``nonlocal`` statement, so this definition only compiles on
    Python 3.
    """
    v_nonlocal = 1
    def inner(trials=trials):
        # Without this declaration the assignments below would create a
        # new local instead of rebinding the enclosing scope's variable.
        nonlocal v_nonlocal            # this is invalid syntax in Py2.x
        for t in trials:
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
    # The timing report prints each function's __name__, so relabel the
    # closure; otherwise it would be reported as 'inner'.
    inner.__name__ = 'write_nonlocal'
    return inner

# The actual benchmark function: a closure that rebinds v_nonlocal.
write_nonlocal = make_nonlocal_writer()

def write_global(trials=trials):
    """Benchmark assigning to a module-level (global) variable.

    Each pass of the loop rebinds the module-level ``v_global`` to the
    constant 1 a total of 25 times.

    trials -- iterable that drives the loop; its items are not used.
    """
    # Without this declaration the assignments would create a local.
    global v_global
    for t in trials:
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1

def write_classvar(trials=trials, A=A):
    """Benchmark assigning to a class attribute through the class object.

    Each pass of the loop sets ``A.x`` to the constant 1 a total of 25
    times.

    trials -- iterable that drives the loop; its items are not used.
    A      -- class whose attribute ``x`` is assigned.

    Side effect: leaves ``A.x == 1`` on the class.
    """
    for t in trials:
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1

def write_instancevar(trials=trials, a=A()):
    """Benchmark assigning to an instance attribute.

    Each pass of the loop sets ``a.x`` to the constant 1 a total of 25
    times.

    trials -- iterable that drives the loop; its items are not used.
    a      -- instance whose attribute ``x`` is assigned.  The default
              instance is created once, when this function is defined,
              and is reused on every call.
    """
    for t in trials:
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1

def loop_overhead(trials=trials):
    """Run the bare loop with an empty body.

    This times only the iteration over ``trials``, which every other
    benchmark in this file also pays, so its result can be compared
    against theirs.

    trials -- iterable that drives the loop; its items are not used.
    """
    for t in trials:
        pass


if __name__ == '__main__':
    from timeit import Timer

    # Run order is significant for reporting: reads first, then writes,
    # then the empty-loop baseline.
    benchmarks = (
        read_local, read_nonlocal, read_global, read_builtin,
        read_classvar, read_instancevar, read_unboundmethod, read_boundmethod,
        write_local, write_nonlocal, write_global,
        write_classvar, write_instancevar,
        loop_overhead,
    )

    for func in benchmarks:
        # Take 7 independent measurements of 1000 calls each and report
        # the fastest one, in seconds, next to the benchmark's name.
        run_times = Timer(func).repeat(7, 1000)
        fastest = min(run_times)
        print('{:5.3f}\t{}'.format(fastest, func.__name__))

      

Comparative timings for Python 3.2.1 and Python 2.7.2. Both are 64-bit Darwin builds running on a 2.4 GHz Intel Core 2 Duo:

Py3.2    Py2.7
-----   -----
0.108   0.182   read_local
0.149   0.242   read_nonlocal
0.216   0.310   read_global
0.364   0.450   read_builtin
0.534   0.629   read_classvar (or 0.636 in a Py2.7 old-style class)
0.682   0.808   read_instancevar (or 0.637 in a Py2.7 old-style class)
        0.903   read_unboundmethod (0.938 in a Py2.7 old-style class)
0.779   0.904   read_boundmethod (or 1.215 in a Py2.7 old-style class)


0.128   0.178   write_local
0.192           write_nonlocal
0.442   0.576   write_global
2.134   2.414   write_classvar (or 1.010 in a Py2.7 old-style class)
0.967   1.177   write_instancevar (or 0.942 in a Py2.7 old-style class)

0.012   0.011   loop_overhead

Tags: optimization

7 comments

Louis RIVIERE 12 years, 8 months ago # | flag

hence BindingConstants at compile time :)

Eric Snow 12 years, 8 months ago # | flag

Nice and concrete! With the comparative timings, it's not only comparative horizontally, but vertically as well, right?

You could certainly include other things like access via descriptors or getattr/setattr, but I think you nailed the "first tier" of variable access.

Michael Schurter 12 years, 8 months ago # | flag

Does using __slots__ affect performance?

Titusz Pan 12 years, 8 months ago # | flag

Here is a comparison between Python 2.7.2 and PyPy 1.5 on Win7/32-bit:

Py2.7   PyPy1.5
-----   -------
0.160   0.004     read_local
0.239   0.004     read_nonlocal
0.281   0.004     read_global
0.543   0.005     read_builtin
0.972   0.004     read_instancevar
1.036   0.004     read_unboundmethod
1.068   0.005     read_boundmethod
0.191   0.004     write_local
0.193   0.004     write_nonlocal
0.475   0.007     write_global
2.624   0.672     write_classvar
1.253   0.007     write_instancevar
0.011   0.004     loop_overhead

I think that is quite impressive... To make this work I had to do 2 things:

remove line 102 (nonlocal does not work on py2.x)
remove read_classvar from test as it did not complete with pypy

Justin 12 years, 8 months ago # | flag

I just did this with PyPy 1.6 on Mac and received basically the same results, except that everything but write_classvar came to 0.001, which makes me think. I only had to comment out "nonlocal", but this has further implications. I haven't looked closely at it yet, but yes, it is impressive.

Jan Kaliszewski 12 years, 7 months ago # | flag

@Michael:

According to my quick test, __slots__ are a bit faster:

Python 3.2.1rc1 (default, May 18 2011, 18:34:29)
[GCC 4.6.1 20110507 (prerelease)] on linux2
Type "help", "copyright", "credits" or "license" for more information.

>>> from timeit import Timer
>>> class C(object): pass
... 
>>> class D(object): __slots__ = 'attr'
... 
>>> c = C()
>>> d = D()
>>> c.attr = 9
>>> d.attr = 9
>>> def test_c(cc=c, trials=([1] * 500)):
...     for t in trials:
...             cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr;
... 
>>> def test_d(dd=d, trials=([1] * 500)):
...     for t in trials:
...             dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr;
... 
>>> def test_L(LL=c, trials=([1] * 500)):
...     for t in trials:
...             LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; 
... 
>>> for f in test_c, test_d, test_L, test_c, test_d, test_L:
...     print('{0:5.3f}\t{1}'.format(min(Timer(f).repeat(7, 1000)), f.__name__))
... 
1.430   test_c
1.304   test_d
0.149   test_L
1.433   test_c
1.304   test_d
0.149   test_L

s_h_a_i_o 12 years, 7 months ago # | flag

Thanks to its JIT generating C bytecode, pypy is about 40 times faster than cpython, at least for variable access. However, overall performance figures (http://speed.pypy.org/comparison/) indicate that pypy is usually slightly faster than cpython, but not by that margin.

Hence there must be other operations that take much longer with pypy-JIT than with psyco. Do you know which ones? I wonder why the 40x speedup on access becomes a 2x speedup on average.

◄	Python recipes (4591)	►
◄	Raymond Hettinger's recipes (97)	►

Compare speeds of different kinds of access to variables (Python recipe) by Raymond Hettinger
ActiveState Code (http://code.activestate.com/recipes/577834/)

7 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Compare speeds of different kinds of access to variables (Python recipe) by Raymond Hettinger ActiveState Code (http://code.activestate.com/recipes/577834/)

7 comments

Tags

Required Modules

Other Information and Tasks

Accounts

Code Recipes

Feedback & Information

ActiveState

Compare speeds of different kinds of access to variables (Python recipe) by Raymond Hettinger
ActiveState Code (http://code.activestate.com/recipes/577834/)