Welcome, guest | Sign In | My Account | Store | Cart

Compare speeds of locals, nested scopes, globals, builtins, instance variables, and class variables.

Python, 146 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# Show relative speeds of local, nonlocal, global, and built-in access,
# as well as class/instance attribute and method lookups.

# Shared iterable that drives every benchmark loop: 500 iterations per call.
# The benchmark functions take it as a default argument, so inside each
# function the loop iterates over a local name.
trials = [1] * 500

def read_local(trials=trials):
    """Benchmark reading a function-local variable.

    For every element of ``trials`` the name ``v_local`` is evaluated 25
    times as a bare expression statement and the value is discarded.

    trials -- iterable whose length sets the iteration count; defaults to
              the module-level list, bound once when the function is defined.
    """
    v_local = 1
    for t in trials:
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local
        v_local;    v_local;    v_local;    v_local;    v_local

def make_nonlocal_reader():
    """Build and return the benchmark that reads an enclosing-scope variable.

    ``v_nonlocal`` lives in this factory's scope, so the returned ``inner``
    function refers to it as a free (closure) variable rather than a local
    or a global.
    """
    v_nonlocal = 1
    def inner(trials=trials):
        # 25 reads of the enclosing-scope variable per loop iteration.
        for t in trials:
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
    # The report prints f.__name__, so give the closure a descriptive name
    # instead of 'inner'.
    inner.__name__ = 'read_nonlocal'
    return inner

# Module-level handle on the closure, used by the benchmark driver.
read_nonlocal = make_nonlocal_reader()

# Module-level variable read by read_global and rebound by write_global.
v_global = 1
def read_global(trials=trials):
    """Benchmark reading a module-level (global) variable.

    For every element of ``trials`` the global name ``v_global`` is
    evaluated 25 times and the value is discarded.

    trials -- iterable whose length sets the iteration count.
    """
    for t in trials:
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global

def read_builtin(trials=trials):
    """Benchmark reading a built-in name.

    ``oct`` is used as the sample name: it is evaluated 25 times per loop
    iteration and never called. It is not assigned in this function, so it
    is resolved by name lookup outside the local scope.

    trials -- iterable whose length sets the iteration count.
    """
    for t in trials:
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct

class A(object):
    """Minimal class used as the target of the attribute benchmarks.

    The class body defines only ``m``; the ``x`` attribute used by the
    class-variable and instance-variable benchmarks is assigned by those
    benchmark functions at run time.
    """
    def m(self):
        """No-op method; the benchmarks only look it up, they never call it."""
        pass

def read_classvar(trials=trials, A=A):
    """Benchmark reading an attribute through the class object.

    Note the side effect: ``A.x`` is (re)assigned to 1 on every call so the
    attribute exists before it is read, and it remains set on the class
    afterwards.

    trials -- iterable whose length sets the iteration count.
    A      -- class to read ``x`` from; passed as a default argument so the
              class itself is reached through a local name.
    """
    A.x = 1
    for t in trials:
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x
        A.x;    A.x;    A.x;    A.x;    A.x

def read_instancevar(trials=trials, a=A()):
    """Benchmark reading an attribute through an instance.

    ``a.x`` is assigned on every call so the attribute exists before the
    timed reads; each loop iteration then reads it 25 times.

    trials -- iterable whose length sets the iteration count.
    a      -- instance to read ``x`` from. The default ``A()`` is created
              once, when the function is defined, and reused by every call.
    """
    a.x = 1
    for t in trials:
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x
        a.x;    a.x;    a.x;    a.x;    a.x

def read_unboundmethod(trials=trials, A=A):
    """Benchmark looking up a method through the class (``A.m``).

    The method is only looked up, 25 times per loop iteration; it is never
    called.

    trials -- iterable whose length sets the iteration count.
    A      -- class whose ``m`` attribute is looked up.
    """
    # real unbound methods are only in Py2.x
    for t in trials:
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m
        A.m;    A.m;    A.m;    A.m;    A.m

def read_boundmethod(trials=trials, a=A()):
    """Benchmark looking up a method through an instance (``a.m``).

    The method is only looked up, 25 times per loop iteration; it is never
    called.

    trials -- iterable whose length sets the iteration count.
    a      -- instance whose ``m`` attribute is looked up. The default
              ``A()`` is created once, when the function is defined.
    """
    for t in trials:
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m
        a.m;    a.m;    a.m;    a.m;    a.m

def write_local(trials=trials):
    """Benchmark assigning to a function-local variable.

    For every element of ``trials`` the constant 1 is assigned to
    ``v_local`` 25 times.

    trials -- iterable whose length sets the iteration count.
    """
    v_local = 1
    for t in trials:
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1

def make_nonlocal_writer():
    """Build and return the benchmark that assigns to an enclosing-scope variable.

    ``v_nonlocal`` lives in this factory's scope; the returned ``inner``
    function declares it ``nonlocal`` so its assignments rebind that
    variable instead of creating a new local.
    """
    v_nonlocal = 1
    def inner(trials=trials):
        nonlocal v_nonlocal            # this is invalid syntax in Py2.x
        # 25 assignments to the enclosing-scope variable per loop iteration.
        for t in trials:
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
    # The report prints f.__name__, so give the closure a descriptive name
    # instead of 'inner'.
    inner.__name__ = 'write_nonlocal'
    return inner

# Module-level handle on the closure, used by the benchmark driver.
write_nonlocal = make_nonlocal_writer()

def write_global(trials=trials):
    """Benchmark assigning to a module-level (global) variable.

    For every element of ``trials`` the constant 1 is assigned to the
    global ``v_global`` 25 times.

    trials -- iterable whose length sets the iteration count.
    """
    # Without this declaration the assignments below would create a local.
    global v_global
    for t in trials:
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1

def write_classvar(trials=trials, A=A):
    """Benchmark assigning an attribute on the class object.

    For every element of ``trials`` the constant 1 is assigned to ``A.x``
    25 times. The attribute remains set on the class afterwards.

    trials -- iterable whose length sets the iteration count.
    A      -- class whose ``x`` attribute is assigned.
    """
    for t in trials:
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1
        A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1;    A.x = 1

def write_instancevar(trials=trials, a=A()):
    """Benchmark assigning an attribute on an instance.

    For every element of ``trials`` the constant 1 is assigned to ``a.x``
    25 times.

    trials -- iterable whose length sets the iteration count.
    a      -- instance whose ``x`` attribute is assigned. The default
              ``A()`` is created once, when the function is defined.
    """
    for t in trials:
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1
        a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1;    a.x = 1

def loop_overhead(trials=trials):
    """Baseline: iterate over ``trials`` with an empty loop body.

    Its timing shows how much of the other benchmarks' time is the ``for``
    loop itself rather than the variable accesses.

    trials -- iterable whose length sets the iteration count.
    """
    for t in trials:
        pass


if __name__ == '__main__':
    from timeit import Timer

    # Benchmarks in report order: reads first, then writes, then the
    # empty-loop baseline.
    benchmarks = (
        read_local,
        read_nonlocal,
        read_global,
        read_builtin,
        read_classvar,
        read_instancevar,
        read_unboundmethod,
        read_boundmethod,
        write_local,
        write_nonlocal,
        write_global,
        write_classvar,
        write_instancevar,
        loop_overhead,
    )

    for f in benchmarks:
        # Seven timing runs of 1000 calls each; report the fastest run.
        best = min(Timer(f).repeat(7, 1000))
        print('{:5.3f}\t{}'.format(best, f.__name__))

Comparative timings for Python 3.2.1 and Python 2.7.2. Both are 64-bit darwin builds running on a 2.4GHz Intel Core 2 Duo:

Py3.2    Py2.7
-----   -----
0.108   0.182   read_local
0.149   0.242   read_nonlocal
0.216   0.310   read_global
0.364   0.450   read_builtin
0.534   0.629   read_classvar (or 0.636 in a Py2.7 old-style class)
0.682   0.808   read_instancevar (or 0.637 in a Py2.7 old-style class)
        0.903   read_unboundmethod (0.938 in a Py2.7 old-style class)
0.779   0.904   read_boundmethod (or 1.215 in a Py2.7 old-style class)


0.128   0.178   write_local
0.192           write_nonlocal
0.442   0.576   write_global
2.134   2.414   write_classvar (or 1.010 in a Py2.7 old-style class)
0.967   1.177   write_instancevar (or 0.942 in a Py2.7 old-style class)

0.012   0.011   loop_overhead

7 comments

Louis RIVIERE 12 years, 8 months ago  # | flag
Eric Snow 12 years, 8 months ago  # | flag

Nice and concrete! With the comparative timings, it's not only comparative horizontally, but vertically as well, right?

You could certainly include other things like access via descriptors or getattr/setattr, but I think you nailed the "first tier" of variable access.

Michael Schurter 12 years, 8 months ago  # | flag

Does using __slots__ affect performance?

Titusz Pan 12 years, 8 months ago  # | flag

Here is a comparison between Python 2.7.2 and PyPy 1.5 on Win7/32-bit:

Py2.7   PyPy1.5
-----   -------
0.160   0.004     read_local
0.239   0.004     read_nonlocal
0.281   0.004     read_global
0.543   0.005     read_builtin
0.972   0.004     read_instancevar
1.036   0.004     read_unboundmethod
1.068   0.005     read_boundmethod
0.191   0.004     write_local
0.193   0.004     write_nonlocal
0.475   0.007     write_global
2.624   0.672     write_classvar
1.253   0.007     write_instancevar
0.011   0.004     loop_overhead

I think that is quite impressive... To make this work I had to do 2 things:

  • remove line 102 (nonlocal does not work on py2.x)
  • remove read_classvar from test as it did not complete with pypy
Justin 12 years, 8 months ago  # | flag

I just did this with PyPy 1.6 on Mac and received basically the same results, except that everything but write_classvar came to 0.001, which makes me think. I only had to comment out "nonlocal", but this has further implications. I haven't looked closely at it yet, but yes, it is impressive.

Jan Kaliszewski 12 years, 7 months ago  # | flag

@Michael:

According to my quick test, __slots__ are a bit faster:

Python 3.2.1rc1 (default, May 18 2011, 18:34:29)
[GCC 4.6.1 20110507 (prerelease)] on linux2
Type "help", "copyright", "credits" or "license" for more information.

>>> from timeit import Timer
>>> class C(object): pass
... 
>>> class D(object): __slots__ = 'attr'
... 
>>> c = C()
>>> d = D()
>>> c.attr = 9
>>> d.attr = 9
>>> def test_c(cc=c, trials=([1] * 500)):
...     for t in trials:
...             cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr; cc.attr;
... 
>>> def test_d(dd=d, trials=([1] * 500)):
...     for t in trials:
...             dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr; dd.attr;
... 
>>> def test_L(LL=c, trials=([1] * 500)):
...     for t in trials:
...             LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; LL     ; 
... 
>>> for f in test_c, test_d, test_L, test_c, test_d, test_L:
...     print('{0:5.3f}\t{1}'.format(min(Timer(f).repeat(7, 1000)), f.__name__))
... 
1.430   test_c
1.304   test_d
0.149   test_L
1.433   test_c
1.304   test_d
0.149   test_L
s_h_a_i_o 12 years, 7 months ago  # | flag

Thanks to its JIT generating C bytecode, pypy is about 40 times faster than cpython, at least for variable access. However, overall performance figures (http://speed.pypy.org/comparison/) indicate that pypy is usually slightly faster than cpython, but not by that margin.

Hence there must be other operations that take much longer with pypy-JIT than with psyco. Do you know which ones? I wonder why the 40x speedup on access becomes a 2x speedup on average.