Compare speeds of locals, nested scopes, global, builtins, instance variables, and class variables.
# Show relative speeds of local, nonlocal, global, and built-in access.
# Iterable looped over by every benchmark function in this script:
# 500 passes per call.
trials = [1] * 500
def read_local(trials=trials):
    """Benchmark reads of a function-local variable.

    On each pass over ``trials`` the name ``v_local`` is evaluated 25 times
    as a bare expression statement and the result is discarded.

    trials -- iterable that sets the number of passes; being a parameter,
              it is itself a local name inside the function.
    """
    v_local = 1
    for t in trials:
        v_local; v_local; v_local; v_local; v_local
        v_local; v_local; v_local; v_local; v_local
        v_local; v_local; v_local; v_local; v_local
        v_local; v_local; v_local; v_local; v_local
        v_local; v_local; v_local; v_local; v_local
def make_nonlocal_reader():
    """Build and return the benchmark that reads an enclosing-scope variable.

    The returned function closes over ``v_nonlocal`` and, on each pass over
    its ``trials`` argument, evaluates that name 25 times.
    """
    v_nonlocal = 1

    def inner(trials=trials):
        for t in trials:
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal
            v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal; v_nonlocal

    # The report prints f.__name__, so give the closure a descriptive name
    # instead of 'inner'.
    inner.__name__ = 'read_nonlocal'
    return inner


read_nonlocal = make_nonlocal_reader()
# Module-level variable read by read_global and rebound by write_global.
v_global = 1
def read_global(trials=trials):
    """Benchmark reads of the module-level variable ``v_global``.

    On each pass over ``trials`` the global name is evaluated 25 times and
    the result is discarded.

    trials -- iterable that sets the number of passes.
    """
    for t in trials:
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
        v_global; v_global; v_global; v_global; v_global
def read_builtin(trials=trials):
    """Benchmark reads of a built-in name (``oct``).

    On each pass over ``trials`` the name ``oct`` is looked up 25 times
    without being called.

    trials -- iterable that sets the number of passes.
    """
    for t in trials:
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
        oct; oct; oct; oct; oct
class A(object):
    """Minimal class whose attributes and method are the benchmark targets.

    It derives from ``object`` explicitly, so it is a new-style class under
    Python 2 as well.
    """

    def m(self):
        """Do nothing; present only so that method lookups can be timed."""
        pass
def read_classvar(trials=trials, A=A):
    """Benchmark reads of a class attribute through the class (``A.x``).

    ``A.x`` is assigned once before the loop so the attribute exists; each
    pass over ``trials`` then reads it 25 times.

    trials -- iterable that sets the number of passes.
    A      -- class to read from; as a parameter it is a local name here.
    """
    A.x = 1
    for t in trials:
        A.x; A.x; A.x; A.x; A.x
        A.x; A.x; A.x; A.x; A.x
        A.x; A.x; A.x; A.x; A.x
        A.x; A.x; A.x; A.x; A.x
        A.x; A.x; A.x; A.x; A.x
def read_instancevar(trials=trials, a=A()):
    """Benchmark reads of an instance attribute (``a.x``).

    ``a.x`` is assigned once before the loop so the attribute exists on the
    instance; each pass over ``trials`` then reads it 25 times.

    trials -- iterable that sets the number of passes.
    a      -- instance to read from; the default is a single ``A()`` created
              when the function is defined.
    """
    a.x = 1
    for t in trials:
        a.x; a.x; a.x; a.x; a.x
        a.x; a.x; a.x; a.x; a.x
        a.x; a.x; a.x; a.x; a.x
        a.x; a.x; a.x; a.x; a.x
        a.x; a.x; a.x; a.x; a.x
def read_unboundmethod(trials=trials, A=A):
    """Benchmark lookups of a method through its class (``A.m``).

    Each pass over ``trials`` looks up ``A.m`` 25 times without calling it.

    trials -- iterable that sets the number of passes.
    A      -- class whose method ``m`` is looked up.
    """
    # real unbound methods are only in Py2.x
    for t in trials:
        A.m; A.m; A.m; A.m; A.m
        A.m; A.m; A.m; A.m; A.m
        A.m; A.m; A.m; A.m; A.m
        A.m; A.m; A.m; A.m; A.m
        A.m; A.m; A.m; A.m; A.m
def read_boundmethod(trials=trials, a=A()):
    """Benchmark lookups of a method through an instance (``a.m``).

    Each pass over ``trials`` looks up ``a.m`` 25 times without calling it.

    trials -- iterable that sets the number of passes.
    a      -- instance whose method ``m`` is looked up; the default is a
              single ``A()`` created when the function is defined.
    """
    for t in trials:
        a.m; a.m; a.m; a.m; a.m
        a.m; a.m; a.m; a.m; a.m
        a.m; a.m; a.m; a.m; a.m
        a.m; a.m; a.m; a.m; a.m
        a.m; a.m; a.m; a.m; a.m
def write_local(trials=trials):
    """Benchmark assignments to a function-local variable.

    Each pass over ``trials`` rebinds ``v_local`` to ``1`` 25 times.

    trials -- iterable that sets the number of passes.
    """
    v_local = 1
    for t in trials:
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
        v_local = 1; v_local = 1; v_local = 1; v_local = 1; v_local = 1
def make_nonlocal_writer():
    """Build and return the benchmark that assigns an enclosing-scope variable.

    The returned function declares ``v_nonlocal`` as ``nonlocal`` and, on
    each pass over its ``trials`` argument, rebinds it to ``1`` 25 times.
    """
    v_nonlocal = 1

    def inner(trials=trials):
        nonlocal v_nonlocal  # this is invalid syntax in Py2.x
        for t in trials:
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1
            v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1; v_nonlocal = 1

    # The report prints f.__name__, so give the closure a descriptive name
    # instead of 'inner'.
    inner.__name__ = 'write_nonlocal'
    return inner


write_nonlocal = make_nonlocal_writer()
def write_global(trials=trials):
    """Benchmark assignments to the module-level variable ``v_global``.

    Each pass over ``trials`` rebinds the global ``v_global`` to ``1``
    25 times.

    trials -- iterable that sets the number of passes.
    """
    global v_global
    for t in trials:
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
        v_global = 1; v_global = 1; v_global = 1; v_global = 1; v_global = 1
def write_classvar(trials=trials, A=A):
    """Benchmark assignments to a class attribute (``A.x = 1``).

    Each pass over ``trials`` assigns ``A.x`` 25 times.

    trials -- iterable that sets the number of passes.
    A      -- class whose attribute ``x`` is assigned.
    """
    for t in trials:
        A.x = 1; A.x = 1; A.x = 1; A.x = 1; A.x = 1
        A.x = 1; A.x = 1; A.x = 1; A.x = 1; A.x = 1
        A.x = 1; A.x = 1; A.x = 1; A.x = 1; A.x = 1
        A.x = 1; A.x = 1; A.x = 1; A.x = 1; A.x = 1
        A.x = 1; A.x = 1; A.x = 1; A.x = 1; A.x = 1
def write_instancevar(trials=trials, a=A()):
    """Benchmark assignments to an instance attribute (``a.x = 1``).

    Each pass over ``trials`` assigns ``a.x`` 25 times.

    trials -- iterable that sets the number of passes.
    a      -- instance whose attribute ``x`` is assigned; the default is a
              single ``A()`` created when the function is defined.
    """
    for t in trials:
        a.x = 1; a.x = 1; a.x = 1; a.x = 1; a.x = 1
        a.x = 1; a.x = 1; a.x = 1; a.x = 1; a.x = 1
        a.x = 1; a.x = 1; a.x = 1; a.x = 1; a.x = 1
        a.x = 1; a.x = 1; a.x = 1; a.x = 1; a.x = 1
        a.x = 1; a.x = 1; a.x = 1; a.x = 1; a.x = 1
def loop_overhead(trials=trials):
    """Baseline benchmark: iterate over ``trials`` with an empty loop body.

    Its timing shows how much of the other benchmarks' time is spent on the
    ``for`` loop itself rather than on the accesses inside it.

    trials -- iterable that sets the number of passes.
    """
    for t in trials:
        pass
if __name__ == '__main__':
    from timeit import Timer

    # For every benchmark, run 7 repeats of 1000 calls each and report the
    # fastest repeat (in seconds) followed by the function's name.
    for f in [read_local, read_nonlocal, read_global, read_builtin,
              read_classvar, read_instancevar, read_unboundmethod, read_boundmethod,
              write_local, write_nonlocal, write_global,
              write_classvar, write_instancevar,
              loop_overhead]:
        print('{:5.3f}\t{}'.format(min(Timer(f).repeat(7, 1000)), f.__name__))
|
Comparative timings for Python 3.2.1 and Python 2.7.2. Both are 64-bit darwin builds running on a 2.4 GHz Intel Core 2 Duo:
Py3.2 Py2.7
----- -----
0.108 0.182 read_local
0.149 0.242 read_nonlocal
0.216 0.310 read_global
0.364 0.450 read_builtin
0.534 0.629 read_classvar (or 0.636 in a Py2.7 old-style class)
0.682 0.808 read_instancevar (or 0.637 in a Py2.7 old-style class)
n/a 0.903 read_unboundmethod (0.938 in a Py2.7 old-style class)
0.779 0.904 read_boundmethod (or 1.215 in a Py2.7 old-style class)
0.128 0.178 write_local
0.192 n/a write_nonlocal
0.442 0.576 write_global
2.134 2.414 write_classvar (or 1.010 in a Py2.7 old-style class)
0.967 1.177 write_instancevar (or 0.942 in a Py2.7 old-style class)
0.012 0.011 loop_overhead
Tags: optimization
hence BindingConstants at compile time :)
Nice and concrete! With the comparative timings, it's not only comparative horizontally, but vertically as well, right?
You could certainly include other things like access via descriptors or getattr/setattr, but I think you nailed the "first tier" of variable access.
Does using __slots__ affect performance?
Here is a comparison between Python 2.7.2 and PyPy 1.5 on Win7/32-bit:
I think that is quite impressive... To make this work I had to do 2 things:
read_classvar
from test as it did not complete with pypy.
I just did this with PyPy 1.6 on Mac and received basically the same results, except that everything but write_classvar came to 0.001, which makes me think. I only had to comment out "nonlocal", but this has further implications. I haven't looked closely at it yet, but yes, it is impressive.
@Michael:
According to my quick test, __slots__ are a bit faster:
Thanks to its JIT generating C bytecode, pypy is about 40 times faster than cpython, at least for variable access. However, overall performance figures (http://speed.pypy.org/comparison/) indicate that pypy is usually only slightly faster than cpython, not by that margin.
Hence there must be other operations that take much longer with pypy-JIT than with psyco. Do you know which ones? I wonder why the 40x speedup on access becomes a 2x speedup on average.