Converts a code object to a source code snippet and back: c == recompile(*uncompile(c))
This is useful, for example, if you want to apply an AST transformation to the code.
"""
Oren Tirosh <orent@hishome.net>
Convert code objects (function bodies only) to source code and back.
This doesn't actually decompile the bytecode - it simply fetches the
source code from the .py file and then carefully compiles it back to
a 100% identical code object:
c == recompile(*uncompile(c))
Not supported:
Lambdas
Nested functions (you can still process the function containing them)
Anything for which inspect.getsource can't get the source code
"""
import ast, inspect, re
from types import CodeType as code, FunctionType as function
import __future__
# Sum of every CO_FUTURE_* flag constant exported by the __future__ module.
# uncompile() ANDs it with co_flags to keep only the future-import flags.
PyCF_MASK = sum(v for k, v in vars(__future__).items() if k.startswith('CO_FUTURE'))
class Error(Exception):
    """Base class for every exception defined by this module."""


class Unsupported(Error):
    """Raised for code objects of a kind this module refuses to process."""


class NoSource(Error):
    """Raised when the source text of a code object cannot be fetched."""
def uncompile(c):
    """Fetch everything needed to rebuild code object *c* from its source.

    uncompile(codeobj) -> [source, filename, mode, flags, firstlineno, privateprefix]

    The result is a list whose items line up with the parameters of
    recompile() and parse_snippet():

    source        -- source text returned by inspect.getsourcelines()
    filename      -- result of inspect.getfile(c)
    mode          -- always 'exec'
    flags         -- the __future__ compiler flags found in c.co_flags
    firstlineno   -- line number reported by inspect.getsourcelines()
    privateprefix -- the '_ClassName' prefix of name-mangled private names
                     used by the code, or None when none was detected

    Raises Unsupported for nested functions / closures, lambdas and code
    whose filename is '<string>'.  Raises NoSource when
    inspect.getsourcelines() fails with IOError.
    """
    if c.co_flags & inspect.CO_NESTED or c.co_freevars:
        raise Unsupported('nested functions not supported')
    if c.co_name == '<lambda>':
        raise Unsupported('lambda functions not supported')
    if c.co_filename == '<string>':
        raise Unsupported('code without source file not supported')

    filename = inspect.getfile(c)
    try:
        lines, firstlineno = inspect.getsourcelines(c)
    except IOError:
        raise NoSource('source code not available')
    source = ''.join(lines)

    # __X is mangled to _ClassName__X in methods. Find this prefix.
    # Mangling applies to local variables and arguments as well as to
    # attributes and globals, so co_varnames is searched after co_names;
    # otherwise a method that only uses private locals would be recompiled
    # with unmangled names.
    find_mangled = re.compile(r'^(_[A-Za-z][A-Za-z0-9_]*)__.*$').match
    privateprefix = None
    for name in c.co_names + c.co_varnames:
        m = find_mangled(name)
        if m:
            privateprefix = m.group(1)
            break

    return [source, filename, 'exec', c.co_flags & PyCF_MASK, firstlineno, privateprefix]
def recompile(source, filename, mode, flags=0, firstlineno=1, privateprefix=None):
    """Recompile the output of uncompile() back to a function-body code object.

    source        -- source text, or an already parsed AST whose first
                     statement is the function definition
    filename, mode, flags -- passed on to compile()
    firstlineno   -- line number of the first source line (only used when
                     *source* is text)
    privateprefix -- '_ClassName' prefix to put back in front of private
                     (__name) identifiers, or None to leave names alone

    Returns the code object of the function body, not of the statement
    that defines the function.

    Raises Error when the first statement is not a function definition
    (including an empty or non-module AST) or when the function's code
    object cannot be located in the compiled result.
    """
    if isinstance(source, ast.AST):
        a = source
    else:
        a = parse_snippet(source, filename, mode, flags, firstlineno)

    # Report an empty or body-less tree through the module's own exception
    # instead of letting a.body[0] fail with IndexError/TypeError.
    body = getattr(a, 'body', None)
    if not isinstance(body, list) or not body:
        raise Error('Expecting function AST node')
    node = body[0]
    if not isinstance(node, ast.FunctionDef):
        raise Error('Expecting function AST node')

    c0 = compile(a, filename, mode, flags, True)

    # This code object defines the function. Find the function's actual body code:
    for c in c0.co_consts:
        if not isinstance(c, code):
            continue
        if c.co_name == node.name and c.co_firstlineno == node.lineno:
            break
    else:
        raise Error('Function body code not found')

    # Re-mangle private names: the snippet was compiled outside its class,
    # so __name identifiers were left unmangled by the compiler.
    if privateprefix is not None:
        isprivate = re.compile('^__.*(?<!__)$').match

        def fixnames(names):
            return tuple(privateprefix + name if isprivate(name) else name for name in names)

        names = fixnames(c.co_names)
        varnames = fixnames(c.co_varnames)
        if hasattr(c, 'replace'):
            # Code objects with a replace() method (Python 3.8+) have a
            # constructor signature that differs from the one used below.
            c = c.replace(co_names=names, co_varnames=varnames)
        else:
            # Python 2 positional constructor.
            c = code(c.co_argcount, c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code, c.co_consts,
                     names, varnames, c.co_filename, c.co_name,
                     c.co_firstlineno, c.co_lnotab, c.co_freevars, c.co_cellvars)
    return c
def parse_snippet(source, filename, mode, flags, firstlineno, privateprefix_ignored=None):
    """Parse *source* to an AST, tolerating indentation and a line offset.

    Works like ast.parse, except that the snippet may be uniformly indented
    (for example a method cut out of a class body) and the resulting line
    numbers are shifted so that the first source line is *firstlineno*.

    The last parameter is accepted and ignored so that the whole list
    returned by uncompile() can be passed as ``parse_snippet(*unc)``.
    """
    ast_flags = flags | ast.PyCF_ONLY_AST

    def parse_with(header):
        # Every header is exactly one line long, so the snippet always
        # starts on line 2 of the text handed to the compiler.
        return compile(header + source, filename, mode, ast_flags, True)

    try:
        tree = parse_with('\n')
    except IndentationError:
        # Already indented? Wrap with dummy compound statement ...
        tree = parse_with('with 0:\n')
        # ... and peel the wrapper off again.
        tree.body = tree.body[0].body

    # Snippet line 1 was parsed as line 2, hence the offset of -2.
    ast.increment_lineno(tree, firstlineno - 2)
    return tree
def test_roundtrip():
    """Round-trip every function found in memory and print a running tally.

    Imports each importable top-level .py module located next to the os
    module, collects all function objects known to the garbage collector,
    and checks for each one that recompile(*uncompile(code)) compares equal
    to the original code object.

    Only Unsupported and NoSource are counted; any other exception raised
    while round-tripping propagates to the caller.
    """
    import gc
    import os
    import sys

    print('Importing everything in the medicine cabinet:')
    for filename in os.listdir(os.path.dirname(os.__file__)):
        name, ext = os.path.splitext(filename)
        if ext != '.py' or name == 'antigravity':
            continue
        try:
            __import__(name)
        except ImportError:
            pass  # some stuff in system library can't be imported
    print('Done importing. We apologize for the noise above.\n')

    print('Round-tripping functions to source code and back:')
    success = 0
    failed = 0
    unsupported = 0
    nosource = 0
    allfuncs = [obj for obj in gc.get_objects() if type(obj) is function]
    for func in allfuncs:
        # __code__ is the spelling available on both Python 2.6+ and 3.
        c = func.__code__
        if type(c) is not code:
            continue  # PyPy builtin-code
        try:
            rc = recompile(*uncompile(c))
            if c == rc:
                success += 1
            else:
                failed += 1
        except Unsupported:
            unsupported += 1
        except NoSource:
            nosource += 1
        # The progress line has no newline, so it must be flushed explicitly
        # to show up while the loop is still running.
        sys.stdout.write(
            '\r%d successful roundtrip, %d failed roundtrip, %d unsupported, %d nosource '
            % (success, failed, unsupported, nosource))
        sys.stdout.flush()
    # Terminate the progress line so later output starts on a fresh line.
    sys.stdout.write('\n')
# Run the round-trip self-test when this file is executed as a script.
if __name__ == '__main__':
    test_roundtrip()
|
I've been reading Thomas Kluyver's excellent Green Tree Snakes documentation of AST and saw the following comment:
- Python itself doesn’t provide a way to turn a compiled code object into an AST, or an AST into a string of code. Third party tools, like Meta, allow you to do this - but they might not be as well supported.
It inspired me to write this little recipe to convert a code object to a Python source code snippet and compile it back to an identical code object. Of course, this round-trip is useless by itself. It is only used for verification. The intended use is to get the source, ast.parse it, apply some transformation and then compile it back to a code object.
Here is an example of how to use it for writing a decorator that applies an AST transformation to a function:
def transform(visitor):
    """Decorator factory: apply a NodeTransformer to a single function.

    The decorated function is returned unchanged except for its code
    object, which is replaced by one compiled from the function's source
    after *visitor* has visited the parsed tree.
    """
    def apply_visitor(func):
        # Fetch the source and compile parameters of the function body.
        source, filename, mode, flags, firstlineno, privateprefix = uncompile(func.func_code)

        # Parse, let the visitor edit the tree in place, then fill in
        # locations for any nodes the visitor created.
        tree = parse_snippet(source, filename, mode, flags, firstlineno, privateprefix)
        visitor.visit(tree)
        ast.fix_missing_locations(tree)

        # Recompile from the modified tree and patch the function's code.
        func.func_code = recompile(tree, filename, mode, flags, firstlineno, privateprefix)
        return func
    return apply_visitor
# From Thomas Kluyver's "Green Tree Snakes":
class AssertCmpTransformer(ast.NodeTransformer):
    """Rewrite ``assert a == b`` statements into ``assert_equal(a, b)`` calls."""

    def visit_Assert(self, node):
        """Return a replacement for *node*, or *node* itself if left alone.

        Only asserts whose test is a single ``==`` comparison are rewritten.
        """
        test = node.test
        is_single_eq = (
            isinstance(test, ast.Compare)
            and len(test.ops) == 1
            and isinstance(test.ops[0], ast.Eq)
        )
        if not is_single_eq:
            # Remember to return the original node if we don't want to change it.
            return node

        callee = ast.Name(id='assert_equal', ctx=ast.Load())
        call = ast.Call(func=callee,
                        args=[test.left, test.comparators[0]],
                        keywords=[])
        # Wrap the call in an Expr node, because the return value isn't used.
        replacement = ast.Expr(value=call)
        ast.copy_location(replacement, node)
        ast.fix_missing_locations(replacement)
        return replacement
def assert_equal(a, b):
    """Raise AssertionError naming both values when ``a != b``."""
    if a != b:
        message = '%r is not equal to %r' % (a, b)
        raise AssertionError(message)
# Usage example: AssertCmpTransformer replaces the assert statement below
# with a call to assert_equal(1, 2); the assert's message is not carried over.
@transform(AssertCmpTransformer())
def falsehood():
    assert 1 == 2, "The universe is wrong"
The approach used by this code is to recompile and replace the code object representing the function's body. Any AST transformation will not affect the function's default argument values, for example. An alternative approach is to recompile and execute the code generating the function rather than the function's body code. However, using exec may have side-effects.
P.S. This recipe also works in PyPy
Another possible use: fast breakpoints in a Python debugger. No need to enable the trace hook - just patch the AST and replace the code object with one that calls the debugger.
could you maybe point out how to do it through the second approach using pdb?
When setting up a breakpoint in a function, store the original code object (e.g. as an attribute of the function), fetch the source code, parse it, find the AST node corresponding to the line number and insert a node with a call to the breakpoint function, then compile and replace the function's code object. The details are tricky. For example, any frames already on the stack or in a running generator will still run the original unpatched code object.
Modifying pdb to use this may require significant effort.