Welcome, guest | Sign In | My Account | Store | Cart

Converts a code object to a source code snippet and back: c == recompile(*uncompile(c))

This is useful, for example, if you want to apply an AST transformation to the code.

Python, 148 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""
Oren Tirosh <orent@hishome.net>

Convert code objects (function bodies only) to source code and back.
This doesn't actually decompile the bytecode - it simply fetches the
source code from the .py file and then carefully compiles it back to 
a 100% identical code object:

    c == recompile(*uncompile(c))

Not supported:
    Lambdas
    Nested functions  (you can still process the function containing them)
    Anything for which inspect.getsource can't get the source code
"""

import ast, inspect, re
from types import CodeType as code, FunctionType as function

import __future__
PyCF_MASK = sum(v for k, v in vars(__future__).items() if k.startswith('CO_FUTURE'))

class Error(Exception):
    """Base class for the exceptions defined in this module."""


class Unsupported(Error):
    """The code object is of a kind that cannot be round-tripped (nested, lambda, ...)."""


class NoSource(Error):
    """The source text for the code object could not be read."""

def uncompile(c):
    """ uncompile(codeobj) -> [source, filename, mode, flags, firstlineno, privateprefix]

    Fetch the source text of a function-body code object together with
    everything needed to compile it back. The result is a list so that it can
    be unpacked directly into recompile() or parse_snippet().

    Returned items:
        source        -- the function's source lines joined into one string
        filename      -- result of inspect.getfile(c)
        mode          -- always 'exec'
        flags         -- c.co_flags restricted to the __future__ compiler flags
        firstlineno   -- line number of the first source line
        privateprefix -- guessed '_ClassName' prefix used for private name
                         mangling, or None if no mangled name was found

    Raises:
        Unsupported -- for nested functions/closures, lambdas, and code whose
                       filename is '<string>'
        NoSource    -- when inspect cannot read the source (IOError)
    """
    if c.co_flags & inspect.CO_NESTED or c.co_freevars:
        raise Unsupported('nested functions not supported')
    if c.co_name == '<lambda>':
        raise Unsupported('lambda functions not supported')
    if c.co_filename == '<string>':
        raise Unsupported('code without source file not supported')

    filename = inspect.getfile(c)
    try:
        lines, firstlineno = inspect.getsourcelines(c)
    except IOError:
        raise NoSource('source code not available')
    source = ''.join(lines)

    # __X is mangled to _ClassName__X in methods. Find this prefix.
    # Mangling applies to local variables and arguments as well as to
    # attribute/global names, and recompile() re-mangles both co_names and
    # co_varnames, so both tuples must be searched here.
    privateprefix = None
    for name in c.co_names + c.co_varnames:
        m = re.match(r'^(_[A-Za-z][A-Za-z0-9_]*)__.*$', name)
        if m:
            privateprefix = m.group(1)
            break

    return [source, filename, 'exec', c.co_flags & PyCF_MASK, firstlineno, privateprefix]

def _remangle_private_names(names, privateprefix):
    """ Return names as a tuple, prefixing every class-private name ('__x' not ending in '__'). """
    isprivate = re.compile('^__.*(?<!__)$').match
    return tuple(privateprefix + name if isprivate(name) else name for name in names)


def recompile(source, filename, mode, flags=0, firstlineno=1, privateprefix=None):
    """ recompile output of uncompile back to a code object. source may also be preparsed AST

    Arguments (same order as the list returned by uncompile):
        source        -- source text of a single function, or an AST whose
                         body starts with that function's FunctionDef
        filename      -- filename passed to compile()
        mode          -- compile() mode
        flags         -- compiler flags passed to compile()
        firstlineno   -- line number of the first source line (only used when
                         source is text)
        privateprefix -- '_ClassName' prefix to re-apply to private names, or
                         None to leave names untouched

    Returns the code object of the function *body* (not of the statement
    that defines the function).

    Raises Error if the first statement is not a function definition or if
    the function's body code cannot be located after compiling.
    """
    if isinstance(source, ast.AST):
        a = source
    else:
        a = parse_snippet(source, filename, mode, flags, firstlineno)

    # Report an empty or non-statement-list body through the module's own
    # Error instead of an incidental IndexError/AttributeError.
    body = getattr(a, 'body', None)
    if not isinstance(body, list) or not body or not isinstance(body[0], ast.FunctionDef):
        raise Error('Expecting function AST node')
    node = body[0]

    c0 = compile(a, filename, mode, flags, True)

    # co_firstlineno of a decorated function is the line of its first
    # decorator. Older ASTs report that same line as FunctionDef.lineno, but
    # Python 3.8+ reports the 'def' line, so compare against the earliest of
    # the two. For undecorated functions this is simply node.lineno.
    firstline = min([node.lineno] + [d.lineno for d in node.decorator_list])

    # This code object defines the function. Find the function's actual body code:
    for c in c0.co_consts:
        if not isinstance(c, code):
            continue
        if c.co_name == node.name and c.co_firstlineno == firstline:
            break
    else:
        raise Error('Function body code not found')

    # Re-mangle private names:
    if privateprefix is not None:
        names = _remangle_private_names(c.co_names, privateprefix)
        varnames = _remangle_private_names(c.co_varnames, privateprefix)
        if hasattr(c, 'replace'):
            # Python 3.8+: the positional constructor signature differs between
            # versions, replace() is the portable way to swap fields.
            c = c.replace(co_names=names, co_varnames=varnames)
        else:
            c = code(c.co_argcount, c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code, c.co_consts,
                     names, varnames, c.co_filename, c.co_name,
                     c.co_firstlineno, c.co_lnotab, c.co_freevars, c.co_cellvars)
    return c

def parse_snippet(source, filename, mode, flags, firstlineno, privateprefix_ignored=None):
    """ Parse a possibly indented code snippet to an AST, numbering its lines from firstlineno.

    Takes the same positional arguments as the list produced by uncompile;
    the trailing privateprefix item is accepted and ignored.
    """
    ast_flags = flags | ast.PyCF_ONLY_AST

    def to_ast(text):
        return compile(text, filename, mode, ast_flags, True)

    # Either way exactly one line is put in front of the snippet, so its
    # first line is parsed as line 2.
    try:
        tree = to_ast('\n' + source)
    except IndentationError:
        # The snippet is indented (e.g. a method): parse it as the body of a
        # throwaway compound statement, then discard the wrapper.
        tree = to_ast('with 0:\n' + source)
        tree.body = tree.body[0].body

    # Shift line 2 to firstlineno.
    ast.increment_lineno(tree, firstlineno - 2)
    return tree

def test_roundtrip():
    import os

    print 'Importing everything in the medicine cabinet:'
    for filename in os.listdir(os.path.dirname(os.__file__)):
        name, ext = os.path.splitext(filename)
        if ext != '.py' or name == 'antigravity':
            continue
        try:
            __import__(name)
        except ImportError:
            pass    # some stuff in system library can't be imported
    print 'Done importing. We apologize for the noise above.\n'

    print 'Round-tripping functions to source code and back:'
    success = 0
    failed = 0
    unsupported = 0
    errors = 0

    import gc
    allfuncs = [obj for obj in gc.get_objects() if type(obj) is function]

    for func in allfuncs:
        c = func.func_code
        if type(c) is not code:
            continue    # PyPy builtin-code

        try:
            rc = recompile(*uncompile(c))
            if c == rc:
                success += 1
            else:
                failed += 1
        except Unsupported:
            unsupported += 1
        except NoSource:
            errors += 1

        print '\r%d successful roundtrip, %d failed roundtrip, %d unsupported, %d nosource ' % (success, failed, unsupported, errors),

# Run the round-trip self-test when this file is executed as a script.
if __name__ == '__main__':
    test_roundtrip()

I've been reading Thomas Kluyver's excellent Green Tree Snakes documentation of AST and saw the following comment:

  • Python itself doesn’t provide a way to turn a compiled code object into an AST, or an AST into a string of code. Third party tools, like Meta, allow you to do this - but they might not be as well supported.

It inspired me to write this little recipe to convert a code object to a Python source code snippet and compile it back to an identical code object. Of course, this round-trip is useless by itself. It is only used for verification. The intended use is to get the source, ast.parse it, apply some transformation and then compile it back to a code object.

Here is an example of how to use it for writing a decorator that applies an AST transformation to a function:

def transform(visitor):
    """ Decorator to apply a NodeTransformer to a single function

    visitor is an ast.NodeVisitor/NodeTransformer instance; its visit() method
    is called on the AST of the decorated function's source. The function's
    code object is then replaced in place and the same function object is
    returned, so defaults, globals and attributes of the function are kept.
    """
    def wrapper(func):
        # uncompile function
        # (__code__ is the spelling available on both Python 2.6+ and 3.x;
        # func_code is a Python-2-only alias for the same attribute)
        unc = uncompile(func.__code__)

        # convert to ast and apply visitor
        tree = parse_snippet(*unc)
        visitor.visit(tree)
        ast.fix_missing_locations(tree)
        unc[0] = tree

        # recompile and patch function's code
        func.__code__ = recompile(*unc)
        return func

    return wrapper

# From Thomas Kluyver's "Green Tree Snakes":
class AssertCmpTransformer(ast.NodeTransformer):
    """ Rewrite `assert a == b` statements into `assert_equal(a, b)` calls.

    Only asserts whose test is a single `==` comparison are rewritten; every
    other assert is returned unchanged. The assert's message, if any, is not
    passed on to the call.
    """

    def visit_Assert(self, node):
        test = node.test
        is_single_eq = (isinstance(test, ast.Compare)
                        and len(test.ops) == 1
                        and isinstance(test.ops[0], ast.Eq))
        if not is_single_eq:
            # Anything else is handed back untouched.
            return node

        operands = [test.left, test.comparators[0]]
        target = ast.Name(id='assert_equal', ctx=ast.Load())
        call = ast.Call(func=target, args=operands, keywords=[])

        # The call's result is unused, so it has to live in an expression statement.
        replacement = ast.Expr(value=call)
        ast.copy_location(replacement, node)
        ast.fix_missing_locations(replacement)
        return replacement

def assert_equal(a, b):
    """ Raise AssertionError showing both values when a != b; otherwise do nothing. """
    if not (a != b):
        return
    raise AssertionError('%r is not equal to %r' % (a, b))

# Demo of the decorator: the function's code is rewritten when it is defined,
# so the `assert 1 == 2` below is replaced by a call to assert_equal(1, 2).
# The assertion message is not carried over by AssertCmpTransformer.
@transform(AssertCmpTransformer())
def falsehood():
    assert 1 == 2, "The universe is wrong"

The approach used by this code is to recompile and replace the code object representing the function's body. Any AST transformation will not affect the function's default argument values, for example. An alternative approach is to recompile and execute the code generating the function rather than the function's body code. However, using exec may have side-effects.

P.S. This recipe also works in PyPy

3 comments

Oren Tirosh (author) 11 years, 2 months ago  # | flag

Another possible use: fast breakpoints in a Python debugger. No need to enable the trace hook - just patch the AST and replace the code object with one that calls the debugger.

Martin Ortbauer 11 years, 1 month ago  # | flag

could you maybe point out how to do it through the second approach using pdb?

Oren Tirosh (author) 11 years, 1 month ago  # | flag

When setting up a breakpoint in a function, store the original code object (e.g. as an attribute of the function), fetch the code source, parse it, find the ast node corresponding to the line number and insert a node with a call to the breakpoint function, compile and replace the function's code object. The details are a bit tricky. For example, any frames already on the stack or in a running generator will still run the original unpatched code object.

Modifying pdb to use this may require significant effort.