# Welcome, guest | Sign In | My Account | Store | Cart
"""
Oren Tirosh <orent@hishome.net>

Convert code objects (function bodies only) to source code and back.
This doesn't actually decompile the bytecode - it simply fetches the
source code from the .py file and then carefully compiles it back to 
a 100% identical code object:

    c == recompile(*uncompile(c))

Not supported:
    Lambdas
    Nested functions  (you can still process the function containing them)
    Anything for which inspect.getsource can't get the source code
"""

import ast, inspect, re
from types import CodeType as code, FunctionType as function

import __future__
# Combined value of every CO_FUTURE_* constant exposed by the __future__
# module; used to keep only those bits of a code object's co_flags.
PyCF_MASK = sum(
    getattr(__future__, attr)
    for attr in dir(__future__)
    if attr.startswith('CO_FUTURE')
)

class Error(Exception):
    """Base class for the exceptions raised by this module."""

class Unsupported(Error):
    """Raised by uncompile() for code objects it refuses to process."""

class NoSource(Error):
    """Raised by uncompile() when the source lines cannot be fetched."""

def uncompile(c):
    """ uncompile(codeobj) -> [source, filename, mode, flags, firstlineno, privateprefix]

    The returned list can be passed straight to recompile() as positional
    arguments.  mode is always 'exec'; flags holds only the co_flags bits
    selected by PyCF_MASK; privateprefix is the '_ClassName' part of the first
    mangled-looking name found in co_names, or None if there is none.

    Raises Unsupported for nested functions / closures, lambdas and code whose
    filename is '<string>', and NoSource when fetching the source lines fails
    with IOError.
    """
    if c.co_freevars or c.co_flags & inspect.CO_NESTED:
        raise Unsupported('nested functions not supported')
    if c.co_name == '<lambda>':
        raise Unsupported('lambda functions not supported')
    if c.co_filename == '<string>':
        raise Unsupported('code without source file not supported')

    filename = inspect.getfile(c)
    try:
        source_lines, firstlineno = inspect.getsourcelines(c)
    except IOError:
        raise NoSource('source code not available')

    # __X is mangled to _ClassName__X in methods. The first global/attribute
    # name that looks mangled tells us which prefix was applied.
    mangled = re.compile('^(_[A-Za-z][A-Za-z0-9_]*)__.*$')
    matches = (mangled.match(name) for name in c.co_names)
    first_match = next((m for m in matches if m), None)
    if first_match is None:
        privateprefix = None
    else:
        privateprefix = first_match.group(1)

    return [
        ''.join(source_lines),
        filename,
        'exec',
        c.co_flags & PyCF_MASK,
        firstlineno,
        privateprefix,
    ]

def recompile(source, filename, mode, flags=0, firstlineno=1, privateprefix=None):
    """ recompile output of uncompile back to a code object. source may also be preparsed AST

    The first statement of the (parsed) source must be a function definition;
    the code object returned is that function's body, picked out of the
    constants of the compiled enclosing code by name and first line number.
    When privateprefix is given, names of the form __X (not ending in __) in
    co_names and co_varnames get that prefix prepended.

    Raises Error if the first statement is not a function definition or if the
    function's body code object cannot be located.
    """
    if isinstance(source, ast.AST):
        tree = source
    else:
        tree = parse_snippet(source, filename, mode, flags, firstlineno)

    funcdef = tree.body[0]
    if not isinstance(funcdef, ast.FunctionDef):
        raise Error('Expecting function AST node')

    # Compiling the tree yields the code that *defines* the function; the
    # function's own body is one of that code object's constants.
    defining_code = compile(tree, filename, mode, flags, True)

    body = None
    for const in defining_code.co_consts:
        if (isinstance(const, code)
                and const.co_name == funcdef.name
                and const.co_firstlineno == funcdef.lineno):
            body = const
            break
    if body is None:
        raise Error('Function body code not found')

    if privateprefix is None:
        return body

    # Re-mangle private names: leading double underscore, no trailing one.
    looks_private = re.compile('^__.*(?<!__)$').match

    def remangle(names):
        return tuple(
            privateprefix + ident if looks_private(ident) else ident
            for ident in names
        )

    return code(body.co_argcount, body.co_nlocals, body.co_stacksize, body.co_flags,
                body.co_code, body.co_consts,
                remangle(body.co_names), remangle(body.co_varnames),
                body.co_filename, body.co_name,
                body.co_firstlineno, body.co_lnotab, body.co_freevars, body.co_cellvars)

def parse_snippet(source, filename, mode, flags, firstlineno, privateprefix_ignored=None):
    """ Like ast.parse, but accepts indented code snippet with a line number offset.

    Returns the parsed tree with its line numbers shifted so that the first
    line of source is reported as line firstlineno.  The trailing
    privateprefix_ignored parameter is accepted and not used.
    """
    ast_only = flags | ast.PyCF_ONLY_AST
    try:
        # A one-line prefix keeps the line arithmetic below identical for
        # both the plain and the wrapped case.
        tree = compile('\n' + source, filename, mode, ast_only, True)
    except IndentationError:
        # Already indented? Parse it as the body of a dummy compound
        # statement, then throw the wrapper away and keep only its body.
        tree = compile('with 0:\n' + source, filename, mode, ast_only, True)
        wrapper = tree.body[0]
        tree.body = wrapper.body
    # The snippet starts on line 2 of what was parsed (line 1 is the prefix),
    # so shifting by firstlineno - 2 puts it on firstlineno.
    ast.increment_lineno(tree, firstlineno - 2)
    return tree

def test_roundtrip():
    import os

    print 'Importing everything in the medicine cabinet:'
    for filename in os.listdir(os.path.dirname(os.__file__)):
        name, ext = os.path.splitext(filename)
        if ext != '.py' or name == 'antigravity':
            continue
        try:
            __import__(name)
        except ImportError:
            pass    # some stuff in system library can't be imported
    print 'Done importing. We apologize for the noise above.\n'

    print 'Round-tripping functions to source code and back:'
    success = 0
    failed = 0
    unsupported = 0
    errors = 0

    import gc
    allfuncs = [obj for obj in gc.get_objects() if type(obj) is function]

    for func in allfuncs:
        c = func.func_code
        if type(c) is not code:
            continue    # PyPy builtin-code

        try:
            rc = recompile(*uncompile(c))
            if c == rc:
                success += 1
            else:
                failed += 1
        except Unsupported:
            unsupported += 1
        except NoSource:
            errors += 1

        print '\r%d successful roundtrip, %d failed roundtrip, %d unsupported, %d nosource ' % (success, failed, unsupported, errors),

# Run the round-trip self-test when this file is executed as a script.
if __name__ == '__main__':
    test_roundtrip()

# History