"""
Oren Tirosh <orent@hishome.net>
Convert code objects (function bodies only) to source code and back.
This doesn't actually decompile the bytecode - it simply fetches the
source code from the .py file and then carefully compiles it back to
a 100% identical code object:
c == recompile(*uncompile(c))
Not supported:
Lambdas
Nested functions (you can still process the function containing them)
Anything for which inspect.getsource can't get the source code
"""
import ast, inspect, re
from types import CodeType as code, FunctionType as function
import __future__
# Sum of every CO_FUTURE_* compiler-flag constant exported by the __future__
# module. uncompile() masks co_flags with it so that only the flags set by
# `from __future__ import ...` statements are handed back to compile().
PyCF_MASK = sum(
    bit
    for attr, bit in vars(__future__).items()
    if attr.startswith('CO_FUTURE')
)
class Error(Exception):
    """Generic failure raised while converting between code objects and source."""
class Unsupported(Error):
    """Raised for code objects this module refuses to process.

    uncompile() raises it for nested functions, lambdas and code whose
    filename is '<string>'.
    """
class NoSource(Error):
    """Raised by uncompile() when the source lines of a code object cannot be read."""
def uncompile(c):
    """ uncompile(codeobj) -> [source, filename, mode, flags, firstlineno, privateprefix]

    Fetch the source text of the function whose body is the code object *c*
    and return everything recompile() needs to rebuild it:

    source        -- the function's source lines joined into one string
    filename      -- file the code object was compiled from
    mode          -- always 'exec'
    flags         -- the __future__ compiler flags that were active for *c*
    firstlineno   -- line number of the first source line in that file
    privateprefix -- the '_ClassName' prefix of a mangled private name used
                     by *c*, or None if no such name was found

    Raises Unsupported for nested functions, lambdas and code compiled from
    a string, and NoSource when the source lines cannot be read.
    """
    if c.co_flags & inspect.CO_NESTED or c.co_freevars:
        raise Unsupported('nested functions not supported')
    if c.co_name == '<lambda>':
        raise Unsupported('lambda functions not supported')
    if c.co_filename == '<string>':
        raise Unsupported('code without source file not supported')

    filename = inspect.getfile(c)
    try:
        lines, firstlineno = inspect.getsourcelines(c)
    except IOError:
        raise NoSource('source code not available')
    source = ''.join(lines)

    # __X is mangled to _ClassName__X in methods. Find this prefix.
    # Local variables are mangled too, so look at co_varnames as well as
    # co_names: recompile() re-mangles both tuples, and a method whose only
    # private identifiers are locals would otherwise never round-trip.
    # NOTE(review): the match is greedy, so a private name that itself
    # contains '__' (e.g. _Cls__a__b) yields the longest candidate prefix.
    privateprefix = None
    for name in c.co_names + c.co_varnames:
        m = re.match('^(_[A-Za-z][A-Za-z0-9_]*)__.*$', name)
        if m:
            privateprefix = m.group(1)
            break

    return [source, filename, 'exec', c.co_flags & PyCF_MASK, firstlineno, privateprefix]
def recompile(source, filename, mode, flags=0, firstlineno=1, privateprefix=None):
    """ recompile output of uncompile back to a code object. source may also be preparsed AST

    source        -- source text of one function definition, or an ast.AST
                     whose first statement is that definition
    filename, mode, flags -- passed through to compile()
    firstlineno   -- line number of the first source line; only used when
                     *source* is text
    privateprefix -- '_ClassName' prefix to put back on private (__x) names,
                     or None to leave all names untouched

    Returns the code object of the function *body*, not the module-level
    code that merely defines the function.

    Raises Error if the first statement is not a function definition or if
    the body's code object cannot be located among the compiled constants.
    """
    if isinstance(source, ast.AST):
        a = source
    else:
        a = parse_snippet(source, filename, mode, flags, firstlineno)

    node = a.body[0]
    if not isinstance(node, ast.FunctionDef):
        raise Error('Expecting function AST node')

    # A decorated function's co_firstlineno is the line of its first
    # decorator. Older interpreters report that same line as node.lineno,
    # but Python 3.8+ reports the `def` line, so compare against the
    # earliest of the two to locate the body on every version.
    firstline = min([node.lineno] + [deco.lineno for deco in node.decorator_list])

    c0 = compile(a, filename, mode, flags, True)
    # This code object defines the function. Find the function's actual body code:
    for c in c0.co_consts:
        if not isinstance(c, code):
            continue
        if c.co_name == node.name and c.co_firstlineno == firstline:
            break
    else:
        raise Error('Function body code not found')

    # Re-mangle private names:
    if privateprefix is not None:
        # Private means: starts with two underscores, does not end with two.
        isprivate = re.compile('^__.*(?<!__)$').match

        def fixnames(names):
            return tuple(privateprefix + name if isprivate(name) else name for name in names)

        if hasattr(c, 'replace'):
            # Python 3.8+: the positional constructor signature differs from
            # Python 2, so swap only the two name tuples.
            c = c.replace(co_names=fixnames(c.co_names),
                          co_varnames=fixnames(c.co_varnames))
        else:
            # Legacy (Python 2) constructor signature.
            c = code(c.co_argcount, c.co_nlocals, c.co_stacksize, c.co_flags, c.co_code, c.co_consts,
                     fixnames(c.co_names), fixnames(c.co_varnames), c.co_filename, c.co_name,
                     c.co_firstlineno, c.co_lnotab, c.co_freevars, c.co_cellvars)
    return c
def parse_snippet(source, filename, mode, flags, firstlineno, privateprefix_ignored=None):
    """ Like ast.parse, but accepts indented code snippet with a line number offset.

    *source* is compiled to an AST only (never to bytecode). If it is
    indented, it is parsed as the body of a throw-away ``with`` statement
    and that wrapper is removed again. Line numbers in the returned tree
    are shifted so that the first line of *source* is *firstlineno*.

    The last parameter is accepted and ignored; it lets the full output of
    uncompile() be passed with ``*``.
    """
    compile_args = (filename, mode, flags | ast.PyCF_ONLY_AST, True)
    try:
        # One extra leading line, exactly like the wrapper below, so both
        # paths need the same line-number correction.
        tree = compile('\n' + source, *compile_args)
    except IndentationError:
        # Already indented? Wrap with dummy compound statement
        tree = compile('with 0:\n' + source, *compile_args)
        # peel wrapper
        wrapper = tree.body[0]
        tree.body = wrapper.body
    # The snippet starts on line 2 of what was compiled; move it to firstlineno.
    ast.increment_lineno(tree, firstlineno - 2)
    return tree
def test_roundtrip():
    """Round-trip every function the garbage collector knows about and print a tally.

    Imports every ``.py`` module found next to the ``os`` module, then runs
    ``recompile(*uncompile(code))`` on the code object of each function
    returned by gc.get_objects() and counts the outcomes:

    successful  -- the recompiled code object equals the original
    failed      -- it was recompiled but differs
    unsupported -- uncompile() raised Unsupported
    nosource    -- uncompile() raised NoSource

    Any other exception propagates and aborts the run. Only constructs that
    behave the same on Python 2.6+ and Python 3 are used (print with a single
    parenthesised string, ``__code__`` instead of ``func_code``).
    """
    import gc
    import os
    import sys

    print('Importing everything in the medicine cabinet:')
    for filename in os.listdir(os.path.dirname(os.__file__)):
        name, ext = os.path.splitext(filename)
        if ext != '.py' or name == 'antigravity':
            continue
        try:
            __import__(name)
        except ImportError:
            pass  # some stuff in system library can't be imported
    print('Done importing. We apologize for the noise above.\n')

    print('Round-tripping functions to source code and back:')
    success = 0
    failed = 0
    unsupported = 0
    errors = 0
    allfuncs = [obj for obj in gc.get_objects() if type(obj) is function]
    for func in allfuncs:
        c = func.__code__
        if type(c) is not code:
            continue  # PyPy builtin-code
        try:
            rc = recompile(*uncompile(c))
        except Unsupported:
            unsupported += 1
        except NoSource:
            errors += 1
        else:
            if c == rc:
                success += 1
            else:
                failed += 1
        # '\r' returns to the start of the line so the tally is redrawn in place.
        sys.stdout.write('\r%d successful roundtrip, %d failed roundtrip, %d unsupported, %d nosource ' % (success, failed, unsupported, errors))
    # Terminate the in-place progress line.
    sys.stdout.write('\n')
# Run the round-trip self-test when this file is executed as a script.
if __name__ == '__main__':
    test_roundtrip()