This is a revamp of a recipe that Ned Batchelder posted on his blog a few years ago (thanks Ned!). That page is pretty insightful, recipe aside.
This recipe works for all versions of Python back to 2.4 (at least). Warning: using one version of Python to inspect a pyc file from another Python version may not work too well.
#! /usr/bin/env python
"""inspect_pyc module
This is a refactor of a recipe from Ned Batchelder's blog. He has
given me permission to publish this. You can find the post at the
following URL:
http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html
You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".
"""
import dis, marshal, struct, sys, time, types, warnings
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
# Indentation unit for the report; functions multiply it by their nesting level.
INDENT = " " * 3
# Number of bytes per row when to_hexstr() lays out a hex dump.
MAX_HEX_LEN = 16
# Width the "name:" labels are left-justified to in show_code().
NAME_OFFSET = 20
def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte sequence as space-separated two-digit hex values.

    bytes_value -- the bytes to render; items may be integers (iterating
                   a Python 3 bytes object) or one-character strings
                   (iterating a Python 2 str).
    level       -- nesting level; only used when wrap is true.
    wrap        -- when true, break the output into rows of MAX_HEX_LEN
                   bytes, each prefixed with INDENT*level; when false,
                   return everything on a single line.
    """
    try:
        octets = ["%02x" % item for item in bytes_value]
    except TypeError:
        # Items are one-character strings, so convert them to ordinals.
        octets = ["%02x" % ord(item) for item in bytes_value]
    rows = [" ".join(octets[start:start + MAX_HEX_LEN])
            for start in range(0, len(octets), MAX_HEX_LEN)]
    if not wrap:
        return " ".join(rows)
    pad = INDENT * level
    return pad + ("\n" + pad).join(rows)
def unpack_pyc(filename):
    """Read a compiled Python file and split it into its parts.

    The header layout is selected from the magic number, so files written
    by different Python versions can be parsed:

    * before Python 3.3:   magic, mtime
    * Python 3.3 to 3.6:   magic, mtime, source size
    * Python 3.7 and up:   magic, flags, then either mtime and source
                           size or (hash-based files) an 8-byte hash

    Header integers are always little-endian, whatever the host is.

    Returns the tuple (filename, magic, unixtime, timestamp, code).
    Hash-based files store no modification time; for them unixtime is 0
    and timestamp is a short explanatory string.

    Raises struct.error if the header is truncated, and lets any error
    from marshal.load() propagate (the marshal format itself differs
    between Python versions).
    """
    f = open(filename, "rb")
    try:
        magic = f.read(4)
        # The first two magic bytes are a little-endian counter that is
        # bumped whenever the bytecode format changes.
        version = struct.unpack("<H", magic[:2])[0]
        # Python 3 counters started at 3000, while Python 1.5 through 2.7
        # used values from 20121 to 62211.
        is_py3 = version < 20000
        hash_based = False
        if is_py3 and version >= 3392:
            # PEP 552 (Python 3.7) inserted a flags word after the magic.
            flags = struct.unpack("<L", f.read(4))[0]
            hash_based = bool(flags & 0x1)
        # "<L" forces exactly four little-endian bytes; a bare "L" uses
        # the native size and byte order and fails or misreads elsewhere.
        unixtime = struct.unpack("<L", f.read(4))[0]
        if is_py3 and version >= 3210:
            # Skip the source-size field (or the second half of the hash).
            f.read(4)
        code = marshal.load(f)
    finally:
        f.close()
    if hash_based:
        unixtime = 0
        timestamp = "n/a (hash-based pyc)"
    else:
        timestamp = time.asctime(time.localtime(unixtime))
    return filename, magic, unixtime, timestamp, code
def show_consts(consts, level=0):
    """Print each constant in consts, numbered from zero.

    Plain constants are printed with their repr.  Nested code objects are
    announced and then expanded with show_code() one level deeper.
    """
    pad = INDENT * level
    for position, const in enumerate(consts):
        if not isinstance(const, types.CodeType):
            print(pad + "%s %r" % (position, const))
            continue
        print(pad + "%s (code object)" % position)
        show_code(const, level=level + 1)
def show_bytecode(code, level=0):
    """Print the raw bytecode of code as hex, then its disassembly.

    code  -- the code object whose co_code is shown.
    level -- nesting level; every printed line is indented by INDENT*level.
    """
    indent = INDENT*level
    print(to_hexstr(code.co_code, level, wrap=True))
    print(indent+"disassembled:")
    buffer = StringIO()
    # dis.disassemble() prints to sys.stdout, so capture its output by
    # swapping the stream temporarily.  Put back whatever stream was
    # active before (not sys.__stdout__) so a caller's own redirection
    # survives, and do it in a finally so a failing disassembly cannot
    # leave stdout pointing at the buffer.
    saved_stdout = sys.stdout
    sys.stdout = buffer
    try:
        dis.disassemble(code)
    finally:
        sys.stdout = saved_stdout
    print(indent + buffer.getvalue().replace("\n", "\n"+indent))
def show_code(code, level=0):
    """Print every co_* attribute of a code object, indented by level.

    co_consts and co_code are left out of the attribute listing and
    reported afterwards through show_consts() and show_bytecode(), one
    level deeper.
    """
    pad = INDENT * level
    for attr in dir(code):
        if not attr.startswith("co_") or attr in ("co_code", "co_consts"):
            continue
        raw = getattr(code, attr)
        if isinstance(raw, str):
            shown = repr(raw)
        elif attr == "co_flags":
            shown = "0x%05x" % raw
        elif attr == "co_lnotab":
            shown = "0x(%s)" % to_hexstr(raw)
        else:
            shown = raw
        label = (attr + ":").ljust(NAME_OFFSET)
        print("%s%s%s" % (pad, label, shown))
    deeper = level + 1
    print("%sco_consts" % pad)
    show_consts(code.co_consts, level=deeper)
    print("%sco_code" % pad)
    show_bytecode(code, level=deeper)
def show_file(filename):
    """Print a full report for the pyc file at filename.

    The report lists the file name, magic number and timestamp returned by
    unpack_pyc(), followed by the code object dumped with show_code().
    """
    filename, raw_magic, unixtime, timestamp, code = unpack_pyc(filename)
    summary = [
        " ## inspecting pyc file ##",
        "filename: %s" % filename,
        "magic number: %s" % ("0x(%s)" % to_hexstr(raw_magic)),
        "timestamp: %s (%s)" % (unixtime, timestamp),
        "code",
    ]
    for row in summary:
        print(row)
    show_code(code, level=1)
    print(" ## done inspecting pyc file ##")
if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the pyc file name,
    # or "-h" to print the usage line.
    USAGE = " usage: %s <PYC FILENAME>" % sys.argv[0]
    if len(sys.argv) < 2:
        sys.exit("Error: Too few arguments\n%s" % USAGE)
    if sys.argv[2:]:
        # Only the first argument is used; mention the rest and carry on.
        warnings.warn("Ignoring extra arguments: %s" % (sys.argv[2:],))
    if sys.argv[1] != "-h":
        show_file(sys.argv[1])
    else:
        print(USAGE)
|
The file format changed slightly in Python 3.3, so the recipe above no longer works there. In addition to the two original four-byte fields, there is a new four-byte field that encodes the size of the source file as a long. Consequently, the marshaled code object now begins at offset 12. A version-independent PYC reader needs to check the magic number and read or skip the file size field accordingly.
Thank you for providing this wonderful script. However, the script failed when I ran it on my Solaris machine, which is big-endian. The timestamp in a .pyc file is stored in little-endian format.
The following line should be changed to make it big-endian machine friendly. ORIGINAL : unixtime = struct.unpack("L", f.read(4))[0] UPDATED : unixtime = struct.unpack("<l", f.read(4))[0]<="" p="">
Because of '<', my post was not displayed correctly. The following is the correct one.
ORIGINAL
unixtime = struct.unpack("L", f.read(4))[0]
UPDATED
unixtime = struct.unpack("<L", f.read(4))[0]