Welcome, guest | Sign In | My Account | Store | Cart

This is a revamp of a recipe that Ned Batchelder posted on his blog a few years ago (thanks Ned!). That page is pretty insightful, recipe aside.

This recipe works for all versions of Python back to 2.4 (at least). Warning: using one version of Python to inspect a pyc file from another Python version may not work too well.

Python, 117 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#! /usr/bin/env python
"""inspect_pyc module

This is a refactor of a recipe from Ned Batchelder's blog.  He has
given me permission to publish this.  You can find the post at the
following URL:

  http://nedbatchelder.com/blog/200804/the_structure_of_pyc_files.html

You may use this module as a script: "./inspect_pyc.py <PYC_FILE>".

"""

import dis, marshal, struct, sys, time, types, warnings
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO


# One indentation step; the show_* helpers and to_hexstr() repeat it
# `level` times to indent nested output.
INDENT = " " * 3
# Number of bytes rendered on each row of a hex dump by to_hexstr().
MAX_HEX_LEN = 16
# Width to which show_code() left-justifies "name:" labels so that the
# attribute values line up in a column.
NAME_OFFSET = 20


def to_hexstr(bytes_value, level=0, wrap=False):
    """Render a byte sequence as space-separated two-digit hex values.

    bytes_value -- a sized sequence of ints (e.g. Python 3 bytes) or of
                   single characters (e.g. a Python 2 str).
    level       -- nesting depth; only used when wrap is true.
    wrap        -- when true, emit MAX_HEX_LEN values per row, one row per
                   line, each prefixed with INDENT*level.  When false, all
                   values are returned on a single unindented line.

    Returns the formatted string.
    """
    prefix = INDENT * level
    total = len(bytes_value)

    # Items are formatted as ints first; if that is rejected (they are
    # characters rather than ints) fall back to their ordinals.
    try:
        cells = ["%02x" % item for item in bytes_value]
    except TypeError:
        cells = ["%02x" % ord(item) for item in bytes_value]

    # Group the cells into rows of at most MAX_HEX_LEN values.
    rows = [
        " ".join(cells[start:start + MAX_HEX_LEN])
        for start in range(0, total, MAX_HEX_LEN)
    ]

    if not wrap:
        return " ".join(rows)
    return prefix + ("\n" + prefix).join(rows)

def unpack_pyc(filename):
    """Read a compiled Python (.pyc) file and split it into its parts.

    The header layout depends on the Python version that wrote the file,
    which is identified by the first two bytes of the magic number
    (a little-endian 16-bit integer):

      * every version:            4-byte magic number
      * 3.7+  (magic >= 3392):    4-byte flags field (PEP 552)
      * every version:            4-byte source modification time
      * 3.3+  (magic >= 3210):    4-byte source size

    The marshalled code object follows the header.  All header integers
    are stored little-endian regardless of the host platform.

    Returns a tuple (filename, magic, unixtime, timestamp, code):
      filename  -- the argument, unchanged
      magic     -- the raw 4-byte magic number
      unixtime  -- the modification-time field as an integer
      timestamp -- unixtime rendered as local time by time.asctime()
      code      -- the object returned by marshal.load()

    Raises struct.error if the header is truncated, and whatever open()
    or marshal.load() raise for unreadable or corrupt files.
    """
    # Python 2 magic numbers (20121 and up) are all larger than any
    # Python 3 magic number (3000 and up), so one comparison tells the
    # two families apart.
    py2_magic_floor = 20000
    source_size_magic = 3210   # first magic with the source-size field
    flags_magic = 3392         # first magic with the PEP 552 flags field

    # try/finally rather than "with" so the module keeps working on the
    # old interpreters it was written for.
    f = open(filename, "rb")
    try:
        magic = f.read(4)
        version = struct.unpack("<H", magic[:2])[0]
        is_py3 = version < py2_magic_floor

        if is_py3 and version >= flags_magic:
            # NOTE: when bit 0 of the flags is set the pyc is hash-based
            # and the next 8 bytes hold a source hash instead of
            # mtime + size; the reported time is meaningless then.
            f.read(4)

        # Explicit "<L": standard 4-byte size, little-endian.  A bare "L"
        # uses the native size (8 bytes on many 64-bit platforms) and the
        # native byte order, both of which are wrong for this field.
        unixtime = struct.unpack("<L", f.read(4))[0]

        if is_py3 and version >= source_size_magic:
            f.read(4)  # source size; not part of the return value

        timestamp = time.asctime(time.localtime(unixtime))
        code = marshal.load(f)
    finally:
        f.close()
    return filename, magic, unixtime, timestamp, code

def show_consts(consts, level=0):
    """Print every entry of a constants tuple, one per line.

    Each line starts with the entry's index.  Plain constants are shown
    with their repr; nested code objects are labelled as such and then
    expanded one level deeper through show_code().
    """
    prefix = INDENT * level
    for position, const in enumerate(consts):
        if not isinstance(const, types.CodeType):
            print(prefix + "%s %r" % (position, const))
            continue
        print(prefix + "%s (code object)" % position)
        show_code(const, level=level + 1)

def show_bytecode(code, level=0):
    """Print a code object's raw bytecode as hex, then its disassembly.

    code  -- the code object to display.
    level -- nesting depth; every output line is indented by INDENT*level.

    The disassembly is captured by temporarily pointing sys.stdout at an
    in-memory buffer so that it can be re-indented before printing.
    """
    indent = INDENT*level
    print(to_hexstr(code.co_code, level, wrap=True))
    print(indent+"disassembled:")
    buffer = StringIO()
    # Remember whatever stream is active right now: restoring to
    # sys.__stdout__ would silently undo any redirection the caller set up.
    saved_stdout = sys.stdout
    sys.stdout = buffer
    try:
        dis.disassemble(code)
    finally:
        # Always put the stream back, even if disassembly fails.
        sys.stdout = saved_stdout
    print(indent + buffer.getvalue().replace("\n", "\n"+indent))

def show_code(code, level=0):
    """Print the attributes of a code object, indented by INDENT*level.

    Every attribute whose name starts with "co_" is listed as a
    "name: value" line, except co_consts and co_code, which are expanded
    afterwards through show_consts() and show_bytecode().  String values
    are shown with their repr; otherwise co_flags is shown in hex and
    co_lnotab as a hex dump.
    """
    prefix = INDENT * level
    expanded_later = ("co_code", "co_consts")

    for attr in dir(code):
        if attr.startswith("co_") and attr not in expanded_later:
            shown = getattr(code, attr)
            # The str check comes first on purpose: an attribute that is
            # already a str is shown via repr, whatever its name.
            if isinstance(shown, str):
                shown = repr(shown)
            elif attr == "co_flags":
                shown = "0x%05x" % shown
            elif attr == "co_lnotab":
                shown = "0x(%s)" % to_hexstr(shown)
            label = (attr + ":").ljust(NAME_OFFSET)
            print("%s%s%s" % (prefix, label, shown))

    deeper = level + 1
    print("%sco_consts" % prefix)
    show_consts(code.co_consts, level=deeper)
    print("%sco_code" % prefix)
    show_bytecode(code, level=deeper)

def show_file(filename):
    """Print a full report for one pyc file.

    The report has a banner, the file name, the magic number in hex, the
    raw and formatted timestamp, the code object dump produced by
    show_code(), and a closing banner.
    """
    path, raw_magic, mtime, mtime_text, code_obj = unpack_pyc(filename)
    magic_text = "0x(%s)" % to_hexstr(raw_magic)

    header_lines = (
        "  ## inspecting pyc file ##",
        "filename:     %s" % path,
        "magic number: %s" % magic_text,
        "timestamp:    %s (%s)" % (mtime, mtime_text),
        "code",
    )
    for text in header_lines:
        print(text)

    show_code(code_obj, level=1)
    print("  ## done inspecting pyc file ##")


if __name__ == "__main__":
    USAGE = "  usage: %s <PYC FILENAME>" % sys.argv[0]

    # Exactly one argument is expected; none is an error, extras only
    # produce a warning.
    if len(sys.argv) < 2:
        sys.exit("Error: Too few arguments\n%s" % USAGE)
    extras = sys.argv[2:]
    if extras:
        warnings.warn("Ignoring extra arguments: %s" % (extras,))

    # "-h" prints the usage line; anything else is taken as a pyc path.
    target = sys.argv[1]
    if target != "-h":
        show_file(target)
    else:
        print(USAGE)

3 comments

Ian Kelly 10 years, 4 months ago  # | flag

The file format has changed slightly as of Python 3.3+, so the recipe above no longer works. In addition to the two original four-byte fields there is a new four-byte field that encodes the size of the source file as a long. Consequently the marshaled code object now begins at position 12. A version-independent PYC reader will need to take into account the magic number and read or skip the file size field accordingly.

Aki Niimura 8 years, 11 months ago  # | flag

Thank you for providing this wonderful script. However, the script puked when I used it with my Solaris machine, which is a big-endian machine. The time format was stored in little-endian format in a .pyc file.

The following line should be changed to make it big-endian machine friendly. ORIGINAL: unixtime = struct.unpack("L", f.read(4))[0] UPDATED: unixtime = struct.unpack("<L", f.read(4))[0]

Aki Niimura 8 years, 11 months ago  # | flag

Because of '<', my post was not displayed correctly. The following is the correct one.

ORIGINAL unixtime = struct.unpack("L", f.read(4))[0]

UPDATED unixtime = struct.unpack("<L", f.read(4))[0]

Created by Eric Snow on Tue, 27 Sep 2011 (MIT)
Python recipes (4591)
Eric Snow's recipes (39)

Required Modules

  • (none specified)

Other Information and Tasks