Welcome, guest | Sign In | My Account | Store | Cart

The following recipe lets you defer module imports until the module is actually needed. This can lead to a much faster startup time for large programs with many imports. Installing is easy: just call importer.install().

Python, 156 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# Copyright rPath, Inc., 2006
# Available under the python license
""" Defines an on-demand importer that only actually loads modules when their
    attributes are accessed.  NOTE: if the ondemand module is viewed using
    introspection, like dir(), isinstance, etc, it will appear as a
    ModuleProxy, not a module, and will not have the correct attributes.
    Barring introspection, however, the module will behave as normal.
"""
import sys
import imp
import os
import types

def makeImportedModule(name, pathname, desc, scope):
    """ Return a ModuleProxy that stands in for module `name` until first use.

        The proxy holds no state of its own; everything needed to perform
        the real import is kept in this function's closure.  The first
        attribute read or write on the proxy loads the module (unless a
        real module is already registered in sys.modules) and forwards
        the access to it.

        name     -- full dotted module name; used as the sys.modules key
        pathname -- path of the module, as returned by imp.find_module
        desc     -- description tuple, as returned by imp.find_module
        scope    -- dict that receives the loaded module under key `name`
    """

    def _replaceProxies(namespace, candidates, mod):
        """ Rebind every entry of `namespace` that is a proxy for this
            module (looked up under each name in `candidates`) to `mod`.
        """
        for candidate in candidates:
            if candidate not in namespace:
                continue
            bound = namespace[candidate]
            if bound.__class__.__name__ != 'ModuleProxy':
                continue
            # FIXME: this makes me cringe, but I haven't figured out a
            # better way to ensure that the module proxy we're
            # looking at is actually a proxy for this module
            if pathname in repr(bound):
                namespace[candidate] = mod

    def _loadModule():
        """ Load the given module, and insert it into the parent
            scope, and also the original importing scope.
        """
        mod = sys.modules.get(name, None)
        if mod is None or not isinstance(mod, types.ModuleType):
            # sys.modules has nothing usable for this name (e.g. it still
            # holds a proxy), so perform the real import now.
            try:
                fp = open(pathname, 'U')
            except (IOError, OSError, TypeError, ValueError):
                # Best effort only: not every pathname can be opened as a
                # file.  load_module is still attempted without one.
                # Unlike a bare except, this lets KeyboardInterrupt and
                # SystemExit propagate.
                fp = None

            try:
                mod = imp.load_module(name, fp, pathname, desc)
            finally:
                if fp is not None:
                    fp.close()

            sys.modules[name] = mod

        scope[name] = mod

        # Frame 0 is _loadModule and frame 1 is the ModuleProxy method that
        # called it, so frame 2 is the code that touched the proxy.
        frame = sys._getframe(2)

        # check to see if this module exists for any part of the name
        # we are importing, e.g. if you are importing foo.bar.baz,
        # look for foo.bar.baz, bar.baz, and baz.
        moduleParts = name.split('.')
        candidates = ['.'.join(moduleParts[start:])
                      for start in range(len(moduleParts))]

        # NOTE(review): writes to f_locals may not reach the real local
        # variables of a function frame; they do take effect when the
        # frame is module level -- confirm against the interpreter in use.
        _replaceProxies(frame.f_locals, candidates, mod)
        _replaceProxies(frame.f_globals, candidates, mod)

        return mod

    class ModuleProxy(object):
        __slots__ = []
        # we don't add any docs for the module in case the
        # user tries accessing '__doc__'

        # Kept for callers that invoke it explicitly; Python itself has no
        # __hasattr__ hook (hasattr() goes through __getattr__).
        def __hasattr__(self, key):
            mod = _loadModule()
            return hasattr(mod, key)

        # Only reached for names not found on the proxy class itself.
        def __getattr__(self, key):
            mod = _loadModule()
            return getattr(mod, key)

        # Attribute writes are forwarded to the real module, never stored
        # on the proxy.
        def __setattr__(self, key, value):
            mod = _loadModule()
            return setattr(mod, key, value)

        # _loadModule relies on `pathname` appearing in this string to
        # recognise proxies for this module.
        def __repr__(self):
            return "<moduleProxy '%s' from '%s'>" % (name, pathname)

    return ModuleProxy()

class OnDemandLoader(object):
    """ The loader takes a name and info about the module to load and
        "loads" it - in this case returning a proxy that will only load
        the real module when an attribute is accessed.

        name     -- module name handed to imp.load_module / the proxy
        file     -- open file object for the module, or a false value
        pathname -- module path, as returned by imp.find_module
        desc     -- description tuple, as returned by imp.find_module
        scope    -- dict passed through to makeImportedModule
    """
    def __init__(self, name, file, pathname, desc, scope):
        self.file = file
        self.name = name
        self.pathname = pathname
        self.desc = desc
        self.scope = scope

    def load_module(self, fullname):
        """ Register `fullname` in sys.modules and return what was stored.

            Names found in the builtins namespace are imported immediately
            with imp.load_module; every other name gets a lazy ModuleProxy
            from makeImportedModule.  The file object given to the
            constructor is closed in both cases.
        """
        # __builtins__ may be either a dict or the builtins module,
        # depending on how this code is being run; `in` only works on the
        # dict form, so normalise first.
        builtin_names = __builtins__
        if not isinstance(builtin_names, dict):
            builtin_names = vars(builtin_names)
        # NOTE(review): this tests against builtin *names* (functions,
        # types, ...), not sys.builtin_module_names -- confirm the intent.

        try:
            if fullname in builtin_names:
                mod = imp.load_module(self.name, self.file,
                                      self.pathname, self.desc)
            else:
                mod = makeImportedModule(self.name, self.pathname, self.desc,
                                         self.scope)
        finally:
            # The proxy reopens the file by pathname when it loads, so the
            # handle from find_module is never needed past this point.
            if self.file:
                self.file.close()

        sys.modules[fullname] = mod
        return mod

class OnDemandImporter(object):
    """ The on-demand importer imports a module proxy that
        inserts the desired module into the calling scope only when
        an attribute from the module is actually used.
    """

    def find_module(self, fullname, path=None):
        """ Return an OnDemandLoader for `fullname`, or None to decline.

            fullname -- full dotted name of the module being imported
            path     -- search path supplied by the import machinery, if any

            None is returned when the module is already registered in
            sys.modules, when the parent package has not been imported,
            or when imp.find_module cannot locate the module.
        """
        if not path and fullname in sys.modules:
            existing = sys.modules[fullname]
            if existing is None or isinstance(existing, types.ModuleType):
                # Already resolved.  A finder must return a loader or None;
                # handing back the module itself would make the import
                # machinery call .load_module() on it (e.g. from reload()).
                return None

        # this is the scope in which import <fullname> was called
        global_scope = sys._getframe(1).f_globals

        tail = fullname
        if '.' in fullname:
            head, tail = fullname.rsplit('.', 1)

            # this import protocol works such that if I am going to be
            # able to import fullname, then everything in front of the
            # last . in fullname must already be loaded into sys.modules.
            parent = sys.modules.get(head, None)
            if parent is None:
                return None

            if hasattr(parent, '__path__'):
                path = parent.__path__

        try:
            fp, pathname, desc = imp.find_module(tail, path)
        except ImportError:
            # don't return an import error.  That will stop
            # the automated search mechanism from working.
            return None
        return OnDemandLoader(fullname, fp, pathname, desc, global_scope)

def install():
    """ Register a fresh OnDemandImporter at the end of sys.meta_path. """
    importer = OnDemandImporter()
    sys.meta_path.append(importer)

Developed for conary, an open source package manager used with rBuilder - www.rpath.com. Conary was taking a while to import all of its modules before even beginning to process command-line options, so I looked for a way to delay unnecessary module imports.

This is possible using sys.meta_path as described in http://www.python.org/peps/pep-0302.html.

Note that if you perform introspection on your module objects, such as calling isinstance (as unittest does, for example), the lazy importer won't work: any isinstance test against the module type will fail.

The importer uses closures to try to avoid adding attributes to the module proxy. Because it can't have any attributes, I have to do a sick hack to try to determine which path the module proxy is for. Better solutions for that particular problem are more than welcome.

5 comments

bearophile - 18 years, 2 months ago  # | flag

MatPlotLib. I have tried it with MatPlotLib, but it gives some problems:

>>> import importer



>>> importer.install()



>>> from pylab import *
David Christian (author) 18 years, 2 months ago  # | flag

MatPlotLib. It seems the lazy importer doesn't handle C modules that import other modules very well. I have posted a question on python-list about this to try to figure out how to detect this case so I can handle it.

http://mail.python.org/pipermail/python-list/2006-February/326087.html

Connelly Barnes 17 years, 10 months ago  # | flag

See also. See also Phillip Eby's Importing package [1] and my autoimp module [2]. The former serves a need similar to your recipe; the latter is intended to make the interactive Python prompt easier to use by pre-importing all modules (and doing this lazily, so modules are only loaded when they are used).

[1]. http://peak.telecommunity.com/DevCenter/Importing

[2]. http://barnesc.blogspot.com/2006/06/automatic-python-imports-with-autoimp.html

Tim Mitchell 16 years, 10 months ago  # | flag

matplotlib and reloading. This code is working well for me, thanks. The module type can be obtained from the desc output from imp.find_module. However, this is not enough to get matplotlib to load, as it depends on all modules being loaded for some documentation trickery.

To get around the matplotlib problem I added a list of modules to skip as follows:

class OnDemandImporter(object):
    """ The on-demand importer imports a module proxy that
        inserts the desired module into the calling scope only when
        an attribute from the module is actually used.
    """
    # Name components that must never be imported lazily.
    __skip = ['gtk', 'gdk', 'gobject', 'matplotlib']
    def find_module(self, fullname, path=None):
        # Decline the import if any dotted component of the name is in the
        # skip list, so submodules of a skipped package are skipped too.
        nameparts = fullname.split('.')
        for m in nameparts:
            if m in self.__skip:
                return None # load normally
        # NOTE(review): this is an excerpt; presumably the rest of the
        # recipe's find_module body follows here -- as shown, the method
        # returns None for every name.

I also found that the reload() command is broken because it expects a module and gets a ModuleProxy instance instead. I got around that by changing the install function:

onDemandImporter = OnDemandImporter()
# The original builtin reload(); filled in by install().
_realReload = None
def lazyReload(module):
    """ Replacement for reload() that tolerates lazy module proxies.

        Real modules are passed to the saved builtin reload() and its
        result is returned, as reload() callers expect.  Anything else
        (e.g. a proxy) is returned unchanged without reloading.
    """
    if isinstance(module, types.ModuleType):
        return _realReload(module)
    return module

def install():
    """ Register the shared importer on sys.meta_path and replace the
        builtin reload() with lazyReload.

        Calling install() while the importer is already registered does
        nothing; a second run would otherwise save lazyReload as the
        "real" reload and make every reload() call recurse forever.
    """
    global _realReload
    if onDemandImporter in sys.meta_path:
        return
    # __builtins__ may be either a dict or the builtins module, depending
    # on how this code is being run; work on the namespace dict either way.
    builtin_ns = __builtins__
    if not isinstance(builtin_ns, dict):
        builtin_ns = vars(builtin_ns)
    _realReload = builtin_ns['reload']
    sys.meta_path.append(onDemandImporter)
    builtin_ns['reload'] = lazyReload

def uninstall():
    """ Remove the shared importer from sys.meta_path and, if a builtin
        reload() was saved, put it back.  Does nothing when the importer
        is not currently registered.
    """
    if onDemandImporter not in sys.meta_path:
        return
    sys.meta_path.remove(onDemandImporter)
    if _realReload is not None:
        __builtins__['reload'] = _realReload
Jean-Lou Dupont 15 years, 1 month ago  # | flag

I have used the valuable information found in this recipe to create a Python Preprocessor. The project is hosted at: link.