Welcome, guest | Sign In | My Account | Store | Cart
def _curry(fn, *cargs, **ckwargs):
    def call_fn(*fargs, **fkwargs):
        d = ckwargs.copy()
        d.update(fkwargs)
        return fn(*(cargs + fargs), **d)
    return call_fn

class MultiHandler(ContentHandler, object):
    """
    MultiHandler is a handler for the xml.sax parser.
    Its purpose is to dispatch calls to one or more other handlers.
    When dealing with really large XML files (say, Wikipedia's 100GB full
    text dump) this is handy so that you can process the information in
    multiple (modular) ways without having to read the whole file off disk
    in separate passes.

    How dispatch works:
       Looking up any attribute other than ``handlers``, ``errout`` or a
       double-underscore name (``__class__``, ``__dict__``, ...) yields a
       callable.  Calling it forwards the arguments to the same-named
       attribute of every object in ``handlers``, in list order, and
       returns None.

    If an exception is raised from a constituent handler call (including
       the AttributeError raised when a handler lacks the requested
       attribute), MultiHandler will write a one-line diagnostic to the
       supplied errout (or stderr) and continue with the next handler.
       KeyboardInterrupt and SystemExit are NOT intercepted, so a long
       parse can still be aborted.

    Attributes:
       handlers -- list of handler objects to dispatch to; append to it.
       errout   -- object with a write(str) method receiving diagnostics.

    Example usage:
       import sys
       from xml.sax import make_parser
       from xml.sax.handler import feature_namespaces, ContentHandler
       from MultiHandler import MultiHandler

       mh = MultiHandler()
       mh.handlers.append(YourHandler())
       mh.handlers.append(YourOtherHandler())
       parser = make_parser()
       parser.setFeature(feature_namespaces, 0)
       parser.setContentHandler(mh)
       parser.parse(sys.stdin)
    """
    # ContentHandler is just inherited to make isinstance happy.
    # We override every handler method at once via __getattribute__.

    def __init__(self, errout=None):
        """
        Create a dispatcher with an empty handler list.

        errout -- object with a write(str) method used for diagnostics;
                  defaults to sys.stderr when None.
        """
        self.handlers = []
        if errout is None:
            import sys
            errout = sys.stderr
        self.errout = errout

    def __getattribute__(self, name):
        """
        Return the real attribute for ``handlers``/``errout``/dunder names,
        otherwise a callable that fans the call out to every handler.
        """
        # Real instance state, plus dunder names so that introspection
        # (obj.__class__, obj.__dict__, ...) sees the actual object rather
        # than a dispatch function.
        is_dunder = name.startswith('__') and name.endswith('__')
        if name in ('handlers', 'errout') or is_dunder:
            return object.__getattribute__(self, name)

        def handlerCall(*args, **kwargs):
            # self.handlers is read at call time, so handlers appended after
            # this callable was obtained are still dispatched to.
            for handler in self.handlers:
                try:
                    getattr(handler, name)(*args, **kwargs)
                except Exception:
                    # Best-effort by design: report and move on to the next
                    # handler.  Only Exception is caught so KeyboardInterrupt
                    # and SystemExit propagate.
                    self.errout.write(
                        'MultiHandler: error dispatching %s to handler %s\n'
                        % (name, str(handler)))

        return handlerCall

History