Welcome, guest | Sign In | My Account | Store | Cart
import StringIO, sys
from xml import sax
from xml.sax import handler, saxutils, xmlreader

RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'


class RDFFilter(saxutils.XMLFilterBase):
    """SAX filter that strips RDF-namespace content from an event stream.

    Every element whose namespace URI is RDF_NS is dropped together with
    everything nested inside it (child elements and character data).
    Elements outside such a subtree are forwarded to the downstream
    content handler with any RDF_NS attributes removed.

    The filter only overrides the namespace-aware callbacks, so the
    underlying reader must have namespace processing enabled.
    """

    def __init__(self, *args):
        """Initialise the filter; arguments go to XMLFilterBase unchanged."""
        saxutils.XMLFilterBase.__init__(self, *args)
        # One flag per currently open element, innermost first.  The
        # trailing False is a sentinel for the document level, so index 0
        # is always valid.
        self.in_rdf_stack = [False]

    def startElementNS(self, name, qname, attrs):
        """Forward a start tag unless it is RDF or nested inside RDF.

        name  -- (uri, localname) pair identifying the element
        qname -- raw qualified name as reported by the reader (may be None)
        attrs -- namespace-aware attributes of the element
        """
        if name[0] == RDF_NS or self.in_rdf_stack[0]:
            # Either an RDF element or a descendant of one: swallow it and
            # remember to swallow its end tag and content as well.
            self.in_rdf_stack.insert(0, True)
            return

        # Keep only the attributes outside the RDF namespace.  The element
        # name is deliberately never rebound here, so the element is
        # forwarded under its own name regardless of its attributes.
        kept_values = {}
        kept_qnames = {}
        for attr_name, value in attrs.items():
            if attr_name[0] == RDF_NS:
                continue
            kept_values[attr_name] = value
            try:
                # AttributesNSImpl wants a name -> qname mapping, not the
                # bare list returned by getQNames().
                kept_qnames[attr_name] = attrs.getQNameByName(attr_name)
            except KeyError:
                # The incoming attributes carry no qname for this name.
                pass

        self.in_rdf_stack.insert(0, False)
        saxutils.XMLFilterBase.startElementNS(
            self, name, qname,
            xmlreader.AttributesNSImpl(kept_values, kept_qnames))

    def characters(self, content):
        """Forward character data unless it lies inside an RDF subtree."""
        if self.in_rdf_stack[0]:
            return
        saxutils.XMLFilterBase.characters(self, content)

    def endElementNS(self, name, qname):
        """Forward an end tag unless its start tag was suppressed.

        name  -- (uri, localname) pair identifying the element
        qname -- raw qualified name as reported by the reader (may be None)
        """
        if self.in_rdf_stack.pop(0):
            return
        saxutils.XMLFilterBase.endElementNS(self, name, qname)

def filter_rdf(input, output):
    """filter_rdf(input:file, output:file)

    Read XML from the input stream and write it back out to the output
    stream, leaving out every element and attribute that belongs to
    the RDF namespace.
    """
    # Chain: SAX parser -> RDFFilter -> XMLGenerator writing to `output`.
    rdf_stripper = RDFFilter(sax.make_parser())

    # RDFFilter works on the namespace-aware callbacks, so namespace
    # processing has to be switched on.
    rdf_stripper.setFeature(handler.feature_namespaces, True)
    rdf_stripper.setContentHandler(saxutils.XMLGenerator(output))
    rdf_stripper.setErrorHandler(handler.ErrorHandler())

    rdf_stripper.parse(input)

if __name__ == '__main__':
    # Demo: run a small document containing an RDF island through the
    # filter and print the result to standard output.  The %s placeholders
    # are never substituted; they sit inside the RDF section, which the
    # filter removes anyway.
    TEST_RDF = '''<?xml version="1.0"?>
<metadata xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:dc="http://purl.org/dc/elements/1.1/">
   <title>  This is non-RDF content </title>
   <rdf:RDF>
     <rdf:Description rdf:about="%s">
       <dc:Creator>%s</dc:Creator>
     </rdf:Description>
   </rdf:RDF>
  <element />
</metadata>
'''
    filter_rdf(StringIO.StringIO(TEST_RDF), sys.stdout)

History