#!/usr/bin/env python


"""
Program:		XML Routines

Description:	This provides functions and classes to read an XML file, building
				a tree of the data file's elements and their attributes.

Notes:
	--	Think about any of the following:
		--	Read an xml file using xmlData then parse with an LL1 type parser.
		--	Look up MacOSX schema for arrays etc.
		
History:
  2004/10/04	Added "return None" to getFirstChildByName().  Added class to
  				visit the Tree rather than having it embedded in the node
  				class which is a cleaner implementation.

!!! USE AT YOUR OWN RISK !!!
"""

# Module metadata.  NOTE: __version__ is a float here, not the more usual
# version string, so comparing it with a string will not behave as expected.
__version__ = 1.0
__license__ = "none"




######################################################################
#								Imports
######################################################################

import		os
import		sys
import		optparse
import		string
import		xml.sax



######################################################################
#							Global Data
######################################################################

# Flags
# fDebug:   when non-zero, xmlHandler traces each start/end element event.
#           Set through setDebug() or by the -d/--debug option in main().
# fVerbose: declared global in main() but never read or written in this file.
fDebug = 0
fVerbose  = 0



		
######################################################################
#						set the Debug Flag.
######################################################################

def setDebug( fValue ):
	"""Turn the module-wide debug flag on or off.

	fValue is interpreted by truthiness: any true value stores 1 in the
	global fDebug, any false value stores 0.  Nothing is returned.
	"""
	global fDebug

	# bool() applies the same truth test an `if` would; int() maps it to 1/0.
	fDebug = int( bool( fValue ) )



######################################################################
#				Read an XML file building a database tree.
######################################################################

# NOTE:		Element attributes are stored on each node as a dictionary
#			(xmlElement.dictAttributes).  Namespace processing is turned off.

#---------------------------------------------------------------------
#							XML Element Class
#---------------------------------------------------------------------

class xmlElement:
	"""One element (node) of the XML tree.

	Instance attributes:
		name			-- the element's tag name.
		parent			-- the owning xmlElement, or None for the top node.
		dictAttributes	-- dictionary of attribute name -> attribute value.
		listChildren	-- child elements in the order they were added.
		data			-- the element's character data, or None if none was set.
		dictData		-- an empty dictionary created per node; this class
						   never reads or writes it.
	"""

	def __init__( self, name, parent, dictAttributes ):
		"""Create a node.  dictAttributes may be None, meaning "no attributes"."""
		self.data = None
		self.name = name
		self.parent = parent
		# The root node is created with None; store an empty dictionary so
		# that getAttribute()/getAttributeNames() work on every node.
		if dictAttributes is None:
			dictAttributes = { }
		self.dictAttributes = dictAttributes
		self.listChildren = [ ]
		self.dictData = { }


	def __str__( self ):
		"""Return "name{attributes}: data" followed by a newline.

		The attribute dictionary and the ": data" part are omitted when empty.
		"""
		szStr = '%s' % (self.name)
		if self.dictAttributes:
			szStr += "%s" % (self.dictAttributes)
		if self.data:
			szStr += ": %s\n" % (self.data)
		else:
			szStr += "\n"
		return szStr


	def addChild( self, node ):
		"""Append node to the end of this element's children."""
		self.listChildren.append( node )


	def getAttribute( self, name ):
		"""Return the value of attribute `name`; raises KeyError if absent."""
		return self.dictAttributes[ name ]


	def getAttributeNames( self ):
		"""Return a list of this element's attribute names."""
		# list() keeps the result a real list on Python 3 as well.
		return list( self.dictAttributes.keys( ) )


	def getChildren( self ):
		"""Return the list of children (the live list, not a copy)."""
		return self.listChildren


	def getData( self ):
		"""Return the element's character data, or None."""
		return self.data


	def getFirstChildByName( self, name ):
		"""Return the first child whose name equals `name`, or None."""
		for oChild in self.iterChildrenByName( name ):
			return oChild
		return None


	def getName( self ):
		"""Return the element's tag name."""
		return self.name


	def hasChildByName( self, name ):
		"""Return 1 if some child is named `name`, otherwise 0."""
		for oChild in self.iterChildrenByName( name ):
			return 1
		return 0


	def iterChildren( self ):
		"""Generate every child in order."""
		for oChild in self.listChildren:
			yield oChild


	def iterChildrenByName( self, name ):
		"""Generate, in order, the children whose name equals `name`."""
		for oChild in self.listChildren:
			if name == oChild.name:
				yield oChild


	def numChildren( self ):
		"""Return the number of children."""
		return len( self.listChildren )


	def setData( self, data ):
		"""Store `data` as the element's character data.

		Empty, None or whitespace-only data is ignored and leaves any
		previous value untouched.  The text is stored unstripped.
		"""
		if not data or not data.strip( ):
			return
		# Keep the native string type.  Only a Python 2 unicode object gets
		# encoded, and as UTF-8 so that non-ASCII text no longer raises.
		if not isinstance( data, str ) and hasattr( data, 'encode' ):
			data = data.encode( 'utf-8' )
		self.data = data



#---------------------------------------------------------------------
#						XML Element Handler Class
#---------------------------------------------------------------------

class xmlHandler(xml.sax.ContentHandler):
	"""SAX content handler that builds a tree of xmlElement nodes.

	A synthetic node named 'root' is created up front; the document's own
	top-level element becomes its only child.  Fetch the tree with
	xmlRoot() once parsing has finished.
	"""

	def __init__( self ):
		"""Set up the element stack with the synthetic root node."""
		xml.sax.ContentHandler.__init__( self )
		self.iLevel = 0			# not used by this class
		self.listKeys = [ ]		# not used by this class
		self.listDataStack = [ xmlElement( u'root', None, None ) ]
		# Text gathered so far for the element on top of listDataStack.
		self.data = ''
		# Text buffers of the enclosing elements, saved while a child is open.
		self.listSavedData = [ ]


	def _toStr( self, text ):
		"""Return text as the interpreter's native str type.

		Only a Python 2 unicode object is converted (to UTF-8 bytes); native
		strings are returned unchanged.
		"""
		if not isinstance( text, str ) and hasattr( text, 'encode' ):
			return text.encode( 'utf-8' )
		return text


	def startElement( self, name, oAttributes ):
		'SAX start element event handler'
		szName = self._toStr( name )
		if fDebug:
			sys.stdout.write( "startElement( %s )\n" % szName )

		# Copy the attributes into a plain dictionary, values stripped of
		# surrounding whitespace; blank attribute names are skipped.
		attr = { }
		for oKey in oAttributes.getNames( ):
			if oKey.strip( ):
				oData = oAttributes.getValue( oKey )
				attr[ self._toStr( oKey ) ] = self._toStr( oData.strip( ) )

		# Hang the new node under the element that is currently open.
		parent = self.listDataStack[ -1 ]
		node = xmlElement( szName, parent, attr )
		parent.addChild( node )
		self.listDataStack.append( node )

		# Park the parent's text so it is not lost while the child is open.
		self.listSavedData.append( self.data )
		self.data = ''


	def characters( self, data ):
		'SAX character data event handler'
		# Whitespace-only chunks (typically indentation between tags) are
		# dropped.  NOTE: a parser may deliver text in several chunks, so a
		# chunk consisting only of whitespace inside real text is dropped too.
		if data.strip( ):
			self.data += self._toStr( data )


	def endElement( self, name ):
		'SAX end element event handler'
		if fDebug:
			sys.stdout.write( "endElement( %s )\n" % self._toStr( name ) )

		curData = self.listDataStack.pop( )
		curData.setData( self.data )
		# Resume collecting text for the enclosing element.
		if self.listSavedData:
			self.data = self.listSavedData.pop( )
		else:
			self.data = ''


	def xmlRoot( self ):
		"""Return the synthetic 'root' node at the bottom of the stack."""
		return self.listDataStack[0]



#---------------------------------------------------------------------
#					Read an XML file function.
#---------------------------------------------------------------------

def readXmlFile( szFileName ):
	"""Read the XML file szFileName and return the root of the element tree.

	The returned node is the handler's synthetic 'root' element, whose child
	is the document's top-level element.  Errors from open() and from the
	SAX parser propagate to the caller.
	"""

	# Set up a non-namespace-aware parser that feeds our tree builder.
	fileParser = xml.sax.make_parser( )
	fileParser.setFeature( xml.sax.handler.feature_namespaces, 0 ) # Turn off namespaces.
	curHandler = xmlHandler( )
	fileParser.setContentHandler( curHandler )

	# Binary mode: the parser decodes the bytes itself, honouring the XML
	# encoding declaration.  try/finally closes the file even if parse() raises.
	fileIn = open( szFileName, 'rb' )
	try:
		fileParser.parse( fileIn )
	finally:
		fileIn.close( )
	return curHandler.xmlRoot( )
		
		
#---------------------------------------------------------------------
#							XML Visit Class
#---------------------------------------------------------------------

class xmlVisit:
	"""Base class for walking an element tree depth first.

	Subclasses override visitNode(), startChildren() and endChildren().
	Nodes only need a getChildren() method returning their children.
	A true value returned by visitNode() aborts the walk immediately; no
	further hooks (including endChildren) are called after that.
	"""

	def _depthFirst( self, curNode ):
		"""Walk the tree below curNode; return the abort code, or 0.

		For every node the hooks run in this order: visitNode(node),
		startChildren(node), the same for each child in turn, then
		endChildren(node).  An explicit stack is used instead of recursion,
		so the depth of the tree is not limited by the interpreter's
		recursion limit.
		"""
		iRc = self.visitNode( curNode )
		if iRc:
			return iRc
		self.startChildren( curNode )

		# Each entry is (node, iterator over the children not yet visited).
		listPending = [ ( curNode, iter( curNode.getChildren( ) ) ) ]
		while listPending:
			oOwner, iterKids = listPending[ -1 ]
			for oChild in iterKids:
				iRc = self.visitNode( oChild )
				if iRc:
					return iRc
				self.startChildren( oChild )
				# Descend: the child becomes the node being processed.  The
				# owner's iterator keeps its position for when we come back.
				listPending.append( ( oChild, iter( oChild.getChildren( ) ) ) )
				break
			else:
				# No children left: this node is finished.
				self.endChildren( oOwner )
				listPending.pop( )
		return 0


	def depthFirst( self, curNode ):
		"""Walk the tree rooted at curNode.

		Returns the true value with which visitNode() aborted the walk, or
		0 when every node was visited.
		"""
		return self._depthFirst( curNode )


	def endChildren( self, curNode ):
		""" called when a node's children have been processed.  curNode is the
			node that owns the children.
			Override this if necessary
		"""
		pass


	def startChildren( self, curNode ):
		""" called when a node's children are about to be processed.  curNode is the
			node that owns the children.
			Override this if necessary
		"""
		pass


	def visitNode( self, curNode ):
		""" called when a node is to be processed.  Return a true value to
			abort the walk.
			Override this if necessary
		"""
		pass




#---------------------------------------------------------------------
#					Convert the XML Tree to a string.
#---------------------------------------------------------------------

class xmlTree2String(xmlVisit):
	"""Visitor that renders an element tree as indented text, one node per line."""

	def convert( self, xmlRoot ):
		"""Return the text rendering of the tree rooted at xmlRoot.

		The accumulated text and the indent are reset on each call.
		"""
		self.szString, self.iIndent = '', 0
		self.depthFirst( xmlRoot )
		return self.szString


	def visitNode( self, curNode ):
		"""Append the line for curNode, then indent two columns for its children.

		The line is the current indent, the node name, the attribute
		dictionary if non-empty, and ": data" if the node has data.
		Always returns 0 so the walk continues.
		"""
		listParts = [ self.iIndent * ' ', "%s" % (curNode.name) ]
		if curNode.dictAttributes:
			listParts.append( "%s" % (curNode.dictAttributes) )
		if curNode.data:
			listParts.append( ": %s\n" % (curNode.data) )
		else:
			listParts.append( "\n" )
		self.szString += ''.join( listParts )
		self.iIndent += 2
		return 0


	def endChildren( self, curNode ):
		"""Undo the two-column indent once curNode's children are done."""
		self.iIndent -= 2


#---------------------------------------------------------------------
#			Convert the XML Tree to keyed data (path -> value).
#---------------------------------------------------------------------

class xmlTree2KeyedData(xmlVisit):
	"""Visitor that flattens an element tree into attributes of an object.

	Each node that has data is stored in nodeAccum.__dict__ under a key
	made of the node names from the top of the tree down to that node,
	joined with '_'.  A later node with the same key overwrites an earlier one.
	"""

	def makeKeyedData( self, nodeAccum, xmlRoot ):
		"""Fill nodeAccum from the tree rooted at xmlRoot and return nodeAccum."""
		self.listKeys = [ ]
		self.nodeAccum = nodeAccum
		self.depthFirst( xmlRoot )
		return self.nodeAccum


	def visitNode( self, curNode ):
		"""Push curNode's name on the key path and record its data, if any.

		Always returns 0 so the walk continues.
		"""
		self.listKeys.append( curNode.name )
		if not curNode.data:
			return 0
		# The key is the full path of names down to this node.
		newKey = '_'.join( self.listKeys )
		if newKey:
			self.nodeAccum.__dict__[newKey] = curNode.data
		return 0


	def endChildren( self, curNode ):
		"""Drop curNode's name from the key path once its children are done."""
		self.listKeys.pop( )
		

				

######################################################################
#						Command-line interface
######################################################################


def main( argV=None ):
	"""Command-line interface: print the element tree of an XML file.

	argV is a full argument vector including the program name; it defaults
	to sys.argv.  The first positional argument is the XML file to read.
	-d/--debug turns on the module's debug tracing.

	Returns 0 on success.  A missing file name is reported through the
	option parser, which prints the usage text and exits with status 2.
	"""
	global fDebug

	if argV is None:
		argV = sys.argv

	# Parse the command line.
	szUsage = "usage: %prog [options] arg1 arg2 ..."
	oCmdPrs = optparse.OptionParser( usage=szUsage )
	oCmdPrs.add_option( "-d", "--debug", action="store_true",
						dest="fDebug", default=False,
						help="Set debug mode"
	)
	(oOptions, oArgs) = oCmdPrs.parse_args( argV )
	if oOptions.fDebug:
		fDebug = 1
		sys.stdout.write( "In DEBUG Mode...\n" )

	# oArgs[0] is the program name because the whole argV was parsed; the
	# file name is the first positional argument after it.  Taking it from
	# oArgs rather than argV keeps options such as -d from being mistaken
	# for the file name.
	if len( oArgs ) < 2:
		oCmdPrs.error( "missing XML file name" )

	# Build the XML Tree.
	xmlRoot = readXmlFile( oArgs[1] )

	# Now convert it to a string and print that.
	oTree2String = xmlTree2String( )
	szString = oTree2String.convert( xmlRoot )
	sys.stdout.write( szString + "\n" )

	return 0




if __name__ == "__main__":
	# Run the command-line interface when executed as a script and hand its
	# result to the shell; a false result (None or 0) becomes exit status 0.
	iExitCode = main( sys.argv )
	sys.exit( iExitCode or 0 )

# History
#
#   • revision 2 (19 years ago)
#   • previous revisions are not available