/* MicroXml provides stand-alone support for the basic, most-used features of XML -- tags, attributes, and element values. That's all. It produces a DOM tree of XML nodes. MicroXml does not support DTDs, CDATAs and other advanced XML features. It stores the XML declaration but doesn't use it. However, within these limitations, MicroXml is easy to use and allows far simpler debugging of XML results than when using a full-featured XML library. With XmlDoc::to_string() one can round-trip the XML for fast eyeball checking. This module contains main() which will parse a sample XML string into a DOM tree, then write the DOM tree out as XML text. Jack Trainor 2015 */ #include #include #include #include #include #include typedef unsigned int uint; static const int NO_TAG = 0; static const int START_TAG = 1; static const int END_TAG = 2; static const int EMPTY_TAG = 3; static const int COMMENT_TAG = 4; static const int DECL_TAG = 5; static const int DOCTYPE_TAG = 6; bool strEq(const std::string& s1, const std::string& s2); int getTagType(const std::string& xml); bool charInString(const char c, const std::string& s); void compressReturns(std::string& s); //////////////////////////////////////////////////////////////////////////////// class XBuffer { public: const std::string& text; int index; int len; XBuffer(const std::string& text, int index=0); virtual ~XBuffer(void); virtual bool atEnd(); virtual void incIndex(); virtual char getChar(); virtual char getNextChar(); virtual void skipSpace(); virtual void skipChars(const std::string& chars); virtual bool getCharsToDelimiter(const std::string& delimiters, std::string& chars, bool inclDelimiter=false); }; //////////////////////////////////////////////////////////////////////////////// class XmlNode { public: std::string tag; std::string text; std::map attributes; std::vector children; XmlNode* parent; XmlNode(const std::string& tag, const std::string& text, const std::map& attributes, XmlNode* parent); virtual ~XmlNode(void); virtual void addChild(XmlNode* node); virtual void appendText(const std::string& text); virtual void toString(std::string& s); }; //////////////////////////////////////////////////////////////////////////////// class XmlDocument { public: std::deque stack; XmlNode* root; XmlDocument(void); virtual ~XmlDocument(void); virtual void pushNode(XmlNode* node); virtual XmlNode* popNode(); virtual XmlNode* getCurNode(); virtual void startElement(const std::string& name, const std::map& attributes); virtual void characters(const std::string& chars); virtual void endElement(const std::string& name); virtual void getNameAttrsFromTag(std::string& xml, std::string& name, std::map& attrs); virtual void lex(const std::string& xml, std::vector& chunks); virtual XmlNode* parse(const std::string& xml, bool stripEmptyChars=true); virtual void handleError(const std::string& msg); // virtual void test(); }; //============================================================================== // Utils //============================================================================== void trim(std::string &str){ int i = 0; int len = str.size(); while (isspace(str[i]) != 0) i++; str = str.substr(i, str.length() - i); i = str.size()-1; len = str.size(); while (len > 0 && i >= 0 && isspace(str[i]) != 0) i--; if (len > 0 && i >= 0) { str = str.substr(0, i + 1); } } void replace(std::string &str, char oldC, char newC) { for (uint i = 0; i < str.size(); i++) { char c = str[i]; if (c == oldC) { str[i] = newC; } } } bool strEq(const std::string& s1, const std::string& s2) { return (s1.compare(s2) == 0); } bool charInString(const char c, const std::string& s) { size_t pos = s.find(c); return (pos != std::string::npos); } void compressReturns(std::string& xml) { XBuffer xbuf(xml, 0); std::string sOut; while (!xbuf.atEnd()) { std::string chars; xbuf.getCharsToDelimiter("\n", chars, true); sOut.append(chars); xbuf.skipChars("\n"); } xml.clear(); xml.append(sOut); } int getTagType(const std::string& xml) { int len = xml.size(); if (xml[0] == '<' && xml[len-1] == '>') { if ((xml.find("= len); } void XBuffer::incIndex() { if (!atEnd()) { index += 1; } } char XBuffer::getChar() { if (!atEnd()) { return text.at(index); } return 0; } char XBuffer::getNextChar() { incIndex(); return getChar(); } void XBuffer::skipSpace() { while (!atEnd()) { int c = getChar(); if (isspace(c)) { incIndex(); } else { break; } } } void XBuffer::skipChars(const std::string& chars) { while (!atEnd()) { int c = getChar(); if (charInString(c, chars)) { incIndex(); } else { break; } } } bool XBuffer::getCharsToDelimiter(const std::string& delimiters, std::string& chars, bool inclDelimiter) { chars.clear(); bool foundDelimiter = false; while (!atEnd() && !foundDelimiter) { char c = getChar(); size_t found = delimiters.find(c); foundDelimiter = (found != std::string::npos); if (!foundDelimiter || (foundDelimiter && inclDelimiter)) { chars.append(1, c); incIndex(); } } return foundDelimiter; } //============================================================================== // XmlNode //============================================================================== XmlNode::XmlNode(const std::string& tag, const std::string& text, const std::map& attributes, XmlNode* parent=NULL) : tag(tag), text(text), attributes(attributes), parent(parent) { if (parent) { parent->addChild(this); } } XmlNode::~XmlNode(void) { for (std::vector::iterator it=children.begin(); it!=children.end(); ++it) { XmlNode* node = *it; delete node; } } void XmlNode::addChild(XmlNode* node) { children.push_back(node); node->parent = this; } void XmlNode::appendText(const std::string& text_) { text.append(text_); } void XmlNode::toString(std::string& s) { std::string attrsStr; for (std::map::iterator it=attributes.begin(); it!=attributes.end(); ++it) { if (attrsStr.size() != 0) { attrsStr.append(" "); } attrsStr.append(it->first + "=\"" + it->second + "\""); } if (children.size() > 0 || text.size() > 0) { if (attrsStr.size() > 0) { s.append("\n<" + tag + " " + attrsStr + ">"); } else { s.append("\n<" + tag + ">"); } s.append("\n<" + tag + ">"); s.append(text); for (std::vector::iterator it=children.begin(); it!=children.end(); ++it) { std::string nodeString; XmlNode* node = *it; node->toString(nodeString); s.append(nodeString); } s.append("\n"); } else { if (attrsStr.size() > 0) { attrsStr.insert(0, " "); } s.append("<" + tag + attrsStr + "/>\n"); } compressReturns(s); } //============================================================================== // XmlDocument //============================================================================== XmlDocument::XmlDocument(void) : root(NULL) { } XmlDocument::~XmlDocument(void) { delete root; } void XmlDocument::handleError(const std::string& msg) { std::cout << msg << std::endl; } void XmlDocument::pushNode(XmlNode* node) { stack.push_back(node); } XmlNode* XmlDocument::popNode() { XmlNode* node = getCurNode(); if (node) { stack.pop_back(); } else { std::string msg = "XmlDocument::popNode stack empty."; handleError(msg); } return node; } XmlNode* XmlDocument::getCurNode() { XmlNode* node = NULL; int nodeCount = stack.size(); if (nodeCount > 0) { node = stack[nodeCount-1]; } return node; } void XmlDocument::startElement(const std::string& name, const std::map& attributes) { std::cout << "startElement: [" + name + "]" << std::endl; XmlNode* node = new XmlNode(name, "", attributes); if (!root) { root = node; } XmlNode* curNode = getCurNode(); if (curNode) { curNode->addChild(node); } pushNode(node); } void XmlDocument::characters(const std::string& chars) { XmlNode* curNode = getCurNode(); if (curNode) { curNode->appendText(chars); } } void XmlDocument::endElement(const std::string& name) { std::cout << "endElement: [" + name + "]" << std::endl; XmlNode* node = popNode(); if (node && (node->tag.compare(name) != 0)) { std::string msg = "XmlDocument::end_element: tag ["; msg.append(name + "] not matching node ["); msg.append(node->tag + "]"); handleError(msg); } } void XmlDocument::getNameAttrsFromTag(std::string& xml, std::string& name, std::map& attrs) { replace(xml, '\'', '"'); XBuffer xbuf(xml); xbuf.incIndex(); xbuf.skipSpace(); xbuf.getCharsToDelimiter("' />", name); char c = xbuf.getChar(); while (true) { if (c == ' ') { xbuf.skipSpace(); std::string attrName; xbuf.getCharsToDelimiter("=", attrName); c = xbuf.getNextChar(); if (c == '"') { xbuf.incIndex(); std::string attrVal; xbuf.getCharsToDelimiter("\"", attrVal); c = xbuf.getChar(); if (c == '"') { attrs[attrName] = attrVal; xbuf.incIndex(); } else { std::string msg = "XmlDoc::get_name_attrs_from_tag ["; msg.append(xml + "] reached end of buffer too soon."); handleError(msg); } c = xbuf.getChar(); } } else { break; } } } void XmlDocument::lex(const std::string& xml, std::vector& chunks) { // Divide xml into raw items delimited by '<' and '>' pair or not so delimited. std::string chars; XBuffer xbuf(xml); while (!xbuf.atEnd()) { bool found = xbuf.getCharsToDelimiter("<", chars); bool charsEmpty = (chars.size() == 0); bool charsReturn = (chars.compare("\n") == 0); if (found && !charsEmpty && !charsReturn) { chunks.push_back(chars); } if (!xbuf.atEnd()) { bool found = xbuf.getCharsToDelimiter(">", chars, true); bool charsEmpty = (chars.size() == 0); if (found && !charsEmpty) { chunks.push_back(chars); } } } } XmlNode* XmlDocument::parse(const std::string& xml, bool stripEmptyChars) { std::vector chunks; lex(xml, chunks); if (chunks.size() > 0) { for (unsigned int i = 0; i < chunks.size(); i++) { std::string name; std::map attrs; std::string chunk = chunks[i]; int tagType = getTagType(chunk); switch (tagType) { case START_TAG: getNameAttrsFromTag(chunk, name, attrs); startElement(name, attrs); break; case END_TAG: name = chunk.substr(2, chunk.size()-3); endElement(name); break; case EMPTY_TAG: getNameAttrsFromTag(chunk, name, attrs); startElement(name, attrs); endElement(name); break; case COMMENT_TAG: break; case DECL_TAG: break; case DOCTYPE_TAG: break; default: if (stripEmptyChars == true) { trim(chunk); } characters(chunk); } } } return root; } //////////////////////////////////////////////////////////////////////////////// const std::string XML = " \ \ gambardella, matthew \ xml developer \'s guide \ computer \ 44.95 \ 2000-10-01 \ an in-depth look at creating applications \ with xml. \ \ \ ralls, kim \ midnight rain \ fantasy \ 5.95 \ 2000-12-16 \ a former architect battles corporate zombies, \ an evil sorceress, and her own childhood to become queen \ of the world. \ \ \ corets, eva \ maeve ascendant \ fantasy \ 5.95 \ 2000-11-17 \ after the collapse of a nanotechnology \ society in england, the young survivors lay the \ foundation for a new society. \ \ \ "; void test() { XmlDocument xdoc; xdoc.parse(XML); std::string s; xdoc.root->toString(s); std::cout << s << std::endl; } int main(int argc, char *argv[]) { std::cout << "MicroXml Recipe." << std::endl; test(); std::cout << "Press RETURN." << std::endl; std::cin.get(); return 0; }