diff options
author | crupest <crupest@outlook.com> | 2022-01-02 18:49:31 +0800 |
---|---|---|
committer | crupest <crupest@outlook.com> | 2022-01-02 18:49:31 +0800 |
commit | 18099ad8f5c24b1c2b1c92238dbc54912eab0406 (patch) | |
tree | 514207d630a2100262aedaea276ea259bbc108b7 | |
parent | 96a93e17baaff2c2050eba2afada639e93001232 (diff) | |
download | cru-18099ad8f5c24b1c2b1c92238dbc54912eab0406.tar.gz cru-18099ad8f5c24b1c2b1c92238dbc54912eab0406.tar.bz2 cru-18099ad8f5c24b1c2b1c92238dbc54912eab0406.zip |
...
-rw-r--r-- | include/cru/common/String.hpp | 7 | ||||
-rw-r--r-- | include/cru/common/StringUtil.hpp | 2 | ||||
-rw-r--r-- | include/cru/xml/XmlNode.hpp | 3 | ||||
-rw-r--r-- | include/cru/xml/XmlParser.hpp | 4 | ||||
-rw-r--r-- | src/common/String.cpp | 9 | ||||
-rw-r--r-- | src/common/StringUtil.cpp | 4 | ||||
-rw-r--r-- | src/xml/XmlNode.cpp | 6 | ||||
-rw-r--r-- | src/xml/XmlParser.cpp | 115 | ||||
-rw-r--r-- | test/CMakeLists.txt | 1 | ||||
-rw-r--r-- | test/xml/CMakeLists.txt | 6 | ||||
-rw-r--r-- | test/xml/ParserTest.cpp | 135 |
11 files changed, 266 insertions, 26 deletions
diff --git a/include/cru/common/String.hpp b/include/cru/common/String.hpp index ade2d84b..bd079243 100644 --- a/include/cru/common/String.hpp +++ b/include/cru/common/String.hpp @@ -165,6 +165,10 @@ class CRU_BASE_API String { } inline void append(StringView str); + String substr(size_type start, size_type size) const { + return String(this->buffer_ + start, size); + } + public: String& operator+=(value_type value) { this->append(value); @@ -177,6 +181,9 @@ class CRU_BASE_API String { } public: + String& TrimEnd(); + + public: void AppendCodePoint(CodePoint code_point); Utf16CodePointIterator CodePointIterator() const { diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp index 6c6b47b8..a35da695 100644 --- a/include/cru/common/StringUtil.hpp +++ b/include/cru/common/StringUtil.hpp @@ -221,4 +221,6 @@ Index CRU_BASE_API Utf16NextWord(const char16_t* ptr, Index size, char16_t CRU_BASE_API ToLower(char16_t c); char16_t CRU_BASE_API ToUpper(char16_t c); + +char16_t CRU_BASE_API IsWhitespace(char16_t c); } // namespace cru diff --git a/include/cru/xml/XmlNode.hpp b/include/cru/xml/XmlNode.hpp index 0cbb6756..186f395c 100644 --- a/include/cru/xml/XmlNode.hpp +++ b/include/cru/xml/XmlNode.hpp @@ -67,7 +67,7 @@ class XmlElementNode : public XmlNode { CRU_DELETE_COPY(XmlElementNode) CRU_DELETE_MOVE(XmlElementNode) - ~XmlElementNode() override = default; + ~XmlElementNode() override; public: String GetTag() const { return tag_; } @@ -78,6 +78,7 @@ class XmlElementNode : public XmlNode { void SetAttributes(std::unordered_map<String, String> attributes) { attributes_ = std::move(attributes); } + const std::vector<XmlNode*> GetChildren() const { return children_; } void AddAttribute(String key, String value); void AddChild(XmlNode* child); diff --git a/include/cru/xml/XmlParser.hpp b/include/cru/xml/XmlParser.hpp index 1d44c46f..188a08f2 100644 --- a/include/cru/xml/XmlParser.hpp +++ b/include/cru/xml/XmlParser.hpp @@ -28,6 +28,7 @@ class XmlParser { XmlElementNode* DoParse(); char16_t Read1(); + String ReadWithoutAdvance(int count = 1); void ReadSpacesAndDiscard(); String ReadSpaces(); String ReadIdenitifier(); @@ -36,8 +37,9 @@ class XmlParser { private: String xml_; - XmlElementNode* cache_; + XmlElementNode* cache_ = nullptr; + // Consider the while file enclosed by a single tag called $root. XmlElementNode* pseudo_root_node_ = new XmlElementNode(u"$root"); XmlElementNode* current_ = pseudo_root_node_; int current_position_ = 0; diff --git a/src/common/String.cpp b/src/common/String.cpp index 743a33fd..8d674369 100644 --- a/src/common/String.cpp +++ b/src/common/String.cpp @@ -211,6 +211,15 @@ String::iterator String::erase(const_iterator start, const_iterator end) { return s; } +String& String::TrimEnd() { + if (size_ == 0) return *this; + while (size_ > 0 && IsWhitespace(buffer_[size_ - 1])) { + size_--; + } + + return *this; +} + std::string String::ToUtf8() const { return cru::ToUtf8(buffer_, size_); } void String::AppendCodePoint(CodePoint code_point) { diff --git a/src/common/StringUtil.cpp b/src/common/StringUtil.cpp index c828fa21..d3948c6a 100644 --- a/src/common/StringUtil.cpp +++ b/src/common/StringUtil.cpp @@ -252,4 +252,8 @@ char16_t ToUpper(char16_t c) { } return c; } + +char16_t IsWhitespace(char16_t c) { + return c == u' ' || c == u'\t' || c == u'\n' || c == u'\r'; +} } // namespace cru diff --git a/src/xml/XmlNode.cpp b/src/xml/XmlNode.cpp index f4b43ea6..d6203973 100644 --- a/src/xml/XmlNode.cpp +++ b/src/xml/XmlNode.cpp @@ -1,6 +1,12 @@ #include "cru/xml/XmlNode.hpp" namespace cru::xml { +XmlElementNode::~XmlElementNode() { + for (auto child : children_) { + delete child; + } +} + void XmlElementNode::AddAttribute(String key, String value) { attributes_[std::move(key)] = std::move(value); } diff --git a/src/xml/XmlParser.cpp b/src/xml/XmlParser.cpp index f24a7f68..d0f61542 100644 --- a/src/xml/XmlParser.cpp +++ b/src/xml/XmlParser.cpp @@ -2,6 +2,10 @@ #include "cru/xml/XmlNode.hpp" namespace cru::xml { +XmlParser::XmlParser(String xml) : xml_(std::move(xml)) {} + +XmlParser::~XmlParser() { delete pseudo_root_node_; } + XmlElementNode* XmlParser::Parse() { if (!cache_) { cache_ = DoParse(); @@ -16,6 +20,13 @@ char16_t XmlParser::Read1() { return xml_[current_position_++]; } +String XmlParser::ReadWithoutAdvance(int count) { + if (current_position_ + count > xml_.size()) { + return u""; + } + return xml_.substr(current_position_, count); +} + void XmlParser::ReadSpacesAndDiscard() { while (current_position_ < xml_.size() && (xml_[current_position_] == ' ' || xml_[current_position_] == '\t' || @@ -70,48 +81,104 @@ String XmlParser::ReadAttributeString() { XmlElementNode* XmlParser::DoParse() { while (current_position_ < xml_.size()) { - switch (xml_[current_position_]) { - case '<': { - ++current_position_; + ReadSpacesAndDiscard(); - if (Read1() == '/') { - } else { - ReadSpacesAndDiscard(); + if (current_position_ == xml_.size()) { + break; + } - String tag = ReadIdenitifier(); + if (ReadWithoutAdvance() == u"<") { + current_position_ += 1; - XmlElementNode* node = new XmlElementNode(tag); + if (ReadWithoutAdvance() == u"/") { + current_position_ += 1; - while (true) { - ReadSpacesAndDiscard(); - if (Read1() == '>') { - break; - } else { - String attribute_name = ReadIdenitifier(); + ReadSpacesAndDiscard(); + + String tag = ReadIdenitifier(); + + if (tag != current_->GetTag()) { + throw XmlParsingException(u"Tag mismatch."); + } + + ReadSpacesAndDiscard(); + + if (Read1() != '>') { + throw XmlParsingException(u"Expected >."); + } + + current_ = current_->GetParent(); + } else { + ReadSpacesAndDiscard(); + + String tag = ReadIdenitifier(); + + XmlElementNode* node = new XmlElementNode(tag); - ReadSpacesAndDiscard(); + bool is_self_closing = false; - if (Read1() != '=') { - throw XmlParsingException(u"Expected '='"); - } + while (true) { + ReadSpacesAndDiscard(); + auto c = ReadWithoutAdvance(); + if (c == u">") { + current_position_ += 1; + break; + } else if (c == u"/") { + current_position_ += 1; + + if (Read1() != '>') { + throw XmlParsingException(u"Expected >."); + } - ReadSpacesAndDiscard(); + is_self_closing = true; + break; + } else { + String attribute_name = ReadIdenitifier(); - String attribute_value = ReadAttributeString(); + ReadSpacesAndDiscard(); - node->AddAttribute(attribute_name, attribute_value); + if (Read1() != '=') { + throw XmlParsingException(u"Expected '='"); } + + ReadSpacesAndDiscard(); + + String attribute_value = ReadAttributeString(); + + node->AddAttribute(attribute_name, attribute_value); } + } + + current_->AddChild(node); - current_->AddChild(node); + if (!is_self_closing) { current_ = node; } + } + + } else { + String text; + + while (ReadWithoutAdvance() != u"<") { + char16_t c = Read1(); - break; + text += c; } + + if (!text.empty()) current_->AddChild(new XmlTextNode(text.TrimEnd())); } } - return pseudo_root_node_; + if (current_ != pseudo_root_node_) { + throw XmlParsingException(u"Unexpected end of xml"); + } + + if (pseudo_root_node_->GetChildren().size() != 1 || + pseudo_root_node_->GetChildren()[0]->GetType() != + XmlNode::Type::Element) { + throw XmlParsingException(u"Expected 1 element node as root."); + } + + return static_cast<XmlElementNode*>(pseudo_root_node_->GetChildren()[0]); } } // namespace cru::xml diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3b9567cd..455aad90 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,6 +7,7 @@ target_link_libraries(cru_test_base INTERFACE GTest::gtest GTest::gtest_main) add_subdirectory(common) add_subdirectory(platform) +add_subdirectory(xml) if(WIN32) add_subdirectory(win) diff --git a/test/xml/CMakeLists.txt b/test/xml/CMakeLists.txt new file mode 100644 index 00000000..a8dfa264 --- /dev/null +++ b/test/xml/CMakeLists.txt @@ -0,0 +1,6 @@ +add_executable(cru_xml_test + ParserTest.cpp +) +target_link_libraries(cru_xml_test PRIVATE cru_xml cru_test_base) + +gtest_discover_tests(cru_xml_test) diff --git a/test/xml/ParserTest.cpp b/test/xml/ParserTest.cpp new file mode 100644 index 00000000..01098b7c --- /dev/null +++ b/test/xml/ParserTest.cpp @@ -0,0 +1,135 @@ +#include "cru/xml/XmlNode.hpp" +#include "cru/xml/XmlParser.hpp" + +#include <gtest/gtest.h> + +using namespace cru::xml; + +TEST(CruXmlParserTest, Simple) { + XmlParser parser(u"<root></root>"); + auto n = parser.Parse(); + ASSERT_EQ(n->GetTag(), u"root"); + ASSERT_EQ(n->GetAttributes().empty(), true); + ASSERT_EQ(n->GetChildren().size(), 0); + delete n; +} + +TEST(CruXmlParserTest, SimpleWithAttribute) { + XmlParser parser(u"<root a1=\"v1\" a2=\"v2\"></root>"); + auto n = parser.Parse(); + ASSERT_EQ(n->GetTag(), u"root"); + ASSERT_EQ(n->GetAttributes().at(u"a1"), u"v1"); + ASSERT_EQ(n->GetAttributes().at(u"a2"), u"v2"); + ASSERT_EQ(n->GetChildren().size(), 0); + delete n; +} + +TEST(CruXmlParserTest, SimpleSelfClosing) { + XmlParser parser(u"<root a1=\"v1\" a2=\"v2\"/>"); + auto n = parser.Parse(); + ASSERT_EQ(n->GetTag(), u"root"); + ASSERT_EQ(n->GetAttributes().at(u"a1"), u"v1"); + ASSERT_EQ(n->GetAttributes().at(u"a2"), u"v2"); + ASSERT_EQ(n->GetChildren().size(), 0); + delete n; +} + +TEST(CruXmlParserTest, NestedElement) { + XmlParser parser( + u"<root><c1><d1></d1></c1><c2><d2></d2><d3></d3></c2></root>"); + auto n = parser.Parse(); + ASSERT_EQ(n->GetChildren().size(), 2); + ASSERT_EQ(static_cast<XmlElementNode*>(n->GetChildren().at(0))->GetTag(), + u"c1"); + ASSERT_EQ(static_cast<XmlElementNode*>(n->GetChildren().at(1))->GetTag(), + u"c2"); + ASSERT_EQ(static_cast<XmlElementNode*>(n->GetChildren().at(0)) + ->GetChildren() + .size(), + 1); + ASSERT_EQ(static_cast<XmlElementNode*>( + static_cast<XmlElementNode*>(n->GetChildren().at(0)) + ->GetChildren() + .at(0)) + ->GetTag(), + u"d1"); + ASSERT_EQ(static_cast<XmlElementNode*>(n->GetChildren().at(1)) + ->GetChildren() + .size(), + 2); + ASSERT_EQ(static_cast<XmlElementNode*>( + static_cast<XmlElementNode*>(n->GetChildren().at(1)) + ->GetChildren() + .at(0)) + ->GetTag(), + u"d2"); + ASSERT_EQ(static_cast<XmlElementNode*>( + static_cast<XmlElementNode*>(n->GetChildren().at(1)) + ->GetChildren() + .at(1)) + ->GetTag(), + u"d3"); + delete n; +} + +TEST(CruXmlParserTest, SimpleText) { + XmlParser parser(u"<root>text</root>"); + auto n = parser.Parse(); + ASSERT_EQ(n->GetChildren().size(), 1); + ASSERT_EQ(static_cast<XmlTextNode*>(n->GetChildren().at(0))->GetText(), + u"text"); + delete n; +} + +TEST(CruXmlParserTest, Whitespace) { + XmlParser parser(u"\t\t<root>\n\t\t\ttext test\n\t\t</root>\t\t"); + auto n = parser.Parse(); + ASSERT_EQ(n->GetChildren().size(), 1); + ASSERT_EQ(static_cast<XmlTextNode*>(n->GetChildren().at(0))->GetText(), + u"text test"); + delete n; +} + +TEST(CruXmlParserTest, Complex) { + XmlParser parser( + uR"( +<root a1="v1"> + <c1> + <d1> + </d1> + </c1> + <c2 a2="v2" a3="v3"> + t1 + <d2 a4="v4"> t2 </d2> + text test + <d3></d3> + t2 + </c2> +</root> + )"); + auto n = parser.Parse(); + ASSERT_EQ(n->GetAttributes().at(u"a1"), u"v1"); + ASSERT_EQ(n->GetChildren().size(), 2); + ASSERT_EQ(static_cast<XmlElementNode*>(n->GetChildren().at(0))->GetTag(), + u"c1"); + ASSERT_EQ(static_cast<XmlElementNode*>(n->GetChildren().at(0)) + ->GetChildren() + .size(), + 1); + auto c2 = static_cast<XmlElementNode*>(n->GetChildren().at(1)); + ASSERT_EQ(c2->GetTag(), u"c2"); + ASSERT_EQ(c2->GetAttributes().at(u"a2"), u"v2"); + ASSERT_EQ(c2->GetAttributes().at(u"a3"), u"v3"); + ASSERT_EQ(static_cast<XmlTextNode*>(c2->GetChildren().at(0))->GetText(), + u"t1"); + auto d2 = static_cast<XmlElementNode*>(c2->GetChildren().at(1)); + ASSERT_EQ(d2->GetTag(), u"d2"); + ASSERT_EQ(d2->GetAttributes().at(u"a4"), u"v4"); + ASSERT_EQ(static_cast<XmlTextNode*>(c2->GetChildren().at(2))->GetText(), + u"text test"); + ASSERT_EQ(static_cast<XmlElementNode*>(c2->GetChildren().at(3))->GetTag(), + u"d3"); + ASSERT_EQ(static_cast<XmlTextNode*>(c2->GetChildren().at(4))->GetText(), + u"t2"); + delete n; +} |