aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorcrupest <crupest@outlook.com>2022-01-02 18:49:31 +0800
committercrupest <crupest@outlook.com>2022-01-02 18:49:31 +0800
commit18099ad8f5c24b1c2b1c92238dbc54912eab0406 (patch)
tree514207d630a2100262aedaea276ea259bbc108b7
parent96a93e17baaff2c2050eba2afada639e93001232 (diff)
downloadcru-18099ad8f5c24b1c2b1c92238dbc54912eab0406.tar.gz
cru-18099ad8f5c24b1c2b1c92238dbc54912eab0406.tar.bz2
cru-18099ad8f5c24b1c2b1c92238dbc54912eab0406.zip
...
-rw-r--r--include/cru/common/String.hpp7
-rw-r--r--include/cru/common/StringUtil.hpp2
-rw-r--r--include/cru/xml/XmlNode.hpp3
-rw-r--r--include/cru/xml/XmlParser.hpp4
-rw-r--r--src/common/String.cpp9
-rw-r--r--src/common/StringUtil.cpp4
-rw-r--r--src/xml/XmlNode.cpp6
-rw-r--r--src/xml/XmlParser.cpp115
-rw-r--r--test/CMakeLists.txt1
-rw-r--r--test/xml/CMakeLists.txt6
-rw-r--r--test/xml/ParserTest.cpp135
11 files changed, 266 insertions, 26 deletions
diff --git a/include/cru/common/String.hpp b/include/cru/common/String.hpp
index ade2d84b..bd079243 100644
--- a/include/cru/common/String.hpp
+++ b/include/cru/common/String.hpp
@@ -165,6 +165,10 @@ class CRU_BASE_API String {
}
inline void append(StringView str);
+ // Builds a new String from `size` elements of this string's buffer, beginning
+ // at offset `start`. Neither `start` nor `size` is range-checked here.
+ String substr(size_type start, size_type size) const {
+   auto first = this->buffer_ + start;
+   return String(first, size);
+ }
+
public:
String& operator+=(value_type value) {
this->append(value);
@@ -177,6 +181,9 @@ class CRU_BASE_API String {
}
public:
+ // Strips trailing whitespace (as determined by IsWhitespace) from this string
+ // in place and returns a reference to this string.
+ String& TrimEnd();
+
+ public:
void AppendCodePoint(CodePoint code_point);
Utf16CodePointIterator CodePointIterator() const {
diff --git a/include/cru/common/StringUtil.hpp b/include/cru/common/StringUtil.hpp
index 6c6b47b8..a35da695 100644
--- a/include/cru/common/StringUtil.hpp
+++ b/include/cru/common/StringUtil.hpp
@@ -221,4 +221,6 @@ Index CRU_BASE_API Utf16NextWord(const char16_t* ptr, Index size,
char16_t CRU_BASE_API ToLower(char16_t c);
char16_t CRU_BASE_API ToUpper(char16_t c);
+
+// Returns a non-zero value when `c` is a space, tab, line feed or carriage
+// return, and zero otherwise.
+// NOTE(review): the result is used as a boolean but the return type is
+// declared as char16_t -- confirm whether `bool` was intended.
+char16_t CRU_BASE_API IsWhitespace(char16_t c);
} // namespace cru
diff --git a/include/cru/xml/XmlNode.hpp b/include/cru/xml/XmlNode.hpp
index 0cbb6756..186f395c 100644
--- a/include/cru/xml/XmlNode.hpp
+++ b/include/cru/xml/XmlNode.hpp
@@ -67,7 +67,7 @@ class XmlElementNode : public XmlNode {
CRU_DELETE_COPY(XmlElementNode)
CRU_DELETE_MOVE(XmlElementNode)
- ~XmlElementNode() override = default;
+ // Deletes every child node held in children_ (defined in XmlNode.cpp).
+ ~XmlElementNode() override;
public:
String GetTag() const { return tag_; }
@@ -78,6 +78,7 @@ class XmlElementNode : public XmlNode {
void SetAttributes(std::unordered_map<String, String> attributes) {
attributes_ = std::move(attributes);
}
+ // Returns a read-only reference to this element's list of child nodes.
+ // Returned by reference so that callers do not copy the whole vector on
+ // every call; the reference is only valid while this element is alive.
+ const std::vector<XmlNode*>& GetChildren() const { return children_; }
void AddAttribute(String key, String value);
void AddChild(XmlNode* child);
diff --git a/include/cru/xml/XmlParser.hpp b/include/cru/xml/XmlParser.hpp
index 1d44c46f..188a08f2 100644
--- a/include/cru/xml/XmlParser.hpp
+++ b/include/cru/xml/XmlParser.hpp
@@ -28,6 +28,7 @@ class XmlParser {
XmlElementNode* DoParse();
char16_t Read1();
+ // Returns the next `count` characters at the current position without
+ // advancing it; returns an empty string if fewer than `count` remain.
+ String ReadWithoutAdvance(int count = 1);
void ReadSpacesAndDiscard();
String ReadSpaces();
String ReadIdenitifier();
@@ -36,8 +37,9 @@ class XmlParser {
private:
String xml_;
- XmlElementNode* cache_;
+ XmlElementNode* cache_ = nullptr;
+ // Consider the whole file enclosed by a single tag called $root.
XmlElementNode* pseudo_root_node_ = new XmlElementNode(u"$root");
XmlElementNode* current_ = pseudo_root_node_;
int current_position_ = 0;
diff --git a/src/common/String.cpp b/src/common/String.cpp
index 743a33fd..8d674369 100644
--- a/src/common/String.cpp
+++ b/src/common/String.cpp
@@ -211,6 +211,15 @@ String::iterator String::erase(const_iterator start, const_iterator end) {
return s;
}
+// Drops trailing whitespace, as classified by IsWhitespace, by decrementing
+// size_ until the last remaining character is not whitespace or the string is
+// empty. Returns *this.
+String& String::TrimEnd() {
+  for (; size_ > 0; --size_) {
+    if (!IsWhitespace(buffer_[size_ - 1])) break;
+  }
+  return *this;
+}
+
std::string String::ToUtf8() const { return cru::ToUtf8(buffer_, size_); }
void String::AppendCodePoint(CodePoint code_point) {
diff --git a/src/common/StringUtil.cpp b/src/common/StringUtil.cpp
index c828fa21..d3948c6a 100644
--- a/src/common/StringUtil.cpp
+++ b/src/common/StringUtil.cpp
@@ -252,4 +252,8 @@ char16_t ToUpper(char16_t c) {
}
return c;
}
+
+// Reports whether `c` is a space, horizontal tab, line feed or carriage
+// return. The answer is delivered as a char16_t: 1 for whitespace, 0 otherwise.
+char16_t IsWhitespace(char16_t c) {
+  switch (c) {
+    case u' ':
+    case u'\t':
+    case u'\n':
+    case u'\r':
+      return true;
+    default:
+      return false;
+  }
+}
} // namespace cru
diff --git a/src/xml/XmlNode.cpp b/src/xml/XmlNode.cpp
index f4b43ea6..d6203973 100644
--- a/src/xml/XmlNode.cpp
+++ b/src/xml/XmlNode.cpp
@@ -1,6 +1,12 @@
#include "cru/xml/XmlNode.hpp"
namespace cru::xml {
+// Destroys this element and deletes every node stored in children_.
+XmlElementNode::~XmlElementNode() {
+  for (auto it = children_.begin(); it != children_.end(); ++it) {
+    delete *it;
+  }
+}
+
void XmlElementNode::AddAttribute(String key, String value) {
attributes_[std::move(key)] = std::move(value);
}
diff --git a/src/xml/XmlParser.cpp b/src/xml/XmlParser.cpp
index f24a7f68..d0f61542 100644
--- a/src/xml/XmlParser.cpp
+++ b/src/xml/XmlParser.cpp
@@ -2,6 +2,10 @@
#include "cru/xml/XmlNode.hpp"
namespace cru::xml {
+// Constructs a parser over the given XML text; the text is moved into xml_.
+XmlParser::XmlParser(String xml) : xml_(std::move(xml)) {}
+
+// Deletes the pseudo root node. ~XmlElementNode deletes its children, so this
+// also destroys every node still attached beneath the pseudo root.
+// NOTE(review): DoParse() returns the pseudo root's first child without
+// detaching it. A caller that also deletes the node returned by Parse() (as
+// test/xml/ParserTest.cpp does) would presumably free it a second time when
+// the parser is destroyed -- confirm the intended ownership.
+XmlParser::~XmlParser() { delete pseudo_root_node_; }
+
XmlElementNode* XmlParser::Parse() {
if (!cache_) {
cache_ = DoParse();
@@ -16,6 +20,13 @@ char16_t XmlParser::Read1() {
return xml_[current_position_++];
}
+// Peeks at the next `count` characters starting at current_position_ without
+// changing current_position_. Yields an empty string when fewer than `count`
+// characters remain in xml_.
+String XmlParser::ReadWithoutAdvance(int count) {
+  const bool enough_remaining = current_position_ + count <= xml_.size();
+  if (enough_remaining) {
+    return xml_.substr(current_position_, count);
+  }
+  return u"";
+}
+
void XmlParser::ReadSpacesAndDiscard() {
while (current_position_ < xml_.size() &&
(xml_[current_position_] == ' ' || xml_[current_position_] == '\t' ||
@@ -70,48 +81,104 @@ String XmlParser::ReadAttributeString() {
XmlElementNode* XmlParser::DoParse() {
while (current_position_ < xml_.size()) {
- switch (xml_[current_position_]) {
- case '<': {
- ++current_position_;
+ ReadSpacesAndDiscard();
- if (Read1() == '/') {
- } else {
- ReadSpacesAndDiscard();
+ if (current_position_ == xml_.size()) {
+ break;
+ }
- String tag = ReadIdenitifier();
+ if (ReadWithoutAdvance() == u"<") {
+ current_position_ += 1;
- XmlElementNode* node = new XmlElementNode(tag);
+ if (ReadWithoutAdvance() == u"/") {
+ current_position_ += 1;
- while (true) {
- ReadSpacesAndDiscard();
- if (Read1() == '>') {
- break;
- } else {
- String attribute_name = ReadIdenitifier();
+ ReadSpacesAndDiscard();
+
+ String tag = ReadIdenitifier();
+
+ if (tag != current_->GetTag()) {
+ throw XmlParsingException(u"Tag mismatch.");
+ }
+
+ ReadSpacesAndDiscard();
+
+ if (Read1() != '>') {
+ throw XmlParsingException(u"Expected >.");
+ }
+
+ current_ = current_->GetParent();
+ } else {
+ ReadSpacesAndDiscard();
+
+ String tag = ReadIdenitifier();
+
+ XmlElementNode* node = new XmlElementNode(tag);
- ReadSpacesAndDiscard();
+ bool is_self_closing = false;
- if (Read1() != '=') {
- throw XmlParsingException(u"Expected '='");
- }
+ while (true) {
+ ReadSpacesAndDiscard();
+ auto c = ReadWithoutAdvance();
+ if (c == u">") {
+ current_position_ += 1;
+ break;
+ } else if (c == u"/") {
+ current_position_ += 1;
+
+ if (Read1() != '>') {
+ throw XmlParsingException(u"Expected >.");
+ }
- ReadSpacesAndDiscard();
+ is_self_closing = true;
+ break;
+ } else {
+ String attribute_name = ReadIdenitifier();
- String attribute_value = ReadAttributeString();
+ ReadSpacesAndDiscard();
- node->AddAttribute(attribute_name, attribute_value);
+ if (Read1() != '=') {
+ throw XmlParsingException(u"Expected '='");
}
+
+ ReadSpacesAndDiscard();
+
+ String attribute_value = ReadAttributeString();
+
+ node->AddAttribute(attribute_name, attribute_value);
}
+ }
+
+ current_->AddChild(node);
- current_->AddChild(node);
+ if (!is_self_closing) {
current_ = node;
}
+ }
+
+ } else {
+ String text;
+
+ while (ReadWithoutAdvance() != u"<") {
+ char16_t c = Read1();
- break;
+ text += c;
}
+
+ if (!text.empty()) current_->AddChild(new XmlTextNode(text.TrimEnd()));
}
}
- return pseudo_root_node_;
+ if (current_ != pseudo_root_node_) {
+ throw XmlParsingException(u"Unexpected end of xml");
+ }
+
+ if (pseudo_root_node_->GetChildren().size() != 1 ||
+ pseudo_root_node_->GetChildren()[0]->GetType() !=
+ XmlNode::Type::Element) {
+ throw XmlParsingException(u"Expected 1 element node as root.");
+ }
+
+ return static_cast<XmlElementNode*>(pseudo_root_node_->GetChildren()[0]);
}
} // namespace cru::xml
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3b9567cd..455aad90 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -7,6 +7,7 @@ target_link_libraries(cru_test_base INTERFACE GTest::gtest GTest::gtest_main)
add_subdirectory(common)
add_subdirectory(platform)
+add_subdirectory(xml)
if(WIN32)
add_subdirectory(win)
diff --git a/test/xml/CMakeLists.txt b/test/xml/CMakeLists.txt
new file mode 100644
index 00000000..a8dfa264
--- /dev/null
+++ b/test/xml/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Builds the XML parser unit tests into the cru_xml_test executable.
+add_executable(cru_xml_test
+ ParserTest.cpp
+)
+# Links the cru_xml library under test and the shared cru_test_base target.
+target_link_libraries(cru_xml_test PRIVATE cru_xml cru_test_base)
+
+# Lets CMake's GoogleTest module discover the test cases in the executable.
+gtest_discover_tests(cru_xml_test)
diff --git a/test/xml/ParserTest.cpp b/test/xml/ParserTest.cpp
new file mode 100644
index 00000000..01098b7c
--- /dev/null
+++ b/test/xml/ParserTest.cpp
@@ -0,0 +1,135 @@
+#include "cru/xml/XmlNode.hpp"
+#include "cru/xml/XmlParser.hpp"
+
+#include <gtest/gtest.h>
+
+using namespace cru::xml;
+
+// An empty element parses to a node named "root" with no attributes and no
+// children.
+TEST(CruXmlParserTest, Simple) {
+  XmlParser parser(u"<root></root>");
+  XmlElementNode* root = parser.Parse();
+  ASSERT_EQ(root->GetTag(), u"root");
+  ASSERT_TRUE(root->GetAttributes().empty());
+  ASSERT_EQ(root->GetChildren().size(), 0);
+  delete root;
+}
+
+// Attributes written on an opening tag are exposed through GetAttributes().
+TEST(CruXmlParserTest, SimpleWithAttribute) {
+  XmlParser parser(u"<root a1=\"v1\" a2=\"v2\"></root>");
+  XmlElementNode* root = parser.Parse();
+  ASSERT_EQ(root->GetTag(), u"root");
+  const auto& attributes = root->GetAttributes();
+  ASSERT_EQ(attributes.at(u"a1"), u"v1");
+  ASSERT_EQ(attributes.at(u"a2"), u"v2");
+  ASSERT_EQ(root->GetChildren().size(), 0);
+  delete root;
+}
+
+// A self-closing tag yields an element with its attributes and no children.
+TEST(CruXmlParserTest, SimpleSelfClosing) {
+  XmlParser parser(u"<root a1=\"v1\" a2=\"v2\"/>");
+  XmlElementNode* root = parser.Parse();
+  ASSERT_EQ(root->GetTag(), u"root");
+  const auto& attributes = root->GetAttributes();
+  ASSERT_EQ(attributes.at(u"a1"), u"v1");
+  ASSERT_EQ(attributes.at(u"a2"), u"v2");
+  ASSERT_EQ(root->GetChildren().size(), 0);
+  delete root;
+}
+
+// Nested elements are attached to their enclosing element in document order.
+TEST(CruXmlParserTest, NestedElement) {
+  XmlParser parser(
+      u"<root><c1><d1></d1></c1><c2><d2></d2><d3></d3></c2></root>");
+  XmlElementNode* root = parser.Parse();
+  // The test input contains only elements, so every child is downcast.
+  const auto as_element = [](XmlNode* node) {
+    return static_cast<XmlElementNode*>(node);
+  };
+
+  ASSERT_EQ(root->GetChildren().size(), 2);
+  XmlElementNode* c1 = as_element(root->GetChildren().at(0));
+  ASSERT_EQ(c1->GetTag(), u"c1");
+  XmlElementNode* c2 = as_element(root->GetChildren().at(1));
+  ASSERT_EQ(c2->GetTag(), u"c2");
+
+  ASSERT_EQ(c1->GetChildren().size(), 1);
+  ASSERT_EQ(as_element(c1->GetChildren().at(0))->GetTag(), u"d1");
+
+  ASSERT_EQ(c2->GetChildren().size(), 2);
+  ASSERT_EQ(as_element(c2->GetChildren().at(0))->GetTag(), u"d2");
+  ASSERT_EQ(as_element(c2->GetChildren().at(1))->GetTag(), u"d3");
+  delete root;
+}
+
+// Character data inside an element becomes a single text child.
+TEST(CruXmlParserTest, SimpleText) {
+  XmlParser parser(u"<root>text</root>");
+  XmlElementNode* root = parser.Parse();
+  ASSERT_EQ(root->GetChildren().size(), 1);
+  auto* text_node = static_cast<XmlTextNode*>(root->GetChildren().at(0));
+  ASSERT_EQ(text_node->GetText(), u"text");
+  delete root;
+}
+
+// Whitespace around the document and around text content is discarded, while
+// whitespace inside the text is kept.
+TEST(CruXmlParserTest, Whitespace) {
+  XmlParser parser(u"\t\t<root>\n\t\t\ttext test\n\t\t</root>\t\t");
+  XmlElementNode* root = parser.Parse();
+  ASSERT_EQ(root->GetChildren().size(), 1);
+  auto* text_node = static_cast<XmlTextNode*>(root->GetChildren().at(0));
+  ASSERT_EQ(text_node->GetText(), u"text test");
+  delete root;
+}
+
+// A multi-line document mixing attributes, nested elements and interleaved
+// text parses into the expected tree.
+TEST(CruXmlParserTest, Complex) {
+  XmlParser parser(
+      uR"(
+<root a1="v1">
+ <c1>
+ <d1>
+ </d1>
+ </c1>
+ <c2 a2="v2" a3="v3">
+ t1
+ <d2 a4="v4"> t2 </d2>
+ text test
+ <d3></d3>
+ t2
+ </c2>
+</root>
+ )");
+  XmlElementNode* root = parser.Parse();
+  ASSERT_EQ(root->GetAttributes().at(u"a1"), u"v1");
+  ASSERT_EQ(root->GetChildren().size(), 2);
+
+  auto* c1 = static_cast<XmlElementNode*>(root->GetChildren().at(0));
+  ASSERT_EQ(c1->GetTag(), u"c1");
+  ASSERT_EQ(c1->GetChildren().size(), 1);
+
+  auto* c2 = static_cast<XmlElementNode*>(root->GetChildren().at(1));
+  ASSERT_EQ(c2->GetTag(), u"c2");
+  ASSERT_EQ(c2->GetAttributes().at(u"a2"), u"v2");
+  ASSERT_EQ(c2->GetAttributes().at(u"a3"), u"v3");
+
+  // Children of c2 alternate between text and element nodes.
+  auto* first_text = static_cast<XmlTextNode*>(c2->GetChildren().at(0));
+  ASSERT_EQ(first_text->GetText(), u"t1");
+
+  auto* d2 = static_cast<XmlElementNode*>(c2->GetChildren().at(1));
+  ASSERT_EQ(d2->GetTag(), u"d2");
+  ASSERT_EQ(d2->GetAttributes().at(u"a4"), u"v4");
+
+  auto* middle_text = static_cast<XmlTextNode*>(c2->GetChildren().at(2));
+  ASSERT_EQ(middle_text->GetText(), u"text test");
+
+  auto* d3 = static_cast<XmlElementNode*>(c2->GetChildren().at(3));
+  ASSERT_EQ(d3->GetTag(), u"d3");
+
+  auto* last_text = static_cast<XmlTextNode*>(c2->GetChildren().at(4));
+  ASSERT_EQ(last_text->GetText(), u"t2");
+  delete root;
+}