From 6ecbc419c0234bf880947708cc7e3aa2afe2d503 Mon Sep 17 00:00:00 2001 From: Norman Feske Date: Fri, 25 Sep 2015 15:49:33 +0200 Subject: [PATCH] Xml_node, Xml_generator: sanitize node content Fixes #1698 --- repos/os/include/util/xml_generator.h | 43 +++++++++- repos/os/include/util/xml_node.h | 101 ++++++++++++++++++++++++ repos/os/src/test/xml_generator/main.cc | 32 ++++++++ 3 files changed, 174 insertions(+), 2 deletions(-) diff --git a/repos/os/include/util/xml_generator.h b/repos/os/include/util/xml_generator.h index a35313c23..d1a841874 100644 --- a/repos/os/include/util/xml_generator.h +++ b/repos/os/include/util/xml_generator.h @@ -87,6 +87,30 @@ class Genode::Xml_generator */ void append(char const *src) { append(src, strlen(src)); } + /** + * Append character, sanitize it if needed (NUL and the XML-reserved characters are replaced by their character entities) + */ + void append_sanitized(char const c) + { + switch (c) { + case 0: append("&#x00;"); break; + case '>': append("&gt;"); break; + case '<': append("&lt;"); break; + case '&': append("&amp;"); break; + case '"': append("&quot;"); break; + case '\'': append("&apos;"); break; + default: append(c); break; + } + } + + /** + * Append 'len' bytes of character buffer 'src', sanitizing each character if needed + */ + void append_sanitized(char const *src, size_t len) + { + for (; len--; append_sanitized(*src++)); + } + /** * Return unused part of the buffer */ @@ -192,7 +216,7 @@ class Genode::Xml_generator dst.append(' '); dst.append(name); dst.append("=\""); - dst.append(value); + dst.append(value, strlen(value)); dst.append("\""); _attr_offset += gap; @@ -201,9 +225,14 @@ class Genode::Xml_generator void append(char const *src, size_t src_len) { Out_buffer content_buffer = _content_buffer(false); - content_buffer.append(src, src_len); + _commit_content(content_buffer); + } + void append_sanitized(char const *src, size_t src_len) + { + Out_buffer content_buffer = _content_buffer(false); + content_buffer.append_sanitized(src, src_len); _commit_content(content_buffer); } @@ -300,6 +329,16 @@ class Genode::Xml_generator 
_curr_node->append(str, str_len == ~0UL ? strlen(str) : str_len); } + /** + * Append sanitized content to XML node (if 'str_len' is omitted, 'str' is taken as null-terminated string) + * + * This method must not be followed by calls of 'attribute'. + */ + void append_sanitized(char const *str, size_t str_len = ~0UL) + { + _curr_node->append_sanitized(str, str_len == ~0UL ? strlen(str) : str_len); + } + size_t used() const { return _out_buffer.used(); } }; diff --git a/repos/os/include/util/xml_node.h b/repos/os/include/util/xml_node.h index df1a8c344..2a61be0f1 100644 --- a/repos/os/include/util/xml_node.h +++ b/repos/os/include/util/xml_node.h @@ -396,6 +396,66 @@ class Genode::Xml_node Token next_token() const { return _next; } }; + /** + * Helper class to decode XML character entities ('character' is the decoded character, 'encoded_len' the number of source bytes it occupied) + */ + struct Decoded_character + { + char character = 0; + size_t encoded_len = 1; + + struct Translation + { + char character; + char const *seq; + size_t seq_len; + }; + + static Translation translate(char const *src, size_t src_len) + { + enum { NUM = 6 }; + static Translation translations[NUM] = { + { '>', "&gt;", 4 }, + { '<', "&lt;", 4 }, + { '&', "&amp;", 5 }, + { '"', "&quot;", 6 }, + { '\'', "&apos;", 6 }, + { 0, "&#x00;", 6 } + }; + + if (src_len == 0) + return { 0, nullptr, 0 }; + + for (unsigned i = 0; i < NUM; i++) { + + Translation const &translation = translations[i]; + + if (src_len < translation.seq_len + || memcmp(src, translation.seq, translation.seq_len)) + continue; + + /* 'src' starts with the complete entity sequence */ + return translation; + } + + /* sequence is not known, pass single character as is */ + return { *src, nullptr, 1 }; + } + + Decoded_character(char const *src, size_t src_len) + { + if (src_len == 0 || *src != '&') { /* test the length first, 'src' must not be read if it is empty */ + character = src_len ? *src : 0; + return; + } + + Translation const translation = translate(src, src_len); + + character = translation.character; + encoded_len = translation.seq_len; + } + }; + const char *_addr; /* first character of XML data */ size_t _max_len; /* length of XML data in characters */ int _num_sub_nodes; /* number of immediate sub nodes */ 
@@ -605,6 +665,47 @@ class Genode::Xml_node return _end_tag.token().start() - content_addr(); } + /** + * Export decoded node content from XML node + * + * \param dst destination buffer + * \param dst_len size of destination buffer in bytes + * \return number of bytes written to the destination buffer + * + * This function transforms XML character entities into their + * respective characters. At most 'dst_len - 1' bytes are written, no null termination is added. + */ + size_t decoded_content(char *dst, size_t dst_len) const + { + size_t result_len = 0; + char const *src = content_base(); + size_t src_len = content_size(); + + for (; dst_len > 1 && src_len; result_len++, dst_len--) { + + Decoded_character const decoded_character(src, src_len); + + *dst++ = decoded_character.character; + + src += decoded_character.encoded_len; + src_len -= decoded_character.encoded_len; + } + + return result_len; + } + + /** + * Read decoded node content as Genode::String + */ + template <typename STRING> + STRING decoded_content() const + { + char buf[STRING::capacity() + 1]; + size_t const len = decoded_content(buf, sizeof(buf)); + buf[min(len, STRING::capacity())] = 0; + return STRING(buf); + } + /** * Return the number of the XML node's immediate sub nodes */ diff --git a/repos/os/src/test/xml_generator/main.cc b/repos/os/src/test/xml_generator/main.cc index 2472e5000..8cf4c925f 100644 --- a/repos/os/src/test/xml_generator/main.cc +++ b/repos/os/src/test/xml_generator/main.cc @@ -13,6 +13,7 @@ #include #include +#include using Genode::size_t; @@ -68,6 +69,37 @@ int main(int argc, char **argv) catch (Genode::Xml_generator::Buffer_exceeded) { printf("buffer exceeded (expected error)\n"); } + /* + * Test the sanitizing of XML node content + */ + { + /* generate pattern that contains all possible byte values */ + char pattern[256]; + for (unsigned i = 0; i < sizeof(pattern); i++) + pattern[i] = i; + + /* generate XML with the pattern as content */ + Genode::Xml_generator xml(dst, sizeof(dst), "data", [&] () { + xml.append_sanitized(pattern, sizeof(pattern)); }); + + /* parse the 
generated XML data */ + Genode::Xml_node node(dst); + + /* obtain decoded node content, i.e., with character entities translated back to characters */ + char decoded[sizeof(dst)]; + size_t const decoded_len = node.decoded_content(decoded, sizeof(decoded)); + + /* the round trip must reproduce the original pattern in both length and content */ + if (decoded_len != sizeof(pattern)) { + printf("decoded content has unexpected length %zd\n", decoded_len); + return 1; + } + if (Genode::memcmp(decoded, pattern, sizeof(pattern))) { + printf("decoded content does not match original pattern\n"); + return 1; + } + } + printf("--- XML generator test finished ---\n"); return 0;