Improve tokenizing to support XML comments

The whole XML comment has to be parsed as one XML tag to support strange
but valid combinations like

  <!---->
  <!--invisible-tag></invisible-tag-->

Fixes #1424
This commit is contained in:
Christian Helmuth 2016-05-28 23:57:18 +02:00
parent 941a71346a
commit cf771c10d6
4 changed files with 147 additions and 26 deletions

View File

@ -90,7 +90,9 @@ class Genode::Token
/**
* Return true if token is valid
*
* A token is considered valid if it refers to a start position and has a
* non-zero length.
*/
operator bool () const { return _start && _len; }
bool valid() const { return _start && _len; }
/* boolean conversion, yields the same result as 'valid()' */
operator bool () const { return valid(); }
/**
* Access single characters of token
@ -105,6 +107,37 @@ class Genode::Token
*/
Token next() const { return Token(_start + _len, _max_len - _len); }
/**
 * Return token that follows the first occurrence of a delimiter
 *
 * \param delim  null-terminated character sequence to search for
 * \return       token that starts directly behind 'delim', or an
 *               invalid token if this token is invalid or 'delim'
 *               was not found
 *
 * The search is a character-wise scan that starts at the beginning of
 * this token and covers at most '_max_len' characters. It does not
 * consider token boundaries, so the delimiter may consist of several
 * consecutive tokens (e.g., "-->").
 */
Token next_after(char const *delim) const
{
	size_t const len = strlen(delim);

	if (!valid() || len > _max_len)
		return Token();

	/*
	 * NOTE(review): the scan is bounded by '_max_len' only. Presumably
	 * callers pass a length that matches the underlying buffer - confirm
	 * that the loop cannot run past a terminating zero.
	 */
	char const *s = _start;
	for (size_t rest = _max_len; rest >= len; --rest, ++s) {

		if (strcmp(s, delim, len) != 0)
			continue;

		/*
		 * Skip the whole delimiter. Using 'next()' on the token at 's'
		 * would advance by the length of the first token only, which is
		 * a single character for delimiters like "-->".
		 * 'rest >= len' is guaranteed by the loop condition.
		 */
		return Token(s + len, rest - len);
	}

	return Token();
}
/**
* Return true if token starts with pattern
*/
bool matches(char const *pattern)
{
size_t const len = strlen(pattern);
if (!valid() || len > _max_len)
return false;
return strcmp(pattern, _start, len) == 0;
}
/**
* Return next non-whitespace token
*/

View File

@ -346,20 +346,6 @@ class Genode::Xml_node
Token _next; /* token following the comment */
bool _valid; /* true if comment is well formed */
/**
 * Compare a sequence of tokens against a character sequence
 *
 * \param t  first token of the sequence
 * \param s  null-terminated character sequence
 * \return   true if the first character of each successive token
 *           equals the corresponding character of 's'
 */
static bool _match(Token t, const char *s)
{
	for (const char *expected = s; *expected; ++expected) {

		/* only the first character of the current token is examined */
		if (t[0] != *expected)
			return false;

		t = t.next();
	}
	return true;
}
public:
/**
@ -369,18 +355,16 @@ class Genode::Xml_node
*/
Comment(Token t) : _valid(false)
{
/*
 * NOTE(review): this hunk is rendered without +/- markers. It appears
 * to interleave the lines of the former implementation (based on
 * '_match') with the lines of the new one (based on 'Token::matches'
 * and 'Token::next_after') - confirm against the actual file.
 */
/* check for comment-start tag */
if (!_match(t, "<!--"))
/* check for comment start */
if (!t.matches("<!--"))
return;
/* search for comment-end tag */
for ( ; t && !_match(t, "-->"); t = t.next());
/* skip four single characters for "<!--" */
t = t.next().next().next().next();
if (t.type() == Token::END)
return;
_next = t.next().next().next();
_valid = true;
/* find token after comment delimiter */
_next = t.next_after("-->");
/*
 * 'next_after' yields an invalid token if "-->" was not found, so the
 * comment is regarded as well formed only if a valid token follows
 */
_valid = _next.valid();
}
/**

View File

@ -23,4 +23,94 @@ append qemu_args "-nographic -m 64"
run_genode_until {.*child "test-xml_node" exited with exit value 0.*\n} 10
puts "Test succeeded"
# pay only attention to the output of init and its children
grep_output {^\[init \-\> test\-xml_node\]}
trim_lines
compare_output_to {
[init -> test-xml_node] --- XML-token test ---
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="<"
[init -> test-xml_node] token type="IDENT", len=6, content="config"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content=">"
[init -> test-xml_node] token type="WHITESPACE", len=2, content=" "
[init -> test-xml_node] token type="IDENT", len=9, content="sometext1"
[init -> test-xml_node] token type="WHITESPACE", len=2, content=" "
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="<"
[init -> test-xml_node] token type="IDENT", len=7, content="program"
[init -> test-xml_node] token type="WHITESPACE", len=1, content=" "
[init -> test-xml_node] token type="IDENT", len=4, content="attr"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="="
[init -> test-xml_node] token type="STRING", len=6, content=""abcd""
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="/"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content=">"
[init -> test-xml_node] token type="WHITESPACE", len=2, content=" "
[init -> test-xml_node] token type="IDENT", len=9, content="sometext2"
[init -> test-xml_node] token type="WHITESPACE", len=2, content=" "
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="<"
[init -> test-xml_node] token type="IDENT", len=7, content="program"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content=">"
[init -> test-xml_node] token type="IDENT", len=9, content="inProgram"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="<"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="/"
[init -> test-xml_node] token type="IDENT", len=7, content="program"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content=">"
[init -> test-xml_node] token type="WHITESPACE", len=2, content=" "
[init -> test-xml_node] token type="IDENT", len=9, content="sometext3"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="<"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content="/"
[init -> test-xml_node] token type="IDENT", len=6, content="config"
[init -> test-xml_node] token type="SINGLECHAR", len=1, content=">"
[init -> test-xml_node] --- XML-parser test ---
[init -> test-xml_node] -- Test valid XML structure --
[init -> test-xml_node] XML node: name = "config", number of subnodes = 3
[init -> test-xml_node] XML node: name = "program", number of subnodes = 2
[init -> test-xml_node] XML node: name = "filename", leaf content = "init"
[init -> test-xml_node] XML node: name = "quota", leaf content = "16M"
[init -> test-xml_node] XML node: name = "program", number of subnodes = 2
[init -> test-xml_node] XML node: name = "filename", leaf content = "timer"
[init -> test-xml_node] XML node: name = "quota", leaf content = "64K"
[init -> test-xml_node] XML node: name = "program", number of subnodes = 2
[init -> test-xml_node] XML node: name = "filename", leaf content = "framebuffer"
[init -> test-xml_node] XML node: name = "quota", leaf content = "8M"
[init -> test-xml_node] -- Test invalid XML structure (broken tag) --
[init -> test-xml_node] XML node: name = "config", number of subnodes = 3
[init -> test-xml_node] XML node: name = "program", number of subnodes = 2
[init -> test-xml_node] XML node: name = "filename", leaf content = "init"
[init -> test-xml_node] XML node: name = "quota", leaf content = "16M"
[init -> test-xml_node] XML node: name = "program", number of subnodes = 2
[init -> test-xml_node] XML node: name = "filename", leaf content = "timer"
[init -> test-xml_node] XML node: name = "quota", leaf content = "64K"
[init -> test-xml_node] XML node: name = "program", number of subnodes = 2
[init -> test-xml_node] XML node: name = "filename", leaf content = "framebuffer"
[init -> test-xml_node] XML node: name = "quota", leaf content = "8M"
[init -> test-xml_node] -- Test invalid XML structure (truncated) --
[init -> test-xml_node] string has invalid XML syntax
[init -> test-xml_node] -- Test invalid XML structure (truncated comment) --
[init -> test-xml_node] string has invalid XML syntax
[init -> test-xml_node] -- Test invalid XML structure (unfinished string) --
[init -> test-xml_node] string has invalid XML syntax
[init -> test-xml_node] -- Test node access by key --
[init -> test-xml_node] content of sub node "filename" = "init"
[init -> test-xml_node] content of sub node "quota" = "16M"
[init -> test-xml_node] sub node "info" is not defined
[init -> test-xml_node] -- Test access to XML attributes --
[init -> test-xml_node] XML node: name = "config", number of subnodes = 3
[init -> test-xml_node] attribute name="priolevels", value="4"
[init -> test-xml_node] XML node: name = "program", number of subnodes = 2
[init -> test-xml_node] XML node: name = "filename", leaf content = "init"
[init -> test-xml_node] XML node: name = "quota", leaf content = "16M"
[init -> test-xml_node] XML node: name = "single-tag", leaf content = ""
[init -> test-xml_node] XML node: name = "single-tag-with-attr", leaf content = ""
[init -> test-xml_node] attribute name="name", value="ein_name"
[init -> test-xml_node] attribute name="quantum", value="2K"
[init -> test-xml_node] -- Test parsing XML with nodes mixed with text --
[init -> test-xml_node] XML node: name = "config", number of subnodes = 2
[init -> test-xml_node] XML node: name = "program", leaf content = ""
[init -> test-xml_node] attribute name="attr", value="abcd"
[init -> test-xml_node] XML node: name = "program", leaf content = "inProgram"
[init -> test-xml_node] -- Test parsing XML with comments --
[init -> test-xml_node] XML node: name = "config", number of subnodes = 2
[init -> test-xml_node] XML node: name = "visible-tag", leaf content = ""
[init -> test-xml_node] XML node: name = "visible-tag", leaf content = ""
[init -> test-xml_node] --- End of XML-parser test ---
}

View File

@ -126,6 +126,17 @@ static const char *xml_test_text_between_nodes =
" sometext3"
"</config>";
/*
 * Strange but valid XML comments
 *
 * The comments placed between the two '<visible-tag/>' nodes cover
 *
 * - an empty comment,
 * - a comment containing a tag surrounded by spaces,
 * - a comment containing a tag without surrounding spaces, and
 * - a comment whose delimiters directly adjoin tag-like text.
 *
 * The tags inside the comments must not show up as sub nodes of
 * '<config>'.
 */
static const char *xml_test_comments =
"<config>"
"<visible-tag/>"
"<!---->"
"<!-- <invisible-tag/> -->"
"<!--<invisible-tag/>-->"
"<!--invisible-tag></invisible-tag-->"
"<visible-tag/>"
"</config>";
/******************
** Test program **
@ -267,7 +278,7 @@ static void print_xml_info(const char *xml_string)
int main()
{
printf("--- XML-token test ---\n");
print_xml_tokens<Scanner_policy_identifier_with_underline>(xml_test_text_between_nodes);
print_xml_tokens<Scanner_policy_identifier_with_underline>(xml_test_text_between_nodes);
printf("--- XML-parser test ---\n");
@ -298,6 +309,9 @@ int main()
printf("-- Test parsing XML with nodes mixed with text --\n");
print_xml_info(xml_test_text_between_nodes);
printf("-- Test parsing XML with comments --\n");
print_xml_info(xml_test_comments);
printf("--- End of XML-parser test ---\n");
return 0;
}