utf8: use non-character U+fffe as invalid codepoint

Unicode non-characters [1] are guaranteed never to be assigned to a
character. The formerly used U+fffd, however, is a valid character: the
replacement character [2], which Qt5 correctly displays as <?>.

[1] https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Non-characters
[2] https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character

Issue #3483
This commit is contained in:
Christian Helmuth 2019-08-23 12:08:58 +02:00
parent 4967166811
commit a97b8043b5
3 changed files with 3 additions and 3 deletions

View File

@ -368,7 +368,7 @@ void Terminal::Main::_handle_input()
if (special_sequence)
_read_buffer.add(special_sequence);
else
else if (codepoint.valid())
_read_buffer.add(codepoint);
});
});

View File

@ -25,7 +25,7 @@ namespace Genode {
struct Genode::Codepoint
{
static constexpr uint32_t INVALID = 0xfffd;
static constexpr uint32_t INVALID = 0xfffe;
uint32_t value;

View File

@ -107,7 +107,7 @@ void Component::construct(Genode::Env &env)
Expect_codepoint(0x3b5));
/* 2.1.1 1 byte (U-00000000) */
test("\xef\xbf\xbd", Expect_invalid(), Expect_end());
test("\x00", Expect_codepoint(0x00), Expect_end());
/* 2.1.2 2 bytes (U-00000080) */
test("\xc2\x80", Expect_codepoint(0x80), Expect_end());