From d738d5de2a38c7a423a8302d9950a9016c2c47f6 Mon Sep 17 00:00:00 2001
From: nyamatongwe
Date: Wed, 16 May 2012 17:32:00 +1000
Subject: Treat 66 non-characters *FFFE, *FFFF, FDD0 .. FDEF as errors and display the individual bytes. See The Unicode Standard (version 6.1) section 16.7 Noncharacters.

---
 src/Editor.cxx | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src')

diff --git a/src/Editor.cxx b/src/Editor.cxx
index 1a591b325..b46f8a6b7 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -2073,6 +2073,10 @@ bool BadUTF(const char *s, int len, int &trailBytes) {
 		if (len < 4)
 			return true;
 		if (GoodTrailByte(us[1]) && GoodTrailByte(us[2]) && GoodTrailByte(us[3])) {
+			if (((us[1] & 0xf) == 0xf) && (us[2] == 0xbf) && ((us[3] == 0xbe) || (us[3] == 0xbf))) {
+				// *FFFE or *FFFF non-character
+				return true;
+			}
 			if (*us == 0xf4) {
 				// Check if encoding a value beyond the last Unicode character 10FFFF
 				if (us[1] > 0x8f) {
@@ -2116,6 +2120,10 @@ bool BadUTF(const char *s, int len, int &trailBytes) {
 				// U+FFFF
 				return true;
 			}
+			if ((*us == 0xef) && (us[1] == 0xb7) && (((us[2] & 0xf0) == 0x90) || ((us[2] & 0xf0) == 0xa0))) {
+				// U+FDD0 .. U+FDEF
+				return true;
+			}
 			trailBytes = 2;
 			return false;
 		} else {
-- 
cgit v1.2.3