about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: nyamatongwe <unknown> 2012-05-16 17:32:00 +1000
committer: nyamatongwe <unknown> 2012-05-16 17:32:00 +1000
commit: 489edabd16dfa6b1b985cb5fcde83025f5e6837c (patch)
tree: a08cfc00b55813f29da393e2f67cb6c120524565
parent: 0e5a82e17f6a5e38b539735b3b51e567a04601bb (diff)
download: scintilla-mirror-489edabd16dfa6b1b985cb5fcde83025f5e6837c.tar.gz
Treat 66 non-characters *FFFE, *FFFF, FDD0 .. FDEF as errors and display the individual bytes.
See The Unicode Standard (version 6.1) section 16.7 Noncharacters.
-rw-r--r-- src/Editor.cxx 8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 1a591b325..b46f8a6b7 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -2073,6 +2073,10 @@ bool BadUTF(const char *s, int len, int &trailBytes) {
if (len < 4)
return true;
if (GoodTrailByte(us[1]) && GoodTrailByte(us[2]) && GoodTrailByte(us[3])) {
+ if (((us[1] & 0xf) == 0xf) && (us[2] == 0xbf) && ((us[3] == 0xbe) || (us[3] == 0xbf))) {
+ // *FFFE or *FFFF non-character
+ return true;
+ }
if (*us == 0xf4) {
// Check if encoding a value beyond the last Unicode character 10FFFF
if (us[1] > 0x8f) {
@@ -2116,6 +2120,10 @@ bool BadUTF(const char *s, int len, int &trailBytes) {
// U+FFFF
return true;
}
+ if ((*us == 0xef) && (us[1] == 0xb7) && (((us[2] & 0xf0) == 0x90) || ((us[2] & 0xf0) == 0xa0))) {
+ // U+FDD0 .. U+FDEF
+ return true;
+ }
trailBytes = 2;
return false;
} else {