about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Neil <nyamatongwe@gmail.com> 2017-03-02 09:41:01 +1100
committer: Neil <nyamatongwe@gmail.com> 2017-03-02 09:41:01 +1100
commit: 14c9bcc7dc7d5a8577136baad5b43c6bc311312d (patch)
tree: 4e842ee9bb84c731c37ae9ea5844c6cf5c8ecad2 /src
parent: e15a9f49e1c9224d1c81bc5ef1c14f008ee2d87c (diff)
download: scintilla-mirror-14c9bcc7dc7d5a8577136baad5b43c6bc311312d.tar.gz
Fix potential problems with IME on Cocoa when document contains invalid UTF-8.
Diffstat (limited to 'src')
-rw-r--r-- src/UniConversion.cxx 23
-rw-r--r-- src/UniConversion.h 1
2 files changed, 24 insertions, 0 deletions
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 4da9e102a..d0028d65e 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -8,6 +8,7 @@
#include <stdlib.h>
#include <stdexcept>
+#include <string>
#include "UniConversion.h"
@@ -304,6 +305,28 @@ int UTF8DrawBytes(const unsigned char *us, int len) {
return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
}
+// Return a copy of text in which each byte that UTF8Classify flags as
+// invalid is substituted by U+FFFD, the Unicode replacement character.
+std::string FixInvalidUTF8(const std::string &text) {
+	std::string fixed;
+	const unsigned char *cursor = reinterpret_cast<const unsigned char *>(text.c_str());
+	for (size_t bytesLeft = text.size(); bytesLeft > 0;) {
+		const int status = UTF8Classify(cursor, static_cast<int>(bytesLeft));
+		// An invalid byte is consumed singly; a valid sequence is copied whole.
+		const bool invalid = (status & UTF8MaskInvalid) != 0;
+		const int consumed = invalid ? 1 : (status & UTF8MaskWidth);
+		if (invalid) {
+			// U+FFFD encoded as UTF-8 is the three bytes EF BF BD.
+			fixed.append("\xef\xbf\xbd");
+		} else {
+			fixed.append(reinterpret_cast<const char *>(cursor), consumed);
+		}
+		cursor += consumed;
+		bytesLeft -= consumed;
+	}
+	return fixed;
+}
+
#ifdef SCI_NAMESPACE
}
#endif
diff --git a/src/UniConversion.h b/src/UniConversion.h
index aeb13f0c2..c5867a664 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -23,6 +23,7 @@ size_t UTF16Length(const char *s, size_t len);
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
unsigned int UTF32FromUTF8(const char *s, unsigned int len, unsigned int *tbuf, unsigned int tlen);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf);
+std::string FixInvalidUTF8(const std::string &text);
extern int UTF8BytesOfLead[256];
void UTF8BytesOfLeadInitialise();