diff options
author | Neil <nyamatongwe@gmail.com> | 2017-03-02 09:41:01 +1100 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2017-03-02 09:41:01 +1100 |
commit | 14c9bcc7dc7d5a8577136baad5b43c6bc311312d (patch) | |
tree | 4e842ee9bb84c731c37ae9ea5844c6cf5c8ecad2 /src/UniConversion.cxx | |
parent | e15a9f49e1c9224d1c81bc5ef1c14f008ee2d87c (diff) | |
download | scintilla-mirror-14c9bcc7dc7d5a8577136baad5b43c6bc311312d.tar.gz |
Fix potential problems with IME on Cocoa when document contains invalid UTF-8.
Diffstat (limited to 'src/UniConversion.cxx')
-rw-r--r-- | src/UniConversion.cxx | 23 |
1 files changed, 23 insertions, 0 deletions
```diff
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 4da9e102a..d0028d65e 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -8,6 +8,7 @@
 #include <stdlib.h>
 
 #include <stdexcept>
+#include <string>
 
 #include "UniConversion.h"
 
@@ -304,6 +305,28 @@ int UTF8DrawBytes(const unsigned char *us, int len) {
 	return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
 }
 
+// Replace invalid bytes in UTF-8 with the replacement character
+std::string FixInvalidUTF8(const std::string &text) {
+	std::string result;
+	const unsigned char *us = reinterpret_cast<const unsigned char *>(text.c_str());
+	size_t remaining = text.size();
+	while (remaining > 0) {
+		const int utf8Status = UTF8Classify(us, static_cast<int>(remaining));
+		if (utf8Status & UTF8MaskInvalid) {
+			// Replacement character 0xFFFD = UTF8:"efbfbd".
+			result.append("\xef\xbf\xbd");
+			us++;
+			remaining--;
+		} else {
+			const int len = utf8Status&UTF8MaskWidth;
+			result.append(reinterpret_cast<const char *>(us), len);
+			us += len;
+			remaining -= len;
+		}
+	}
+	return result;
+}
+
 #ifdef SCI_NAMESPACE
 }
 #endif
```