diff options
author | Neil <nyamatongwe@gmail.com> | 2018-06-01 09:59:25 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2018-06-01 09:59:25 +1000 |
commit | 2aaf735e921f9676f520c5423d018441a0a83c57 (patch) | |
tree | 26e99dabfbd122a1056221f410d353f3bc5da705 /src | |
parent | d77d42af4f0d64e85f2860804652d4a976f23e54 (diff) | |
download | scintilla-mirror-2aaf735e921f9676f520c5423d018441a0a83c57.tar.gz |
Add function to find a UTF-16 position in a UTF-8 string.
Diffstat (limited to 'src')
-rw-r--r-- | src/UniConversion.cxx | 12 | ||||
-rw-r--r-- | src/UniConversion.h | 1 |
2 files changed, 13 insertions, 0 deletions
```diff
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 1287aa612..3b7472638 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -37,6 +37,18 @@ size_t UTF8Length(std::wstring_view wsv) {
 	return len;
 }
 
+size_t UTF8PositionFromUTF16Position(std::string_view u8Text, size_t positionUTF16) noexcept {
+	size_t positionUTF8 = 0;
+	for (size_t lengthUTF16 = 0; (positionUTF8 < u8Text.length()) && (lengthUTF16 < positionUTF16);) {
+		const unsigned char uch = u8Text[positionUTF8];
+		const unsigned int byteCount = UTF8BytesOfLead[uch];
+		lengthUTF16 += UTF16LengthFromUTF8ByteCount(byteCount);
+		positionUTF8 += byteCount;
+	}
+
+	return positionUTF8;
+}
+
 void UTF8FromUTF16(std::wstring_view wsv, char *putf, size_t len) {
 	size_t k = 0;
 	for (size_t i = 0; i < wsv.length() && wsv[i];) {
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 1b84b8f81..e2be40776 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -15,6 +15,7 @@ const int UTF8MaxBytes = 4;
 const int unicodeReplacementChar = 0xFFFD;
 
 size_t UTF8Length(std::wstring_view wsv);
+size_t UTF8PositionFromUTF16Position(std::string_view u8Text, size_t positionUTF16) noexcept;
 void UTF8FromUTF16(std::wstring_view wsv, char *putf, size_t len);
 void UTF8FromUTF32Character(int uch, char *putf);
 size_t UTF16Length(std::string_view sv);
```