diff options
| -rw-r--r-- | src/Document.cxx | 9 | ||||
| -rw-r--r-- | src/Editor.cxx | 2 | ||||
| -rw-r--r-- | src/UniConversion.cxx | 62 | ||||
| -rw-r--r-- | src/UniConversion.h | 10 | ||||
| -rw-r--r-- | test/unit/testUniConversion.cxx | 48 | ||||
| -rw-r--r-- | win32/PlatWin.cxx | 12 | ||||
| -rw-r--r-- | win32/ScintillaWin.cxx | 46 | 
7 files changed, 97 insertions, 92 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index fbe387f6d..bcc368a68 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -2939,12 +2939,13 @@ Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci:  		bool matched = false;  		if (SC_CP_UTF8 == doc->dbcsCodePage) { -			const size_t lenS = strlen(s); -			std::vector<wchar_t> ws(lenS + 1); +			const std::string_view sv(s); +			const size_t lenS = sv.length(); +			std::vector<wchar_t> ws(sv.length() + 1);  #if WCHAR_T_IS_16 -			const size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS); +			const size_t outLen = UTF16FromUTF8(sv, &ws[0], lenS);  #else -			const size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS); +			const size_t outLen = UTF32FromUTF8(sv, reinterpret_cast<unsigned int *>(&ws[0]), lenS);  #endif  			ws[outLen] = 0;  			std::wregex regexp; diff --git a/src/Editor.cxx b/src/Editor.cxx index 36ebc5cea..7a61d503a 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -1957,7 +1957,7 @@ void Editor::AddCharUTF(const char *s, unsigned int len, bool treatAsDBCS) {  			// characters representing themselves.  		} else {  			unsigned int utf32[1] = { 0 }; -			UTF32FromUTF8(s, len, utf32, ELEMENTS(utf32)); +			UTF32FromUTF8(std::string_view(s, len), utf32, ELEMENTS(utf32));  			byte = utf32[0];  		}  		NotifyChar(byte); diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx index a6830f950..1287aa612 100644 --- a/src/UniConversion.cxx +++ b/src/UniConversion.cxx @@ -17,10 +17,10 @@ using namespace Scintilla;  namespace Scintilla { -size_t UTF8Length(const wchar_t *uptr, size_t tlen) { +size_t UTF8Length(std::wstring_view wsv) {  	size_t len = 0; -	for (size_t i = 0; i < tlen && uptr[i];) { -		const unsigned int uch = uptr[i]; +	for (size_t i = 0; i < wsv.length() && wsv[i];) { +		const unsigned int uch = wsv[i];  		if (uch < 0x80) {  			len++;  		} else if (uch < 0x800) { @@ -37,10 +37,10 @@ size_t UTF8Length(const wchar_t *uptr, size_t tlen) {  	return len;  } -void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len) { +void UTF8FromUTF16(std::wstring_view wsv, char *putf, size_t len) {  	size_t k = 0; -	for (size_t i = 0; i < tlen && uptr[i];) { -		const unsigned int uch = uptr[i]; +	for (size_t i = 0; i < wsv.length() && wsv[i];) { +		const unsigned int uch = wsv[i];  		if (uch < 0x80) {  			putf[k++] = static_cast<char>(uch);  		} else if (uch < 0x800) { @@ -50,7 +50,7 @@ void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len) {  			(uch <= SURROGATE_TRAIL_LAST)) {  			// Half a surrogate pair  			i++; -			const unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (uptr[i] & 0x3ff); +			const unsigned int xch = 0x10000 + ((uch & 0x3ff) << 10) + (wsv[i] & 0x3ff);  			putf[k++] = static_cast<char>(0xF0 | (xch >> 18));  			putf[k++] = static_cast<char>(0x80 | ((xch >> 12) & 0x3f));  			putf[k++] = static_cast<char>(0x80 | ((xch >> 6) & 0x3f)); @@ -86,14 +86,14 @@ void UTF8FromUTF32Character(int uch, char *putf) {  	putf[k] = '\0';  } -size_t UTF16Length(const char *s, size_t len) { +size_t UTF16Length(std::string_view sv) {  	size_t ulen = 0; -	for (size_t i = 0; i < len;) { -		const unsigned char ch = s[i]; +	for (size_t i = 0; i<sv.length();) { +		const unsigned char ch = sv[i];  		const unsigned int byteCount = UTF8BytesOfLead[ch];  		const unsigned int utf16Len = UTF16LengthFromUTF8ByteCount(byteCount);  		i += byteCount; -		ulen += (i > len) ? 1 : utf16Len; +		ulen += (i > sv.length()) ? 1 : utf16Len;  	}  	return ulen;  } @@ -104,14 +104,14 @@ constexpr unsigned char TrailByteValue(unsigned char c) {  	return c & 0b0011'1111;  } -size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) { +size_t UTF16FromUTF8(std::string_view sv, wchar_t *tbuf, size_t tlen) {  	size_t ui = 0; -	for (size_t i = 0; i < len;) { -		unsigned char ch = s[i]; +	for (size_t i = 0; i < sv.length();) { +		unsigned char ch = sv[i];  		const unsigned int byteCount = UTF8BytesOfLead[ch];  		unsigned int value; -		if (i + byteCount > len) { +		if (i + byteCount > sv.length()) {  			// Trying to read past end but still have space to write  			if (ui < tlen) {  				tbuf[ui] = ch; @@ -132,26 +132,26 @@ size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) {  			break;  		case 2:  			value = (ch & 0x1F) << 6; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch);  			tbuf[ui] = static_cast<wchar_t>(value);  			break;  		case 3:  			value = (ch & 0xF) << 12; -			ch = s[i++]; +			ch = sv[i++];  			value += (TrailByteValue(ch) << 6); -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch);  			tbuf[ui] = static_cast<wchar_t>(value);  			break;  		default:  			// Outside the BMP so need two surrogates  			value = (ch & 0x7) << 18; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch) << 12; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch) << 6; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch);  			tbuf[ui] = static_cast<wchar_t>(((value - 0x10000) >> 10) + SURROGATE_LEAD_FIRST);  			ui++; @@ -163,14 +163,14 @@ size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) {  	return ui;  } -size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen) { +size_t UTF32FromUTF8(std::string_view sv, unsigned int *tbuf, size_t tlen) {  	size_t ui = 0; -	for (size_t i = 0; i < len;) { -		unsigned char ch = s[i]; +	for (size_t i = 0; i < sv.length();) { +		unsigned char ch = sv[i];  		const unsigned int byteCount = UTF8BytesOfLead[ch];  		unsigned int value; -		if (i + byteCount > len) { +		if (i + byteCount > sv.length()) {  			// Trying to read past end but still have space to write  			if (ui < tlen) {  				tbuf[ui] = ch; @@ -190,23 +190,23 @@ size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen)  			break;  		case 2:  			value = (ch & 0x1F) << 6; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch);  			break;  		case 3:  			value = (ch & 0xF) << 12; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch) << 6; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch);  			break;  		default:  			value = (ch & 0x7) << 18; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch) << 12; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch) << 6; -			ch = s[i++]; +			ch = sv[i++];  			value += TrailByteValue(ch);  			break;  		} diff --git a/src/UniConversion.h b/src/UniConversion.h index 4cdfe1fac..1b84b8f81 100644 --- a/src/UniConversion.h +++ b/src/UniConversion.h @@ -14,12 +14,12 @@ const int UTF8MaxBytes = 4;  const int unicodeReplacementChar = 0xFFFD; -size_t UTF8Length(const wchar_t *uptr, size_t tlen); -void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len); +size_t UTF8Length(std::wstring_view wsv); +void UTF8FromUTF16(std::wstring_view wsv, char *putf, size_t len);  void UTF8FromUTF32Character(int uch, char *putf); -size_t UTF16Length(const char *s, size_t len); -size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen); -size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen); +size_t UTF16Length(std::string_view sv); +size_t UTF16FromUTF8(std::string_view sv, wchar_t *tbuf, size_t tlen); +size_t UTF32FromUTF8(std::string_view sv, unsigned int *tbuf, size_t tlen);  unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept;  std::string FixInvalidUTF8(const std::string &text); diff --git a/test/unit/testUniConversion.cxx b/test/unit/testUniConversion.cxx index efea2d8f4..a23d799e3 100644 --- a/test/unit/testUniConversion.cxx +++ b/test/unit/testUniConversion.cxx @@ -23,53 +23,53 @@ TEST_CASE("UTF16Length") {  	SECTION("UTF16Length ASCII") {  		// Latin Small Letter A  		const char *s = "a"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 1U);  	}  	SECTION("UTF16Length Example1") {  		// Dollar Sign  		const char *s = "\x24"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 1U);  	}  	SECTION("UTF16Length Example2") {  		// Cent Sign  		const char *s = "\xC2\xA2"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 1U);  	}  	SECTION("UTF16Length Example3") {  		// Euro Sign  		const char *s = "\xE2\x82\xAC"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 1U);  	}  	SECTION("UTF16Length Example4") {  		// Gothic Letter Hwair  		const char *s = "\xF0\x90\x8D\x88"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 2U);  	}  	SECTION("UTF16Length Invalid Trail byte in lead position") {  		const char *s = "a\xB5yz"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 4U);  	}  	SECTION("UTF16Length Invalid Lead byte at end") {  		const char *s = "a\xC2"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 2U);  	}  	SECTION("UTF16Length Invalid Lead byte implies 3 trails but only 2") {  		const char *s = "a\xF1yz"; -		size_t len = UTF16Length(s, strlen(s)); +		size_t len = UTF16Length(s);  		REQUIRE(len == 2U);  	}  } @@ -108,7 +108,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 ASCII") {  		const char s[] = {'a', 0};  		wchar_t tbuf[1] = {0}; -		size_t tlen = UTF16FromUTF8(s, 1, tbuf, 1); +		size_t tlen = UTF16FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 'a');  	} @@ -116,7 +116,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 Example1") {  		const char s[] = {'\x24', 0};  		wchar_t tbuf[1] = {0}; -		size_t tlen = UTF16FromUTF8(s, 1, tbuf, 1); +		size_t tlen = UTF16FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 0x24);  	} @@ -124,7 +124,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 Example2") {  		const char s[] = {'\xC2', '\xA2', 0};  		wchar_t tbuf[1] = {0}; -		size_t tlen = UTF16FromUTF8(s, 2, tbuf, 1); +		size_t tlen = UTF16FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 0xA2);  	} @@ -132,7 +132,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 Example3") {  		const char s[] = {'\xE2', '\x82', '\xAC', 0};  		wchar_t tbuf[1] = {0}; -		size_t tlen = UTF16FromUTF8(s, 3, tbuf, 1);; +		size_t tlen = UTF16FromUTF8(s, tbuf, 1);;  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 0x20AC);  	} @@ -140,7 +140,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 Example4") {  		const char s[] = {'\xF0', '\x90', '\x8D', '\x88', 0};  		wchar_t tbuf[2] = {0, 0}; -		size_t tlen = UTF16FromUTF8(s, 4, tbuf, 2); +		size_t tlen = UTF16FromUTF8(s, tbuf, 2);  		REQUIRE(tlen == 2U);  		REQUIRE(tbuf[0] == 0xD800);  		REQUIRE(tbuf[1] == 0xDF48); @@ -149,7 +149,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 Invalid Trail byte in lead position") {  		const char s[] = "a\xB5yz";  		wchar_t tbuf[4] = {}; -		size_t tlen = UTF16FromUTF8(s, 4, tbuf, 4); +		size_t tlen = UTF16FromUTF8(s, tbuf, 4);  		REQUIRE(tlen == 4U);  		REQUIRE(tbuf[0] == 'a');  		REQUIRE(tbuf[1] == 0xB5); @@ -160,7 +160,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 Invalid Lead byte at end") {  		const char s[] = "a\xC2";  		wchar_t tbuf[2] = {}; -		size_t tlen = UTF16FromUTF8(s, 2, tbuf, 2); +		size_t tlen = UTF16FromUTF8(s, tbuf, 2);  		REQUIRE(tlen == 2U);  		REQUIRE(tbuf[0] == 'a');  		REQUIRE(tbuf[1] == 0xC2); @@ -169,7 +169,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF16FromUTF8 Invalid Lead byte implies 3 trails but only 2") {  		const char *s = "a\xF1yz";  		wchar_t tbuf[4] = {}; -		size_t tlen = UTF16FromUTF8(s, 4, tbuf, 4); +		size_t tlen = UTF16FromUTF8(s, tbuf, 4);  		REQUIRE(tlen == 2U);  		REQUIRE(tbuf[0] == 'a');  		REQUIRE(tbuf[1] == 0xF1); @@ -180,7 +180,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 ASCII") {  		const char s[] = {'a', 0};  		unsigned int tbuf[1] = {0}; -		size_t tlen = UTF32FromUTF8(s, 1, tbuf, 1); +		size_t tlen = UTF32FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == static_cast<unsigned int>('a'));  	} @@ -188,7 +188,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 Example1") {  		const char s[] = {'\x24', 0};  		unsigned int tbuf[1] = {0}; -		size_t tlen = UTF32FromUTF8(s, 1, tbuf, 1); +		size_t tlen = UTF32FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 0x24);  	} @@ -196,7 +196,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 Example2") {  		const char s[] = {'\xC2', '\xA2', 0};  		unsigned int tbuf[1] = {0}; -		size_t tlen = UTF32FromUTF8(s, 2, tbuf, 1); +		size_t tlen = UTF32FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 0xA2);  	} @@ -204,7 +204,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 Example3") {  		const char s[] = {'\xE2', '\x82', '\xAC', 0};  		unsigned int tbuf[1] = {0}; -		size_t tlen = UTF32FromUTF8(s, 3, tbuf, 1); +		size_t tlen = UTF32FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 0x20AC);  	} @@ -212,7 +212,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 Example4") {  		const char s[] = {'\xF0', '\x90', '\x8D', '\x88', 0};  		unsigned int tbuf[1] = {0}; -		size_t tlen = UTF32FromUTF8(s, 4, tbuf, 1); +		size_t tlen = UTF32FromUTF8(s, tbuf, 1);  		REQUIRE(tlen == 1U);  		REQUIRE(tbuf[0] == 0x10348);  	} @@ -220,7 +220,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 Invalid Trail byte in lead position") {  		const char s[] = "a\xB5yz";  		unsigned int tbuf[4] = {}; -		size_t tlen = UTF32FromUTF8(s, 4, tbuf, 4); +		size_t tlen = UTF32FromUTF8(s, tbuf, 4);  		REQUIRE(tlen == 4U);  		REQUIRE(tbuf[0] == static_cast<unsigned int>('a'));  		REQUIRE(tbuf[1] == 0xB5); @@ -231,7 +231,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 Invalid Lead byte at end") {  		const char s[] = "a\xC2";  		unsigned int tbuf[2] = {}; -		size_t tlen = UTF32FromUTF8(s, 2, tbuf, 2); +		size_t tlen = UTF32FromUTF8(s, tbuf, 2);  		REQUIRE(tlen == 2U);  		REQUIRE(tbuf[0] == static_cast<unsigned int>('a'));  		REQUIRE(tbuf[1] == 0xC2); @@ -240,7 +240,7 @@ TEST_CASE("UniConversion") {  	SECTION("UTF32FromUTF8 Invalid Lead byte implies 3 trails but only 2") {  		const char *s = "a\xF1yz";  		unsigned int tbuf[4] = {}; -		size_t tlen = UTF32FromUTF8(s, 4, tbuf, 4); +		size_t tlen = UTF32FromUTF8(s, tbuf, 4);  		REQUIRE(tlen == 2U);  		REQUIRE(tbuf[0] == static_cast<unsigned int>('a'));  		REQUIRE(tbuf[1] == 0xF1); diff --git a/win32/PlatWin.cxx b/win32/PlatWin.cxx index 388002c70..beacc26fb 100644 --- a/win32/PlatWin.cxx +++ b/win32/PlatWin.cxx @@ -291,7 +291,7 @@ void SetLogFont(LOGFONTW &lf, const char *faceName, int characterSet, float size  	lf.lfItalic = italic ? 1 : 0;  	lf.lfCharSet = static_cast<BYTE>(characterSet);  	lf.lfQuality = Win32MapFontQuality(extraFontFlag); -	UTF16FromUTF8(faceName, strlen(faceName)+1, lf.lfFaceName, LF_FACESIZE); +	UTF16FromUTF8(faceName, lf.lfFaceName, LF_FACESIZE);  }  /** @@ -345,8 +345,8 @@ FontCached::FontCached(const FontParameters &fp) :  #if defined(USE_D2D)  		IDWriteTextFormat *pTextFormat;  		const int faceSize = 200; -		WCHAR wszFace[faceSize]; -		UTF16FromUTF8(fp.faceName, strlen(fp.faceName)+1, wszFace, faceSize); +		WCHAR wszFace[faceSize] = L""; +		UTF16FromUTF8(fp.faceName, wszFace, faceSize);  		const FLOAT fHeight = fp.size;  		const DWRITE_FONT_STYLE style = fp.italic ? DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL;  		HRESULT hr = pIDWriteFactory->CreateTextFormat(wszFace, NULL, @@ -395,8 +395,8 @@ bool FontCached::SameAs(const FontParameters &fp) {  		(lf.lfCharSet == fp.characterSet) &&  		(lf.lfQuality == Win32MapFontQuality(fp.extraFontFlag)) &&  		(technology == fp.technology)) { -			wchar_t wszFace[LF_FACESIZE]; -			UTF16FromUTF8(fp.faceName, strlen(fp.faceName)+1, wszFace, LF_FACESIZE); +			wchar_t wszFace[LF_FACESIZE] = L""; +			UTF16FromUTF8(fp.faceName, wszFace, LF_FACESIZE);  			return 0 == wcscmp(lf.lfFaceName,wszFace);  	}  	return false; @@ -503,7 +503,7 @@ public:  	TextWide(std::string_view text, bool unicodeMode, int codePage=0) :  		VarBuffer<wchar_t, stackBufferLength>(text.length()) {  		if (unicodeMode) { -			tlen = static_cast<int>(UTF16FromUTF8(text.data(), text.length(), buffer, text.length())); +			tlen = static_cast<int>(UTF16FromUTF8(text, buffer, text.length()));  		} else {  			// Support Asian string display in 9x English  			tlen = ::MultiByteToWideChar(codePage, 0, text.data(), static_cast<int>(text.length()), diff --git a/win32/ScintillaWin.cxx b/win32/ScintillaWin.cxx index 869b1909a..c0b6fb3e2 100644 --- a/win32/ScintillaWin.cxx +++ b/win32/ScintillaWin.cxx @@ -803,9 +803,10 @@ Sci::Position ScintillaWin::EncodedFromUTF8(const char *utf8, char *encoded) con  // the current codepage. Code is similar to HandleCompositionWindowed().  void ScintillaWin::AddCharUTF16(wchar_t const *wcs, unsigned int wclen) {  	if (IsUnicodeMode()) { -		size_t len = UTF8Length(wcs, wclen); +		const std::wstring_view wsv(wcs, wclen); +		size_t len = UTF8Length(wsv);  		char utfval[maxLenInputIME * 3]; -		UTF8FromUTF16(wcs, wclen, utfval, len); +		UTF8FromUTF16(wsv, utfval, len);  		utfval[len] = '\0';  		AddCharUTF(utfval, static_cast<unsigned int>(len));  	} else { @@ -1207,7 +1208,7 @@ sptr_t ScintillaWin::GetTextLength() {  	std::vector<char> docBytes(pdoc->Length(), '\0');  	pdoc->GetCharRange(&docBytes[0], 0, pdoc->Length());  	if (IsUnicodeMode()) { -		return UTF16Length(&docBytes[0], docBytes.size()); +		return UTF16Length(std::string_view(&docBytes[0], docBytes.size()));  	} else {  		return ::MultiByteToWideChar(CodePageOfDocument(), 0, &docBytes[0],  			static_cast<int>(docBytes.size()), NULL, 0); @@ -1223,12 +1224,13 @@ sptr_t ScintillaWin::GetText(uptr_t wParam, sptr_t lParam) {  	std::vector<char> docBytes(pdoc->Length(), '\0');  	pdoc->GetCharRange(&docBytes[0], 0, pdoc->Length());  	if (IsUnicodeMode()) { -		const size_t lengthUTF16 = UTF16Length(&docBytes[0], docBytes.size()); +                const std::string_view sv(&docBytes[0], docBytes.size()); +		const size_t lengthUTF16 = UTF16Length(sv);  		if (lParam == 0)  			return lengthUTF16;  		if (wParam == 0)  			return 0; -		size_t uLen = UTF16FromUTF8(&docBytes[0], docBytes.size(), +		size_t uLen = UTF16FromUTF8(sv,  			ptr, wParam - 1);  		ptr[uLen] = L'\0';  		return uLen; @@ -2051,8 +2053,7 @@ public:  				if (foldedUTF8) {  					// Maximum length of a case conversion is 6 bytes, 3 characters  					wchar_t wFolded[20]; -					const size_t charsConverted = UTF16FromUTF8(foldedUTF8, -							strlen(foldedUTF8), +					const size_t charsConverted = UTF16FromUTF8(std::string_view(foldedUTF8),  							wFolded, ELEMENTS(wFolded));  					for (size_t j=0; j<charsConverted; j++)  						utf16Folded[lenFlat++] = wFolded[j]; @@ -2096,8 +2097,7 @@ CaseFolder *ScintillaWin::CaseFolderForEncoding() {  					const char *caseFolded = CaseConvert(wCharacter[0], CaseConversionFold);  					if (caseFolded) {  						wchar_t wLower[20]; -						const size_t charsConverted = UTF16FromUTF8(caseFolded, -							strlen(caseFolded), +						const size_t charsConverted = UTF16FromUTF8(std::string_view(caseFolded),  							wLower, ELEMENTS(wLower));  						if (charsConverted == 1) {  							char sCharacterLowered[20]; @@ -2262,9 +2262,10 @@ void ScintillaWin::Paste() {  			// Default Scintilla behaviour in Unicode mode  			if (IsUnicodeMode()) {  				const size_t bytes = memUSelection.Size(); -				len = UTF8Length(uptr, bytes / 2); +				const std::wstring_view wsv(uptr, bytes / 2); +				len = UTF8Length(wsv);  				putf.resize(len + 1); -				UTF8FromUTF16(uptr, bytes / 2, &putf[0], len); +				UTF8FromUTF16(wsv, &putf[0], len);  			} else {  				// CF_UNICODETEXT available, but not in Unicode mode  				// Convert from Unicode to current Scintilla code page @@ -2300,9 +2301,10 @@ void ScintillaWin::Paste() {  					const size_t ulen = ::MultiByteToWideChar(CP_ACP, 0,  					                    ptr, ilen, &uptr[0], ilen +1); -					const size_t mlen = UTF8Length(&uptr[0], ulen); +					const std::wstring_view wsv(&uptr[0], ulen); +					const size_t mlen = UTF8Length(wsv);  					std::vector<char> putf(mlen+1); -					UTF8FromUTF16(&uptr[0], ulen, &putf[0], mlen); +					UTF8FromUTF16(wsv, &putf[0], mlen);  					InsertPasteShape(&putf[0], mlen, pasteShape);  				} else { @@ -2684,7 +2686,7 @@ void ScintillaWin::ImeStartComposition() {  			lf.lfFaceName[0] = L'\0';  			if (vs.styles[styleHere].fontName) {  				const char* fontName = vs.styles[styleHere].fontName; -				UTF16FromUTF8(fontName, strlen(fontName)+1, lf.lfFaceName, LF_FACESIZE); +				UTF16FromUTF8(std::string_view(fontName), lf.lfFaceName, LF_FACESIZE);  			}  			::ImmSetCompositionFontW(imc.hIMC, &lf); @@ -2796,11 +2798,11 @@ void ScintillaWin::CopyToClipboard(const SelectionText &selectedText) {  	// Default Scintilla behaviour in Unicode mode  	if (IsUnicodeMode()) { -		const size_t uchars = UTF16Length(selectedText.Data(), -			selectedText.LengthWithTerminator()); +                const std::string_view sv(selectedText.Data(), selectedText.LengthWithTerminator()); +		const size_t uchars = UTF16Length(sv);  		uniText.Allocate(2 * uchars);  		if (uniText) { -			UTF16FromUTF8(selectedText.Data(), selectedText.LengthWithTerminator(), +			UTF16FromUTF8(sv,  				static_cast<wchar_t *>(uniText.ptr), uchars);  		}  	} else { @@ -3101,9 +3103,10 @@ STDMETHODIMP ScintillaWin::Drop(LPDATAOBJECT pIDataSource, DWORD grfKeyState,  				if (IsUnicodeMode()) {  					const size_t tlen = memUDrop.Size();  					// Convert UTF-16 to UTF-8 -					const size_t dataLen = UTF8Length(udata, tlen/2); +					const std::wstring_view wsv(udata, tlen / 2); +					const size_t dataLen = UTF8Length(wsv);  					data.resize(dataLen+1); -					UTF8FromUTF16(udata, tlen/2, &data[0], dataLen); +					UTF8FromUTF16(wsv, &data[0], dataLen);  				} else {  					// Convert UTF-16 to ANSI  					// @@ -3176,10 +3179,11 @@ STDMETHODIMP ScintillaWin::GetData(FORMATETC *pFEIn, STGMEDIUM *pSTM) {  	GlobalMemory text;  	if (pFEIn->cfFormat == CF_UNICODETEXT) { -		const size_t uchars = UTF16Length(drag.Data(), drag.LengthWithTerminator()); +                const std::string_view sv(drag.Data(), drag.LengthWithTerminator()); +		const size_t uchars = UTF16Length(sv);  		text.Allocate(2 * uchars);  		if (text) { -			UTF16FromUTF8(drag.Data(), drag.LengthWithTerminator(), +			UTF16FromUTF8(sv,  				static_cast<wchar_t *>(text.ptr), uchars);  		}  	} else { | 
