3 files changed, 31 insertions, 9 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index f1079398d..4aad2e370 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -3061,16 +3061,9 @@ Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci:
 
 		bool matched = false;
 		if (SC_CP_UTF8 == doc->dbcsCodePage) {
-			const size_t lenS = strlen(s);
-			std::vector<wchar_t> ws(lenS + 1);
-#if WCHAR_T_IS_16
-			const size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
-#else
-			const size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
-#endif
-			ws[outLen] = 0;
+			const std::wstring ws = WStringFromUTF8(s, strlen(s));
 			std::wregex regexp;
-			regexp.assign(&ws[0], flagsRe);
+			regexp.assign(ws, flagsRe);
 			matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
 
 		} else {
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 6cd6a8ba9..8cbb3cdd2 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -162,6 +162,17 @@ size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen) {
 	return ui;
 }
 
+size_t UTF32Length(const char *s, size_t len) noexcept {
+	// Counts one UTF-32 unit per UTF-8 sequence in the len bytes at s, advancing by
+	// the length that UTF8BytesOfLead reports for each sequence's first byte.
+	size_t units = 0;
+	for (size_t pos = 0; pos < len; units++) {
+		const unsigned char lead = s[pos];
+		pos += UTF8BytesOfLead[lead];
+	}
+	return units;
+}
+
 size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen) {
 	size_t ui = 0;
 	for (size_t i = 0; i < len;) {
@@ -215,6 +226,20 @@ size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen)
 	return ui;
 }
 
+std::wstring WStringFromUTF8(const char *s, size_t len) {
+	// Converts len bytes of UTF-8 at s to a std::wstring: UTF-16 for 2-byte wchar_t, else UTF-32.
+	// Chosen by wchar_t width rather than _WIN32 so that a 2-byte wchar_t without _WIN32
+	// (e.g. Cygwin) is not overrun by the UTF-32 path writing 4-byte units.
+	const bool wide16 = sizeof(wchar_t) == 2;
+	std::wstring ws(wide16 ? UTF16Length(s, len) : UTF32Length(s, len), L'\0');
+	if (wide16) {
+		UTF16FromUTF8(s, len, &ws[0], ws.length());
+	} else {
+		UTF32FromUTF8(s, len, reinterpret_cast<unsigned int *>(&ws[0]), ws.length());
+	}
+	return ws;
+}
+
 unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept {
 	if (val < SUPPLEMENTAL_PLANE_FIRST) {
 		tbuf[0] = static_cast<wchar_t>(val);
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 4bb8875d0..9f405e1ed 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -19,7 +19,11 @@ void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len);
 void UTF8FromUTF32Character(int uch, char *putf);
 size_t UTF16Length(const char *s, size_t len);
 size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
+size_t UTF32Length(const char *s, size_t len) noexcept;
 size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
+// WStringFromUTF8 converts UTF-8 text to a std::wstring: UTF-16 where wchar_t is
+// 2 bytes (Windows) and UTF-32 where it is 4 bytes (Unix).
+std::wstring WStringFromUTF8(const char *s, size_t len);
 unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept;
 bool UTF8IsValid(const char *s, size_t len) noexcept;
 std::string FixInvalidUTF8(const std::string &text);