Feature [feature-requests:#1408] Simplify code, remove IsDBCSTrailByteInvalid.

Drop the temporary unit test that cross-checked IsDBCSTrailByteNoExcept against IsDBCSTrailByteInvalid for every byte value.
author: Zufu Liu <unknown> 2021-07-03 13:21:38 +1000
committer: Zufu Liu <unknown> 2021-07-03 13:21:38 +1000
commit: a2d23bd463e65f532301b682b64cd02b8a57716b (patch)
tree: 1d8129cc58281e1134ebf3884273f458bfeebef4
parent: 9cda372c64c8920d2e910825161a8ed882b417b3 (diff)
download: scintilla-mirror-a2d23bd463e65f532301b682b64cd02b8a57716b.tar.gz
3 files changed, 13 insertions, 75 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 739b6266a..0ec598bc2 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -985,27 +985,22 @@ Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sc
 }
 
 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
-	int character;
 	int bytesInCharacter = 1;
 	const unsigned char leadByte = cb.UCharAt(position);
-	if (dbcsCodePage) {
+	int character = leadByte;
+	if (dbcsCodePage && !UTF8IsAscii(leadByte)) {
 		if (CpUtf8 == dbcsCodePage) {
-			if (UTF8IsAscii(leadByte)) {
-				// Single byte character or invalid
-				character =  leadByte;
+			const int widthCharBytes = UTF8BytesOfLead[leadByte];
+			unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
+			for (int b=1; b<widthCharBytes; b++)
+				charBytes[b] = cb.UCharAt(position+b);
+			const int utf8status = UTF8Classify(charBytes, widthCharBytes);
+			if (utf8status & UTF8MaskInvalid) {
+				// Report as singleton surrogate values which are invalid Unicode
+				character =  0xDC80 + leadByte;
 			} else {
-				const int widthCharBytes = UTF8BytesOfLead[leadByte];
-				unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
-				for (int b=1; b<widthCharBytes; b++)
-					charBytes[b] = cb.UCharAt(position+b);
-				const int utf8status = UTF8Classify(charBytes, widthCharBytes);
-				if (utf8status & UTF8MaskInvalid) {
-					// Report as singleton surrogate values which are invalid Unicode
-					character =  0xDC80 + leadByte;
-				} else {
-					bytesInCharacter = utf8status & UTF8MaskWidth;
-					character = UnicodeFromUTF8(charBytes);
-				}
+				bytesInCharacter = utf8status & UTF8MaskWidth;
+				character = UnicodeFromUTF8(charBytes);
 			}
 		} else {
 			if (IsDBCSLeadByteNoExcept(leadByte)) {
@@ -1013,15 +1008,9 @@ int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Positio
 				if (IsDBCSTrailByteNoExcept(trailByte)) {
 					bytesInCharacter = 2;
 					character = (leadByte << 8) | trailByte;
-				} else {
-					character = leadByte;
 				}
-			} else {
-				character = leadByte;
 			}
 		}
-	} else {
-		character = leadByte;
 	}
 	if (pWidth) {
 		*pWidth = bytesInCharacter;
@@ -1135,51 +1124,12 @@ bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept {
 	return false;
 }
 
-bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept {
-	const unsigned char trail = ch;
-	switch (dbcsCodePage) {
-	case 932:
-		// Shift_jis
-		return
-			(trail <= 0x3F) ||
-			(trail == 0x7F) ||
-			(trail >= 0xFD);
-	case 936:
-		// GBK
-		return
-			(trail <= 0x3F) ||
-			(trail == 0x7F) ||
-			(trail == 0xFF);
-	case 949:
-		// Korean Wansung KS C-5601-1987
-		return
-			(trail <= 0x40) ||
-			((trail >= 0x5B) && (trail <= 0x60)) ||
-			((trail >= 0x7B) && (trail <= 0x80)) ||
-			(trail == 0xFF);
-	case 950:
-		// Big5
-		return
-			(trail <= 0x3F) ||
-			((trail >= 0x7F) && (trail <= 0xA0)) ||
-			(trail == 0xFF);
-	case 1361:
-		// Korean Johab KS C-5601-1992
-		return
-			(trail <= 0x30) ||
-			(trail == 0x7F) ||
-			(trail == 0x80) ||
-			(trail == 0xFF);
-	}
-	return false;
-}
-
 int Document::DBCSDrawBytes(std::string_view text) const noexcept {
 	if (text.length() <= 1) {
 		return static_cast<int>(text.length());
 	}
 	if (IsDBCSLeadByteNoExcept(text[0])) {
-		return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2;
+		return IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1;
 	} else {
 		return 1;
 	}
diff --git a/src/Document.h b/src/Document.h
index 88392c1a2..c40ce2a44 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -331,7 +331,6 @@ public:
 	bool IsDBCSLeadByteNoExcept(char ch) const noexcept;
 	bool IsDBCSTrailByteNoExcept(char ch) const noexcept;
 	bool IsDBCSLeadByteInvalid(char ch) const noexcept;
-	bool IsDBCSTrailByteInvalid(char ch) const noexcept;
 	int DBCSDrawBytes(std::string_view text) const noexcept;
 	int SafeSegment(const char *text, int length, int lengthSegment) const noexcept;
 	EncodingFamily CodePageFamily() const noexcept;
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index cecd14920..c5275d25f 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -121,15 +121,4 @@ TEST_CASE("Document") {
 		REQUIRE(ch == '=');
 	}
 
-	SECTION("CheckTrailBytes") {
-		Document doc(DocumentOption::Default);
-		const int pages[] = { 932, 936, 949, 950, 1361 };
-		for (const int page : pages) {
-			doc.SetDBCSCodePage(page);
-			for (int byteVal = 0; byteVal < 0x100; byteVal++) {
-				char ch = static_cast<char>(byteVal);
-				REQUIRE(doc.IsDBCSTrailByteNoExcept(ch) != doc.IsDBCSTrailByteInvalid(ch));
-			}
-		}
-	}
 }
author	Zufu Liu <unknown>	2021-07-03 13:21:38 +1000
committer	Zufu Liu <unknown>	2021-07-03 13:21:38 +1000
commit	a2d23bd463e65f532301b682b64cd02b8a57716b (patch)
tree	1d8129cc58281e1134ebf3884273f458bfeebef4
parent	9cda372c64c8920d2e910825161a8ed882b417b3 (diff)
download	scintilla-mirror-a2d23bd463e65f532301b682b64cd02b8a57716b.tar.gz