Feature [feature-requests:#1417] Use backward iteration to find space / control

character and text / punctuation boundaries in SafeSegment, as this will be simpler and faster in almost all cases. Simplify the call to SafeSegment in BreakFinder::Next.
author: Zufu Liu <unknown> 2021-10-21 22:15:57 +1100
committer: Zufu Liu <unknown> 2021-10-21 22:15:57 +1100
commit: 9975609bf3b39f0e1cd121995ac49aea30a6c48f (patch)
tree: 339887d2052a909480b4e3b4df12f318bbec2be8 /test
parent: a989b1ed63c7cf81c693da8f2f66ab5e29ee341a (diff)
download: scintilla-mirror-9975609bf3b39f0e1cd121995ac49aea30a6c48f.tar.gz
1 files changed, 117 insertions, 0 deletions
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index 41e3907ae..4a7e20095 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -486,3 +486,120 @@ TEST_CASE("Words") {
 		REQUIRE(!docEndSpace.document.IsWordAt(3, 5));
 	}
 }
+
+TEST_CASE("SafeSegment") {	// Each check treats the value returned by doc.document.SafeSegment(text) as the break position: text[length - 1] ends the segment and text[length] starts the remainder.
+	SECTION("Short") {
+		const DocPlus doc("", 0);
+		// all encodings: "12 " is expected to break before the last space, leaving '2' as the final character of the segment
+		const std::string_view text = "12 ";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(length <= text.length());
+		REQUIRE(text[length - 1] == '2');
+		REQUIRE(text[length] == ' ');
+	}
+
+	SECTION("ASCII") {
+		const DocPlus doc("", 0);
+		// all encodings: with a run of space / control characters the break is expected between the ' ' and the '\t'
+		std::string_view text = "12 3 \t45";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == ' ');
+		REQUIRE(text[length] == '\t');
+
+		// UTF-8 and ASCII: word and punctuation boundary in middle of text, expected between 'j' and ']'
+		text = "(IsBreakSpace(text[j]))";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'j');
+		REQUIRE(text[length] == ']');
+
+		// UTF-8 and ASCII: word and punctuation boundary near start of text, expected right after the leading '('
+		text = "(IsBreakSpace";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '(');
+		REQUIRE(text[length] == 'I');
+
+		// UTF-8 and ASCII: word and punctuation boundary near end of text, expected right before the trailing ')'
+		text = "IsBreakSpace)";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'e');
+		REQUIRE(text[length] == ')');
+
+		// text without spaces or punctuation: break before last character, i.e. before the final 'a'
+		text = "JapaneseJa";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'J');
+		REQUIRE(text[length] == 'a');
+	}
+
+	SECTION("UTF-8") {
+		const DocPlus doc("", CpUtf8);
+		// break before last character: single byte character, no trail byte
+		std::string_view text = "JapaneseJa";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'J');
+		REQUIRE(text[length] == 'a');
+
+		// break before last character: 1 trail byte (text ends with C2 A9, U+00A9), so the break is expected before lead byte C2
+		text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xc2\xa9";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\x9e');
+		REQUIRE(text[length] == '\xc2');
+
+		// break before last character: 2 trail bytes (text ends with E8 AA 9E, U+8A9E), so the break is expected before lead byte E8
+		text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\xac');
+		REQUIRE(text[length] == '\xe8');
+
+		// break before last character: 3 trail bytes (text ends with F0 9F 98 8A, U+1F60A), so the break is expected before lead byte F0
+		text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xf0\x9f\x98\x8a";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\x9e');
+		REQUIRE(text[length] == '\xf0');
+	}
+
+	SECTION("DBCS Shift-JIS") {
+		const DocPlus doc("", 932);
+		// word and punctuation boundary in middle of text: single byte character 'j' precedes the ']'
+		std::string_view text = "(IsBreakSpace(text[j]))";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'j');
+		REQUIRE(text[length] == ']');
+
+		// word and punctuation boundary in middle of text: double byte character 8C EA precedes the ']'
+		text = "(IsBreakSpace(text[\x8c\xea]))";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\xea');
+		REQUIRE(text[length] == ']');
+
+		// word and punctuation boundary near start of text, expected right after the leading '('
+		text = "(IsBreakSpace";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '(');
+		REQUIRE(text[length] == 'I');
+
+		// word and punctuation boundary near end of text: single byte character 'e' precedes the ')'
+		text = "IsBreakSpace)";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'e');
+		REQUIRE(text[length] == ')');
+
+		// word and punctuation boundary near end of text: double byte character 8C EA precedes the ')'
+		text = "IsBreakSpace\x8c\xea)";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\xea');
+		REQUIRE(text[length] == ')');
+
+		// break before last character: single byte, i.e. before the final 'a'
+		text = "JapaneseJa";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'J');
+		REQUIRE(text[length] == 'a');
+
+		// break before last character: double byte; expected before lead byte 8C even though the preceding trail byte 7B is also ASCII '{'
+		text = "Japanese\x93\xfa\x96\x7b\x8c\xea";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\x7b');
+		REQUIRE(text[length] == '\x8c');
+	}
+}
author	Zufu Liu <unknown>	2021-10-21 22:15:57 +1100
committer	Zufu Liu <unknown>	2021-10-21 22:15:57 +1100
commit	9975609bf3b39f0e1cd121995ac49aea30a6c48f (patch)
tree	339887d2052a909480b4e3b4df12f318bbec2be8 /test
parent	a989b1ed63c7cf81c693da8f2f66ab5e29ee341a (diff)
download	scintilla-mirror-9975609bf3b39f0e1cd121995ac49aea30a6c48f.tar.gz