diff options
author | Zufu Liu <unknown> | 2021-10-21 22:15:57 +1100 |
---|---|---|
committer | Zufu Liu <unknown> | 2021-10-21 22:15:57 +1100 |
commit | 9975609bf3b39f0e1cd121995ac49aea30a6c48f (patch) | |
tree | 339887d2052a909480b4e3b4df12f318bbec2be8 /test | |
parent | a989b1ed63c7cf81c693da8f2f66ab5e29ee341a (diff) | |
download | scintilla-mirror-9975609bf3b39f0e1cd121995ac49aea30a6c48f.tar.gz |
Feature [feature-requests:#1417] Use backward iteration to find space / control
character and text / punctuation boundaries in SafeSegment, as this will be simpler
and faster in almost all cases.
Simplify BreakFinder::Next calling SafeSegment.
Diffstat (limited to 'test')
-rw-r--r-- | test/unit/testDocument.cxx | 117 |
1 files changed, 117 insertions, 0 deletions
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index 41e3907ae..4a7e20095 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -486,3 +486,120 @@ TEST_CASE("Words") {
 		REQUIRE(!docEndSpace.document.IsWordAt(3, 5));
 	}
 }
+
+TEST_CASE("SafeSegment") {	// every check pins the break by the bytes on each side: text[length - 1] and text[length]
+	SECTION("Short") {	// input with a single trailing space
+		const DocPlus doc("", 0);	// second argument is presumably the code page (0 here) - confirm against DocPlus
+		// all encodings: break before or after last space (here: before the trailing space)
+		const std::string_view text = "12 ";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(length <= text.length());	// returned segment length must not exceed the input
+		REQUIRE(text[length - 1] == '2');
+		REQUIRE(text[length] == ' ');
+	}
+
+	SECTION("ASCII") {	// single-byte text only
+		const DocPlus doc("", 0);
+		// all encodings: break before or after last space (here: between the space and the tab)
+		std::string_view text = "12 3 \t45";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == ' ');
+		REQUIRE(text[length] == '\t');
+
+		// UTF-8 and ASCII: word and punctuation boundary in middle of text (expected between 'j' and ']')
+		text = "(IsBreakSpace(text[j]))";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'j');
+		REQUIRE(text[length] == ']');
+
+		// UTF-8 and ASCII: word and punctuation boundary near start of text (only boundary is after '(')
+		text = "(IsBreakSpace";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '(');
+		REQUIRE(text[length] == 'I');
+
+		// UTF-8 and ASCII: word and punctuation boundary near end of text (only boundary is before ')')
+		text = "IsBreakSpace)";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'e');
+		REQUIRE(text[length] == ')');
+
+		// no space or punctuation in the text: break before last character
+		text = "JapaneseJa";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'J');
+		REQUIRE(text[length] == 'a');
+	}
+
+	SECTION("UTF-8") {	// the break must fall before the lead byte of the last character, never inside it
+		const DocPlus doc("", CpUtf8);
+		// break before last character: no trail byte
+		std::string_view text = "JapaneseJa";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'J');
+		REQUIRE(text[length] == 'a');
+
+		// break before last character: 1 trail byte (last character is the 2-byte sequence \xc2\xa9)
+		text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xc2\xa9";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\x9e');
+		REQUIRE(text[length] == '\xc2');
+
+		// break before last character: 2 trail bytes (last character is the 3-byte sequence \xe8\xaa\x9e)
+		text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\xac');
+		REQUIRE(text[length] == '\xe8');
+
+		// break before last character: 3 trail bytes (last character is the 4-byte sequence \xf0\x9f\x98\x8a)
+		text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xf0\x9f\x98\x8a";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\x9e');
+		REQUIRE(text[length] == '\xf0');
+	}
+
+	SECTION("DBCS Shift-JIS") {	// double-byte characters must stay whole on one side of the break
+		const DocPlus doc("", 932);	// 932 is used for the Shift-JIS cases named by this section
+		// word and punctuation boundary in middle of text: single byte
+		std::string_view text = "(IsBreakSpace(text[j]))";
+		size_t length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'j');
+		REQUIRE(text[length] == ']');
+
+		// word and punctuation boundary in middle of text: double byte (\x8c\xea precedes ']')
+		text = "(IsBreakSpace(text[\x8c\xea]))";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\xea');
+		REQUIRE(text[length] == ']');
+
+		// word and punctuation boundary near start of text
+		text = "(IsBreakSpace";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '(');
+		REQUIRE(text[length] == 'I');
+
+		// word and punctuation boundary near end of text: single byte
+		text = "IsBreakSpace)";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'e');
+		REQUIRE(text[length] == ')');
+
+		// word and punctuation boundary near end of text: double byte (\x8c\xea precedes ')')
+		text = "IsBreakSpace\x8c\xea)";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\xea');
+		REQUIRE(text[length] == ')');
+
+		// break before last character: single byte
+		text = "JapaneseJa";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == 'J');
+		REQUIRE(text[length] == 'a');
+
+		// break before last character: double byte (trail byte \x7b has the same value as ASCII '{'; break is expected after it, before \x8c)
+		text = "Japanese\x93\xfa\x96\x7b\x8c\xea";
+		length = doc.document.SafeSegment(text);
+		REQUIRE(text[length - 1] == '\x7b');
+		REQUIRE(text[length] == '\x8c');
+	}
+} |