aboutsummaryrefslogtreecommitdiffhomepage
path: root/test/unit/testDocument.cxx
diff options
context:
space:
mode:
author Neil <nyamatongwe@gmail.com> 2021-07-11 19:31:58 +1000
committer Neil <nyamatongwe@gmail.com> 2021-07-11 19:31:58 +1000
commit 667fb0b9f8485613f3d18f15dec3466d615c6097 (patch)
tree d73330ade53dc91e109bfea06609c139b15491fa /test/unit/testDocument.cxx
parent 79f3a8bee6d69323ee7c6c334856e1de9f4aefc4 (diff)
download scintilla-mirror-667fb0b9f8485613f3d18f15dec3466d615c6097.tar.gz
Add search tests with different encodings, case-matching options, and directions.
Diffstat (limited to 'test/unit/testDocument.cxx')
-rw-r--r-- test/unit/testDocument.cxx 193
1 files changed, 189 insertions, 4 deletions
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index c5275d25f..44bb68b7a 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -36,10 +36,31 @@ using namespace Scintilla::Internal;
// Test Document.
+// Describes one contiguous run of case-folding translations:
+// 'length' consecutive values starting at 'from' map to the values starting at 'to'.
+struct Folding {
+ int from; // first value of the source run
+ int to; // first value of the target run
+ int length; // number of consecutive values in the run
+};
+
+// Table of case folding for non-ASCII bytes in Windows Latin code page 1252.
+// Each entry is {from, to, length}: 'length' consecutive byte values starting at
+// 'from' fold to the byte values starting at 'to'.
+// constexpr makes this test-only table read-only and gives it internal linkage,
+// so it can neither be modified by a test nor clash with a symbol in another file.
+constexpr Folding foldings1252[] = {
+	{0x8a, 0x9a, 0x01},	// S with caron
+	{0x8c, 0x9c, 0x01},	// OE ligature
+	{0x8e, 0x9e, 0x01},	// Z with caron
+	{0x9f, 0xff, 0x01},	// Y with diaeresis
+	{0xc0, 0xe0, 0x17},	// 0xc0..0xd6 -> 0xe0..0xf6
+	{0xd8, 0xf8, 0x07},	// 0xd8..0xde -> 0xf8..0xfe
+};
+
struct DocPlus {
Document document;
+ // Create a document for the given code page and load it with svInitial.
+ // The code page (and its case folder) is configured before the text is inserted.
DocPlus(std::string_view svInitial, int codePage) : document(DocumentOption::Default) {
+ SetCodePage(codePage);
+ document.InsertString(0, svInitial.data(), svInitial.length());
+ }
+
+ void SetCodePage(int codePage) {
document.SetDBCSCodePage(codePage);
if (codePage == CpUtf8) {
document.SetCaseFolder(std::make_unique<CaseFolderUnicode>());
@@ -50,9 +71,27 @@ struct DocPlus {
pcft->StandardASCII();
document.SetCaseFolder(std::move(pcft));
}
- document.InsertString(0, svInitial.data(), svInitial.length());
}
+	// Install a table-driven case folder on the document: start from
+	// StandardASCII() and add every translation described by the 'length'
+	// entries pointed to by 'foldings'.
+	void SetSBCSFoldings(const Folding *foldings, size_t length) {
+		auto folder = std::make_unique<CaseFolderTable>();
+		folder->StandardASCII();
+		const Folding *const last = foldings + length;
+		for (const Folding *entry = foldings; entry != last; ++entry) {
+			// Expand the run: from+0 -> to+0, from+1 -> to+1, ...
+			for (int offset = 0; offset < entry->length; offset++) {
+				folder->SetTranslation(entry->from + offset, entry->to + offset);
+			}
+		}
+		document.SetCaseFolder(std::move(folder));
+	}
+
+	// Search forwards through the whole document (from 0 to Length()) for needle.
+	// On entry *length must equal the needle's byte length; this is asserted.
+	// Returns whatever Document::FindText returns for that range.
+	Sci::Position FindNeedle(const std::string &needle, FindOption options, Sci::Position *length) {
+		assert(static_cast<Sci::Position>(needle.length()) == *length);
+		const auto rangeEnd = document.Length();
+		const char *const text = needle.c_str();
+		return document.FindText(0, rangeEnd, text, options, length);
+	}
+	// Search backwards through the whole document (from Length() down to 0) for needle.
+	// On entry *length must equal the needle's byte length; this is asserted.
+	// Returns whatever Document::FindText returns for that range.
+	Sci::Position FindNeedleReverse(const std::string &needle, FindOption options, Sci::Position *length) {
+		assert(static_cast<Sci::Position>(needle.length()) == *length);
+		const auto rangeStart = document.Length();
+		const char *const text = needle.c_str();
+		return document.FindText(rangeStart, 0, text, options, length);
+	}
};
TEST_CASE("Document") {
@@ -74,14 +113,130 @@ TEST_CASE("Document") {
REQUIRE(!doc.document.CanRedo());
}
+ // Search ranges are from first argument to just before second argument
+ // Arguments are expected to be at character boundaries and will be tweaked if
+ // part way through a character.
+ // Case-sensitive search for a single byte in a code page 0 document.
+ SECTION("SearchInLatin") {
+ DocPlus doc("abcde", 0); // a b c d e
+ std::string finding = "b";
+ Sci::Position lengthFinding = finding.length();
+ // Whole document, forwards then backwards: both locate the 'b' at position 1
+ Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == 1);
+ location = doc.FindNeedleReverse(finding, FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == 1);
+ // Range 0 to 2 still contains position 1 so the match is found
+ location = doc.document.FindText(0, 2, finding.c_str(), FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == 1);
+ // Range 0 to 1 stops just before the 'b' so nothing is found
+ location = doc.document.FindText(0, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == -1);
+ }
+
+ // Same searches as SearchInLatin but without MatchCase: the upper-case needle
+ // must match the lower-case 'b' in the document.
+ SECTION("InsensitiveSearchInLatin") {
+ DocPlus doc("abcde", 0); // a b c d e
+ std::string finding = "B";
+ Sci::Position lengthFinding = finding.length();
+ // Whole document, forwards then backwards
+ Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
+ REQUIRE(location == 1);
+ location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
+ REQUIRE(location == 1);
+ // Range 0 to 2 still contains position 1 so the match is found
+ location = doc.document.FindText(0, 2, finding.c_str(), FindOption::None, &lengthFinding);
+ REQUIRE(location == 1);
+ // Range 0 to 1 stops just before the 'b' so nothing is found
+ location = doc.document.FindText(0, 1, finding.c_str(), FindOption::None, &lengthFinding);
+ REQUIRE(location == -1);
+ }
+
+	// Case-insensitive search over non-ASCII single-byte characters, using the
+	// code page 1252 folding table so that AE and ae compare equal.
+	SECTION("InsensitiveSearchIn1252") {
+		// In Windows Latin, code page 1252, C6 is AE and E6 is ae
+		DocPlus doc("tru\xc6s\xe6t", 0);	// t r u AE s ae t
+		// Install a case folder that also folds the non-ASCII 1252 letters
+		doc.SetSBCSFoldings(foldings1252, std::size(foldings1252));
+
+		// Search for upper-case AE: matches AE at 3 and, by folding, ae at 5
+		std::string finding = "\xc6";
+		Sci::Position lengthFinding = finding.length();
+		Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 3);
+		// Starting after the first match finds the second one
+		location = doc.document.FindText(4, doc.document.Length(), finding.c_str(), FindOption::None, &lengthFinding);
+		REQUIRE(location == 5);
+		// Searching backwards meets the later match first
+		location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 5);
+
+		// Search for lower-case ae: must give the same positions as upper-case AE
+		finding = "\xe6";
+		// Reset the in/out length for the new needle instead of relying on the
+		// value left behind by the previous searches.
+		lengthFinding = finding.length();
+		location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 3);
+		location = doc.document.FindText(4, doc.document.Length(), finding.c_str(), FindOption::None, &lengthFinding);
+		REQUIRE(location == 5);
+		location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 5);
+	}
+
+	// Two-byte (and one three-byte) needles searched inside a range that excludes
+	// the first and last characters of the document.
+	SECTION("Search2InLatin") {
+		// Checks that the initial '_' and final 'f' are ignored since they are outside the search bounds
+		DocPlus doc("_abcdef", 0);	// _ a b c d e f
+		std::string finding = "cd";
+		Sci::Position lengthFinding = finding.length();
+		// End of the searched range: one before the document end so the final 'f' is excluded.
+		// Held as Sci::Position, the type used for every other position in these tests,
+		// to avoid a signed -> unsigned -> signed round trip through size_t.
+		const Sci::Position searchEnd = doc.document.Length() - 1;
+		// Forwards over the restricted range
+		Sci::Position location = doc.document.FindText(1, searchEnd, finding.c_str(), FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == 3);
+		// Backwards over the same range; all following searches are backwards too
+		location = doc.document.FindText(searchEnd, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == 3);
+		location = doc.document.FindText(searchEnd, 1, "bc", FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == 2);
+		// Matches touching either edge of the range are still found
+		location = doc.document.FindText(searchEnd, 1, "ab", FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == 1);
+		location = doc.document.FindText(searchEnd, 1, "de", FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == 4);
+		// Matches that would need the excluded '_' or 'f' must fail
+		location = doc.document.FindText(searchEnd, 1, "_a", FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == -1);
+		location = doc.document.FindText(searchEnd, 1, "ef", FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == -1);
+		// Three-byte needle: update the in/out length to match
+		lengthFinding = 3;
+		location = doc.document.FindText(searchEnd, 1, "cde", FindOption::MatchCase, &lengthFinding);
+		REQUIRE(location == 3);
+	}
+
+ // Case-sensitive searches in a UTF-8 document containing a two-byte character.
SECTION("SearchInUTF8") {
DocPlus doc("ab\xCE\x93" "d", CpUtf8); // a b gamma d
- std::string finding = "b";
+ const std::string finding = "b";
Sci::Position lengthFinding = finding.length();
- Sci::Position location = doc.document.FindText(0, doc.document.Length(), finding.c_str(), FindOption::MatchCase, &lengthFinding);
+ Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);
REQUIRE(location == 1);
+ // Same search with start and end swapped (document end down to 0)
location = doc.document.FindText(doc.document.Length(), 0, finding.c_str(), FindOption::MatchCase, &lengthFinding);
REQUIRE(location == 1);
+ // Range 0 to 1 stops just before the 'b' so nothing is found
+ location = doc.document.FindText(0, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == -1);
+ // Check doesn't try to follow a lead-byte past the search end
+ const std::string findingUTF = "\xCE\x93";
+ lengthFinding = findingUTF.length();
+ // The two-byte character occupies positions 2 and 3, so an end of 4 covers it fully
+ location = doc.document.FindText(0, 4, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == 2);
+ // Only succeeds as 3 is partway through character so adjusted to 4
+ location = doc.document.FindText(0, 3, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == 2);
+ // An end of 2 is on a character boundary before the character, so no match
+ location = doc.document.FindText(0, 2, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding);
+ REQUIRE(location == -1);
+ }
+
+	// Case-insensitive counterpart of SearchInUTF8: the same document and needles
+	// searched with FindOption::None.
+	SECTION("InsensitiveSearchInUTF8") {
+		DocPlus doc("ab\xCE\x93" "d", CpUtf8);	// a b gamma d
+		const std::string finding = "b";
+		// Search forwards
+		Sci::Position lengthFinding = finding.length();
+		Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 1);
+		// Search backwards, through the shared helper; the in/out length is reset
+		// first so the helper's precondition does not depend on the previous search
+		lengthFinding = finding.length();
+		location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 1);
+		const std::string findingUTF = "\xCE\x93";
+		// Search forwards for the two-byte character
+		lengthFinding = findingUTF.length();
+		location = doc.FindNeedle(findingUTF, FindOption::None, &lengthFinding);
+		REQUIRE(location == 2);
+		// Search backwards for the two-byte character
+		lengthFinding = findingUTF.length();
+		location = doc.FindNeedleReverse(findingUTF, FindOption::None, &lengthFinding);
+		REQUIRE(location == 2);
+		// The character occupies positions 2 and 3, so an end of 4 covers it fully
+		location = doc.document.FindText(0, 4, findingUTF.c_str(), FindOption::None, &lengthFinding);
+		REQUIRE(location == 2);
+		// Only succeeds as 3 is partway through character so adjusted to 4
+		location = doc.document.FindText(0, 3, findingUTF.c_str(), FindOption::None, &lengthFinding);
+		REQUIRE(location == 2);
+		// An end of 2 is on a character boundary before the character, so no match
+		location = doc.document.FindText(0, 2, findingUTF.c_str(), FindOption::None, &lengthFinding);
+		REQUIRE(location == -1);
}
SECTION("SearchInShiftJIS") {
@@ -92,7 +247,7 @@ TEST_CASE("Document") {
std::string finding = "b";
// Search forwards
Sci::Position lengthFinding = finding.length();
- Sci::Position location = doc.document.FindText(0, doc.document.Length(), finding.c_str(), FindOption::MatchCase, &lengthFinding);
+ Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);
REQUIRE(location == 1);
// Search backwards
lengthFinding = finding.length();
@@ -100,6 +255,36 @@ TEST_CASE("Document") {
REQUIRE(location == 1);
}
+	// Case-insensitive searches in a Shift-JIS (code page 932) document whose
+	// double-byte character has a trail byte equal to ASCII 'b'.
+	SECTION("InsensitiveSearchInShiftJIS") {
+		// {CJK UNIFIED IDEOGRAPH-9955} is two bytes: {0xE9, 'b'} in Shift-JIS
+		// The 'b' can be incorrectly matched by the search string 'b' when the search
+		// does not iterate the text correctly.
+		DocPlus doc("ab\xe9" "b ", 932);	// a b {CJK UNIFIED IDEOGRAPH-9955} {space}
+		std::string finding = "b";
+		// Search forwards
+		Sci::Position lengthFinding = finding.length();
+		Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 1);
+		// Search backwards: must skip the trail byte 'b' at 3 and report the real 'b' at 1
+		lengthFinding = finding.length();
+		location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
+		REQUIRE(location == 1);
+		std::string finding932 = "\xe9" "b";
+		// Search forwards
+		lengthFinding = finding932.length();
+		location = doc.FindNeedle(finding932, FindOption::None, &lengthFinding);
+		REQUIRE(location == 2);
+		// Search backwards
+		lengthFinding = finding932.length();
+		location = doc.FindNeedleReverse(finding932, FindOption::None, &lengthFinding);
+		REQUIRE(location == 2);
+		// End position 3 falls inside the two-byte character at 2-3
+		// (presumably adjusted to 4, as in the UTF-8 tests - TODO confirm)
+		location = doc.document.FindText(0, 3, finding932.c_str(), FindOption::None, &lengthFinding);
+		REQUIRE(location == 2);
+		// An end of 2 is before the character, so no match
+		location = doc.document.FindText(0, 2, finding932.c_str(), FindOption::None, &lengthFinding);
+		REQUIRE(location == -1);
+		// Can not test case mapping of double byte text as folder available here does not implement this
+	}
+
SECTION("GetCharacterAndWidth") {
Document doc(DocumentOption::Default);
doc.SetDBCSCodePage(932);