diff options
author | Neil <nyamatongwe@gmail.com> | 2021-07-11 19:31:58 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2021-07-11 19:31:58 +1000 |
commit | 667fb0b9f8485613f3d18f15dec3466d615c6097 (patch) | |
tree | d73330ade53dc91e109bfea06609c139b15491fa /test/unit/testDocument.cxx | |
parent | 79f3a8bee6d69323ee7c6c334856e1de9f4aefc4 (diff) | |
download | scintilla-mirror-667fb0b9f8485613f3d18f15dec3466d615c6097.tar.gz |
Add search tests with different encodings, case-matching options, and directions.
Diffstat (limited to 'test/unit/testDocument.cxx')
-rw-r--r-- | test/unit/testDocument.cxx | 193 |
1 files changed, 189 insertions, 4 deletions
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index c5275d25f..44bb68b7a 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -36,10 +36,31 @@ using namespace Scintilla::Internal; // Test Document. +struct Folding { + int from; + int to; + int length; +}; + +// Table of case folding for non-ASCII bytes in Windows Latin code page 1252 +Folding foldings1252[] = { + {0x8a, 0x9a, 0x01}, + {0x8c, 0x9c, 0x01}, + {0x8e, 0x9e, 0x01}, + {0x9f, 0xff, 0x01}, + {0xc0, 0xe0, 0x17}, + {0xd8, 0xf8, 0x07}, +}; + struct DocPlus { Document document; DocPlus(std::string_view svInitial, int codePage) : document(DocumentOption::Default) { + SetCodePage(codePage); + document.InsertString(0, svInitial.data(), svInitial.length()); + } + + void SetCodePage(int codePage) { document.SetDBCSCodePage(codePage); if (codePage == CpUtf8) { document.SetCaseFolder(std::make_unique<CaseFolderUnicode>()); @@ -50,9 +71,27 @@ struct DocPlus { pcft->StandardASCII(); document.SetCaseFolder(std::move(pcft)); } - document.InsertString(0, svInitial.data(), svInitial.length()); } + void SetSBCSFoldings(const Folding *foldings, size_t length) { + std::unique_ptr<CaseFolderTable> pcft = std::make_unique<CaseFolderTable>(); + pcft->StandardASCII(); + for (size_t block = 0; block < length; block++) { + for (int fold = 0; fold < foldings[block].length; fold++) { + pcft->SetTranslation(foldings[block].from + fold, foldings[block].to + fold); + } + } + document.SetCaseFolder(std::move(pcft)); + } + + Sci::Position FindNeedle(const std::string &needle, FindOption options, Sci::Position *length) { + assert(*length == static_cast<Sci::Position>(needle.length())); + return document.FindText(0, document.Length(), needle.c_str(), options, length); + } + Sci::Position FindNeedleReverse(const std::string &needle, FindOption options, Sci::Position *length) { + assert(*length == static_cast<Sci::Position>(needle.length())); + return document.FindText(document.Length(), 0, needle.c_str(), options, length); + } }; TEST_CASE("Document") { @@ -74,14 +113,130 @@ TEST_CASE("Document") { REQUIRE(!doc.document.CanRedo()); } + // Search ranges are from first argument to just before second argument + // Arguments are expected to be at character boundaries and will be tweaked if + // part way through a character. + SECTION("SearchInLatin") { + DocPlus doc("abcde", 0); // a b c d e + std::string finding = "b"; + Sci::Position lengthFinding = finding.length(); + Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 1); + location = doc.FindNeedleReverse(finding, FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 1); + location = doc.document.FindText(0, 2, finding.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 1); + location = doc.document.FindText(0, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == -1); + } + + SECTION("InsensitiveSearchInLatin") { + DocPlus doc("abcde", 0); // a b c d e + std::string finding = "B"; + Sci::Position lengthFinding = finding.length(); + Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 1); + location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 1); + location = doc.document.FindText(0, 2, finding.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 1); + location = doc.document.FindText(0, 1, finding.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == -1); + } + + SECTION("InsensitiveSearchIn1252") { + // In Windows Latin, code page 1252, C6 is AE and E6 is ae + DocPlus doc("tru\xc6s\xe6t", 0); // t r u AE s ae t + doc.SetSBCSFoldings(foldings1252, std::size(foldings1252)); + + // Search for upper-case AE + std::string finding = "\xc6"; + Sci::Position lengthFinding = finding.length(); + Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 3); + location = doc.document.FindText(4, doc.document.Length(), finding.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 5); + location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 5); + + // Search for lower-case ae + finding = "\xe6"; + location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 3); + location = doc.document.FindText(4, doc.document.Length(), finding.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 5); + location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 5); + } + + SECTION("Search2InLatin") { + // Checks that the initial '_' and final 'f' are ignored since they are outside the search bounds + DocPlus doc("_abcdef", 0); // _ a b c d e f + std::string finding = "cd"; + Sci::Position lengthFinding = finding.length(); + size_t docLength = doc.document.Length() - 1; + Sci::Position location = doc.document.FindText(1, docLength, finding.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 3); + location = doc.document.FindText(docLength, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 3); + location = doc.document.FindText(docLength, 1, "bc", FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 2); + location = doc.document.FindText(docLength, 1, "ab", FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 1); + location = doc.document.FindText(docLength, 1, "de", FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 4); + location = doc.document.FindText(docLength, 1, "_a", FindOption::MatchCase, &lengthFinding); + REQUIRE(location == -1); + location = doc.document.FindText(docLength, 1, "ef", FindOption::MatchCase, &lengthFinding); + REQUIRE(location == -1); + lengthFinding = 3; + location = doc.document.FindText(docLength, 1, "cde", FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 3); + } + SECTION("SearchInUTF8") { DocPlus doc("ab\xCE\x93" "d", CpUtf8); // a b gamma d - std::string finding = "b"; + const std::string finding = "b"; Sci::Position lengthFinding = finding.length(); - Sci::Position location = doc.document.FindText(0, doc.document.Length(), finding.c_str(), FindOption::MatchCase, &lengthFinding); + Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding); REQUIRE(location == 1); location = doc.document.FindText(doc.document.Length(), 0, finding.c_str(), FindOption::MatchCase, &lengthFinding); REQUIRE(location == 1); + location = doc.document.FindText(0, 1, finding.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == -1); + // Check doesn't try to follow a lead-byte past the search end + const std::string findingUTF = "\xCE\x93"; + lengthFinding = findingUTF.length(); + location = doc.document.FindText(0, 4, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 2); + // Only succeeds as 3 is partway through character so adjusted to 4 + location = doc.document.FindText(0, 3, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == 2); + location = doc.document.FindText(0, 2, findingUTF.c_str(), FindOption::MatchCase, &lengthFinding); + REQUIRE(location == -1); + } + + SECTION("InsensitiveSearchInUTF8") { + DocPlus doc("ab\xCE\x93" "d", CpUtf8); // a b gamma d + const std::string finding = "b"; + Sci::Position lengthFinding = finding.length(); + Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 1); + location = doc.document.FindText(doc.document.Length(), 0, finding.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 1); + const std::string findingUTF = "\xCE\x93"; + lengthFinding = findingUTF.length(); + location = doc.FindNeedle(findingUTF, FindOption::None, &lengthFinding); + REQUIRE(location == 2); + location = doc.document.FindText(doc.document.Length(), 0, findingUTF.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 2); + location = doc.document.FindText(0, 4, findingUTF.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 2); + // Only succeeds as 3 is partway through character so adjusted to 4 + location = doc.document.FindText(0, 3, findingUTF.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 2); + location = doc.document.FindText(0, 2, findingUTF.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == -1); } SECTION("SearchInShiftJIS") { @@ -92,7 +247,7 @@ TEST_CASE("Document") { std::string finding = "b"; // Search forwards Sci::Position lengthFinding = finding.length(); - Sci::Position location = doc.document.FindText(0, doc.document.Length(), finding.c_str(), FindOption::MatchCase, &lengthFinding); + Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding); REQUIRE(location == 1); // Search backwards lengthFinding = finding.length(); @@ -100,6 +255,36 @@ TEST_CASE("Document") { REQUIRE(location == 1); } + SECTION("InsensitiveSearchInShiftJIS") { + // {CJK UNIFIED IDEOGRAPH-9955} is two bytes: {0xE9, 'b'} in Shift-JIS + // The 'b' can be incorrectly matched by the search string 'b' when the search + // does not iterate the text correctly. + DocPlus doc("ab\xe9" "b ", 932); // a b {CJK UNIFIED IDEOGRAPH-9955} {space} + std::string finding = "b"; + // Search forwards + Sci::Position lengthFinding = finding.length(); + Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding); + REQUIRE(location == 1); + // Search backwards + lengthFinding = finding.length(); + location = doc.document.FindText(doc.document.Length(), 0, finding.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 1); + std::string finding932 = "\xe9" "b"; + // Search forwards + lengthFinding = finding932.length(); + location = doc.FindNeedle(finding932, FindOption::None, &lengthFinding); + REQUIRE(location == 2); + // Search backwards + lengthFinding = finding932.length(); + location = doc.document.FindText(doc.document.Length(), 0, finding932.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 2); + location = doc.document.FindText(0, 3, finding932.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == 2); + location = doc.document.FindText(0, 2, finding932.c_str(), FindOption::None, &lengthFinding); + REQUIRE(location == -1); + // Can not test case mapping of double byte text as folder available here does not implement this + } + SECTION("GetCharacterAndWidth") { Document doc(DocumentOption::Default); doc.SetDBCSCodePage(932); |