From 8e55cc0c973cc2fbaac8cca1505524b86ce58dff Mon Sep 17 00:00:00 2001
From: Zufu Liu
Date: Thu, 21 Dec 2023 16:00:00 +1100
Subject: Bug [#2405]. Avoid character fragments in regular expression search results.

---
Reviewer notes (commentary only; this area between the three-dash line and
the first "diff" line is not part of the commit message or of the hunks):

* This view is test-only ("limited to 'test'"): it shows the tests and a test
  stub, not the library change the subject line refers to.

* test/unit/testDocument.cxx adds two SECTIONs to TEST_CASE("Document").
  - RESearchMovePositionOutsideCharUTF8 builds a CpUtf8 document holding
    "a", two copies of the two-byte sequence \xCE\x93 (annotated in the code
    as gamma) and "z". It searches for [a-z](\w)\1 with rePosix, requires
    Match(1, 5), and requires that substituting \t\1\n yields a tab, the
    complete two-byte sequence \xCE\x93 and a newline. The same two checks
    are repeated with reCxx11 unless NO_CXX11_REGEX is defined.
  - RESearchMovePositionOutsideCharDBCS does the same with code page 932
    and rePosix only. It requires Match(8, 5) with capture \x83\xA1 for
    [a-z](\w)\1, then Match(6, 3) with capture "a" for \w([a-z])\1.
  - NOTE(review): Match(a, b) reads as (start position, length); that is
    consistent with the byte offsets of the literals, but the definition of
    Match is not shown here -- confirm.

* test/unit/testRESearch.cxx adds a MovePositionOutsideChar override next to
  CharAt/GetCharRange in the string-backed test class. It returns pos
  unchanged and ignores moveDir (hence [[maybe_unused]]).
  - NOTE(review): presumably needed because the interface this class
    implements gained MovePositionOutsideChar in the same commit; that
    declaration is outside this view -- confirm.

* Layout: line breaks and tab indentation were restored from a
  whitespace-collapsed copy. The blank context lines in the first hunk are
  inferred from its "-682,6 +682,44" line counts -- verify against the
  original patch before applying.

 test/unit/testDocument.cxx | 38 ++++++++++++++++++++++++++++++++++++++
 test/unit/testRESearch.cxx |  3 +++
 2 files changed, 41 insertions(+)

(limited to 'test')

diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index ba7bb8616..a3fca9f5e 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -682,6 +682,44 @@ TEST_CASE("Document") {
 		#endif
 	}
 
+	SECTION("RESearchMovePositionOutsideCharUTF8") {
+		DocPlus doc(" a\xCE\x93\xCE\x93z ", CpUtf8);// a gamma gamma z
+		const Sci::Position docLength = doc.document.Length();
+		constexpr std::string_view finding = R"([a-z](\w)\1)";
+
+		Match match = doc.FindString(0, docLength, finding, rePosix);
+		REQUIRE(match == Match(1, 5));
+
+		constexpr std::string_view substituteText = R"(\t\1\n)";
+		std::string substituted = doc.Substitute(substituteText);
+		REQUIRE(substituted == "\t\xCE\x93\n");
+
+		#ifndef NO_CXX11_REGEX
+		match = doc.FindString(0, docLength, finding, reCxx11);
+		REQUIRE(match == Match(1, 5));
+
+		substituted = doc.Substitute(substituteText);
+		REQUIRE(substituted == "\t\xCE\x93\n");
+		#endif
+	}
+
+	SECTION("RESearchMovePositionOutsideCharDBCS") {
+		DocPlus doc(" \x98\x61xx 1aa\x83\xA1\x83\xA1z ", 932);// U+548C xx 1aa gamma gamma z
+		const Sci::Position docLength = doc.document.Length();
+
+		Match match = doc.FindString(0, docLength, R"([a-z](\w)\1)", rePosix);
+		REQUIRE(match == Match(8, 5));
+
+		constexpr std::string_view substituteText = R"(\t\1\n)";
+		std::string substituted = doc.Substitute(substituteText);
+		REQUIRE(substituted == "\t\x83\xA1\n");
+
+		match = doc.FindString(0, docLength, R"(\w([a-z])\1)", rePosix);
+		REQUIRE(match == Match(6, 3));
+
+		substituted = doc.Substitute(substituteText);
+		REQUIRE(substituted == "\ta\n");
+	}
 }
 
 
diff --git a/test/unit/testRESearch.cxx b/test/unit/testRESearch.cxx
index 9bc1b43bb..1902fdb2b 100644
--- a/test/unit/testRESearch.cxx
+++ b/test/unit/testRESearch.cxx
@@ -41,6 +41,9 @@ public:
 	char CharAt(Sci::Position index) const override {
 		return s.at(index);
 	}
+	Sci::Position MovePositionOutsideChar(Sci::Position pos, [[maybe_unused]] Sci::Position moveDir) const noexcept override {
+		return pos;
+	}
 	std::string GetCharRange(Sci::Position position, Sci::Position lengthRetrieve) const {
 		return s.substr(position, lengthRetrieve);
 	}
-- 
cgit v1.2.3