diff options
author | Zufu Liu <unknown> | 2023-12-21 16:00:00 +1100 |
---|---|---|
committer | Zufu Liu <unknown> | 2023-12-21 16:00:00 +1100 |
commit | 8e55cc0c973cc2fbaac8cca1505524b86ce58dff (patch) | |
tree | 0cf01d056b0c92de62b811466495a42d82a7879b | |
parent | 1b153f8d8d4b2f09afc2d039256c958e94bd3b05 (diff) | |
download | scintilla-mirror-8e55cc0c973cc2fbaac8cca1505524b86ce58dff.tar.gz |
Bug [#2405]. Avoid character fragments in regular expression search results.
-rw-r--r-- | doc/ScintillaHistory.html | 4 | ||||
-rw-r--r-- | src/Document.cxx | 9 | ||||
-rw-r--r-- | src/RESearch.cxx | 16 | ||||
-rw-r--r-- | src/RESearch.h | 1 | ||||
-rw-r--r-- | test/unit/testDocument.cxx | 38 | ||||
-rw-r--r-- | test/unit/testRESearch.cxx | 3 |
6 files changed, 65 insertions, 6 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index a3d44d4ae..a7df63414 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -629,6 +629,10 @@ <a href="https://sourceforge.net/p/scintilla/bugs/2405/">Bug #2405</a>. </li> <li> + Avoid character fragments in regular expression search results. + <a href="https://sourceforge.net/p/scintilla/bugs/2405/">Bug #2405</a>. + </li> + <li> With a document that does not have the SC_DOCUMENTOPTION_TEXT_LARGE option set, allocating more than 2G (calling SCI_ALLOCATE or similar) will now fail with SC_STATUS_FAILURE. </li> diff --git a/src/Document.cxx b/src/Document.cxx index d67cac25e..aea2cfd0b 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -2883,6 +2883,9 @@ public: else return pdoc->CharAt(index); } + Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept override { + return pdoc->MovePositionOutsideChar(pos, moveDir, false); + } }; #ifndef NO_CXX11_REGEX @@ -3277,8 +3280,7 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P search.SetLineRange(lineStartPos, lineEndPos); int success = search.Execute(di, startOfLine, endOfLine); if (success) { - // Ensure only whole characters selected - Sci::Position endPos = doc->MovePositionOutsideChar(search.eopat[0], 1, false); + Sci::Position endPos = search.eopat[0]; // There can be only one start of a line, so no need to look for last match in line if ((resr.increment == -1) && !searchforLineStart) { // Check for the last match on this line. @@ -3292,14 +3294,13 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P } success = search.Execute(di, pos, endOfLine); if (success) { - endPos = doc->MovePositionOutsideChar(search.eopat[0], 1, false); + endPos = search.eopat[0]; } else { search.bopat = bopat; search.eopat = eopat; } } } - search.eopat[0] = endPos; pos = search.bopat[0]; lenRet = endPos - pos; break; diff --git a/src/RESearch.cxx b/src/RESearch.cxx index 7b2701aba..5a509ab6e 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -769,8 +769,15 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio default: /* regular matching all the way. */ while (lp < endp) { ep = PMatch(ci, lp, endp, ap); - if (ep != NOTFOUND) - break; + if (ep != NOTFOUND) { + // fix match started from middle of character like DBCS trailing ASCII byte + const Sci::Position pos = ci.MovePositionOutsideChar(lp, -1); + if (pos != lp) { + ep = NOTFOUND; + } else { + break; + } + } lp++; } break; @@ -791,6 +798,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio } } + ep = ci.MovePositionOutsideChar(ep, 1); bopat[0] = lp; eopat[0] = ep; return 1; @@ -865,9 +873,13 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci return NOTFOUND; break; case BOT: + if (lp != ci.MovePositionOutsideChar(lp, -1)) { + return NOTFOUND; + } bopat[static_cast<unsigned char>(*ap++)] = lp; break; case EOT: + lp = ci.MovePositionOutsideChar(lp, 1); eopat[static_cast<unsigned char>(*ap++)] = lp; break; case BOW: diff --git a/src/RESearch.h b/src/RESearch.h index e3a9c8110..a6b9ac22e 100644 --- a/src/RESearch.h +++ b/src/RESearch.h @@ -14,6 +14,7 @@ namespace Scintilla::Internal { class CharacterIndexer { public: virtual char CharAt(Sci::Position index) const=0; + virtual Sci::Position MovePositionOutsideChar(Sci::Position pos, [[maybe_unused]] Sci::Position moveDir) const noexcept=0; }; class RESearch { diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index ba7bb8616..a3fca9f5e 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -682,6 +682,44 @@ TEST_CASE("Document") { #endif } + SECTION("RESearchMovePositionOutsideCharUTF8") { + DocPlus doc(" a\xCE\x93\xCE\x93z ", CpUtf8);// a gamma gamma z + const Sci::Position docLength = doc.document.Length(); + constexpr std::string_view finding = R"([a-z](\w)\1)"; + + Match match = doc.FindString(0, docLength, finding, rePosix); + REQUIRE(match == Match(1, 5)); + + constexpr std::string_view substituteText = R"(\t\1\n)"; + std::string substituted = doc.Substitute(substituteText); + REQUIRE(substituted == "\t\xCE\x93\n"); + + #ifndef NO_CXX11_REGEX + match = doc.FindString(0, docLength, finding, reCxx11); + REQUIRE(match == Match(1, 5)); + + substituted = doc.Substitute(substituteText); + REQUIRE(substituted == "\t\xCE\x93\n"); + #endif + } + + SECTION("RESearchMovePositionOutsideCharDBCS") { + DocPlus doc(" \x98\x61xx 1aa\x83\xA1\x83\xA1z ", 932);// U+548C xx 1aa gamma gamma z + const Sci::Position docLength = doc.document.Length(); + + Match match = doc.FindString(0, docLength, R"([a-z](\w)\1)", rePosix); + REQUIRE(match == Match(8, 5)); + + constexpr std::string_view substituteText = R"(\t\1\n)"; + std::string substituted = doc.Substitute(substituteText); + REQUIRE(substituted == "\t\x83\xA1\n"); + + match = doc.FindString(0, docLength, R"(\w([a-z])\1)", rePosix); + REQUIRE(match == Match(6, 3)); + + substituted = doc.Substitute(substituteText); + REQUIRE(substituted == "\ta\n"); + } } diff --git a/test/unit/testRESearch.cxx b/test/unit/testRESearch.cxx index 9bc1b43bb..1902fdb2b 100644 --- a/test/unit/testRESearch.cxx +++ b/test/unit/testRESearch.cxx @@ -41,6 +41,9 @@ public: char CharAt(Sci::Position index) const override { return s.at(index); } + Sci::Position MovePositionOutsideChar(Sci::Position pos, [[maybe_unused]] Sci::Position moveDir) const noexcept override { + return pos; + } std::string GetCharRange(Sci::Position position, Sci::Position lengthRetrieve) const { return s.substr(position, lengthRetrieve); } |