about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Zufu Liu <unknown> 2023-12-21 16:00:00 +1100
committer Zufu Liu <unknown> 2023-12-21 16:00:00 +1100
commit 8e55cc0c973cc2fbaac8cca1505524b86ce58dff (patch)
tree 0cf01d056b0c92de62b811466495a42d82a7879b
parent 1b153f8d8d4b2f09afc2d039256c958e94bd3b05 (diff)
download scintilla-mirror-8e55cc0c973cc2fbaac8cca1505524b86ce58dff.tar.gz
Bug [#2405]. Avoid character fragments in regular expression search results.
-rw-r--r-- doc/ScintillaHistory.html 4
-rw-r--r-- src/Document.cxx 9
-rw-r--r-- src/RESearch.cxx 16
-rw-r--r-- src/RESearch.h 1
-rw-r--r-- test/unit/testDocument.cxx 38
-rw-r--r-- test/unit/testRESearch.cxx 3
6 files changed, 65 insertions, 6 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index a3d44d4ae..a7df63414 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -629,6 +629,10 @@
<a href="https://sourceforge.net/p/scintilla/bugs/2405/">Bug #2405</a>.
</li>
<li>
+ Avoid character fragments in regular expression search results.
+ <a href="https://sourceforge.net/p/scintilla/bugs/2405/">Bug #2405</a>.
+ </li>
+ <li>
With a document that does not have the SC_DOCUMENTOPTION_TEXT_LARGE option set,
allocating more than 2G (calling SCI_ALLOCATE or similar) will now fail with SC_STATUS_FAILURE.
</li>
diff --git a/src/Document.cxx b/src/Document.cxx
index d67cac25e..aea2cfd0b 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -2883,6 +2883,9 @@ public:
else
return pdoc->CharAt(index);
}
+ Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept override {
+ return pdoc->MovePositionOutsideChar(pos, moveDir, false);
+ }
};
#ifndef NO_CXX11_REGEX
@@ -3277,8 +3280,7 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P
search.SetLineRange(lineStartPos, lineEndPos);
int success = search.Execute(di, startOfLine, endOfLine);
if (success) {
- // Ensure only whole characters selected
- Sci::Position endPos = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
+ Sci::Position endPos = search.eopat[0];
// There can be only one start of a line, so no need to look for last match in line
if ((resr.increment == -1) && !searchforLineStart) {
// Check for the last match on this line.
@@ -3292,14 +3294,13 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P
}
success = search.Execute(di, pos, endOfLine);
if (success) {
- endPos = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
+ endPos = search.eopat[0];
} else {
search.bopat = bopat;
search.eopat = eopat;
}
}
}
- search.eopat[0] = endPos;
pos = search.bopat[0];
lenRet = endPos - pos;
break;
diff --git a/src/RESearch.cxx b/src/RESearch.cxx
index 7b2701aba..5a509ab6e 100644
--- a/src/RESearch.cxx
+++ b/src/RESearch.cxx
@@ -769,8 +769,15 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
default: /* regular matching all the way. */
while (lp < endp) {
ep = PMatch(ci, lp, endp, ap);
- if (ep != NOTFOUND)
- break;
+ if (ep != NOTFOUND) {
+ // fix match started from middle of character like DBCS trailing ASCII byte
+ const Sci::Position pos = ci.MovePositionOutsideChar(lp, -1);
+ if (pos != lp) {
+ ep = NOTFOUND;
+ } else {
+ break;
+ }
+ }
lp++;
}
break;
@@ -791,6 +798,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
}
}
+ ep = ci.MovePositionOutsideChar(ep, 1);
bopat[0] = lp;
eopat[0] = ep;
return 1;
@@ -865,9 +873,13 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
return NOTFOUND;
break;
case BOT:
+ if (lp != ci.MovePositionOutsideChar(lp, -1)) {
+ return NOTFOUND;
+ }
bopat[static_cast<unsigned char>(*ap++)] = lp;
break;
case EOT:
+ lp = ci.MovePositionOutsideChar(lp, 1);
eopat[static_cast<unsigned char>(*ap++)] = lp;
break;
case BOW:
diff --git a/src/RESearch.h b/src/RESearch.h
index e3a9c8110..a6b9ac22e 100644
--- a/src/RESearch.h
+++ b/src/RESearch.h
@@ -14,6 +14,7 @@ namespace Scintilla::Internal {
class CharacterIndexer {
public:
virtual char CharAt(Sci::Position index) const=0;
+ virtual Sci::Position MovePositionOutsideChar(Sci::Position pos, [[maybe_unused]] Sci::Position moveDir) const noexcept=0;
};
class RESearch {
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index ba7bb8616..a3fca9f5e 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -682,6 +682,44 @@ TEST_CASE("Document") {
#endif
}
+ SECTION("RESearchMovePositionOutsideCharUTF8") {
+ DocPlus doc(" a\xCE\x93\xCE\x93z ", CpUtf8);// a gamma gamma z
+ const Sci::Position docLength = doc.document.Length();
+ constexpr std::string_view finding = R"([a-z](\w)\1)";
+
+ Match match = doc.FindString(0, docLength, finding, rePosix);
+ REQUIRE(match == Match(1, 5));
+
+ constexpr std::string_view substituteText = R"(\t\1\n)";
+ std::string substituted = doc.Substitute(substituteText);
+ REQUIRE(substituted == "\t\xCE\x93\n");
+
+ #ifndef NO_CXX11_REGEX
+ match = doc.FindString(0, docLength, finding, reCxx11);
+ REQUIRE(match == Match(1, 5));
+
+ substituted = doc.Substitute(substituteText);
+ REQUIRE(substituted == "\t\xCE\x93\n");
+ #endif
+ }
+
+ SECTION("RESearchMovePositionOutsideCharDBCS") {
+ DocPlus doc(" \x98\x61xx 1aa\x83\xA1\x83\xA1z ", 932);// U+548C xx 1aa gamma gamma z
+ const Sci::Position docLength = doc.document.Length();
+
+ Match match = doc.FindString(0, docLength, R"([a-z](\w)\1)", rePosix);
+ REQUIRE(match == Match(8, 5));
+
+ constexpr std::string_view substituteText = R"(\t\1\n)";
+ std::string substituted = doc.Substitute(substituteText);
+ REQUIRE(substituted == "\t\x83\xA1\n");
+
+ match = doc.FindString(0, docLength, R"(\w([a-z])\1)", rePosix);
+ REQUIRE(match == Match(6, 3));
+
+ substituted = doc.Substitute(substituteText);
+ REQUIRE(substituted == "\ta\n");
+ }
}
diff --git a/test/unit/testRESearch.cxx b/test/unit/testRESearch.cxx
index 9bc1b43bb..1902fdb2b 100644
--- a/test/unit/testRESearch.cxx
+++ b/test/unit/testRESearch.cxx
@@ -41,6 +41,9 @@ public:
char CharAt(Sci::Position index) const override {
return s.at(index);
}
+ Sci::Position MovePositionOutsideChar(Sci::Position pos, [[maybe_unused]] Sci::Position moveDir) const noexcept override {
+ return pos;
+ }
std::string GetCharRange(Sci::Position position, Sci::Position lengthRetrieve) const {
return s.substr(position, lengthRetrieve);
}