about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Zufu Liu <unknown> 2023-12-02 08:20:55 +1100
committer Zufu Liu <unknown> 2023-12-02 08:20:55 +1100
commit e6538bb38cd509111f0f595f46e7d1ff71bcc017 (patch)
tree cdf847b194f9431c64d2b527c7b216e7486aceb2
parent c8ca5050cf8c908e49d28eda5642542f7ac1155b (diff)
download scintilla-mirror-e6538bb38cd509111f0f595f46e7d1ff71bcc017.tar.gz
Bug [#2157]. Fix regular expression search for word begin \< and word end \>.
-rw-r--r-- doc/ScintillaHistory.html 5
-rw-r--r-- src/Document.cxx 7
-rw-r--r-- src/RESearch.cxx 29
-rw-r--r-- src/RESearch.h 8
-rw-r--r-- test/unit/testDocument.cxx 130
5 files changed, 167 insertions, 12 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index b37d6bbf0..ed14dd626 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -605,6 +605,11 @@
<a href="https://sourceforge.net/p/scintilla/feature-requests/1502/">Feature #1502</a>.
</li>
<li>
+ Fix regular expression search for "\&lt;" matching beginning of search when not beginning of word and
+ for "\&gt;" not matching line end.
+ <a href="https://sourceforge.net/p/scintilla/bugs/2157/">Bug #2157</a>.
+ </li>
+ <li>
Fix regular expression search failure when search for "\&lt;" followed by search for "\&gt;".
<a href="https://sourceforge.net/p/scintilla/bugs/2413/">Bug #2413</a>.
</li>
diff --git a/src/Document.cxx b/src/Document.cxx
index a35105669..16595eafa 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -3232,8 +3232,10 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P
const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
- Sci::Position startOfLine = doc->LineStart(line);
- Sci::Position endOfLine = doc->LineEnd(line);
+ const Sci::Position lineStartPos = doc->LineStart(line);
+ const Sci::Position lineEndPos = doc->LineEnd(line);
+ Sci::Position startOfLine = lineStartPos;
+ Sci::Position endOfLine = lineEndPos;
if (resr.increment == 1) {
if (line == resr.lineRangeStart) {
if ((resr.startPos != startOfLine) && searchforLineStart)
@@ -3259,6 +3261,7 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P
}
const DocumentIndexer di(doc, endOfLine);
+ search.SetLineRange(lineStartPos, lineEndPos);
int success = search.Execute(di, startOfLine, endOfLine);
if (success) {
pos = search.bopat[0];
diff --git a/src/RESearch.cxx b/src/RESearch.cxx
index de4dc08a9..7b2701aba 100644
--- a/src/RESearch.cxx
+++ b/src/RESearch.cxx
@@ -253,7 +253,8 @@ RESearch::RESearch(CharClassify *charClassTable) {
failure = 0;
charClass = charClassTable;
sta = NOP; /* status of lastpat */
- bol = 0;
+ lineStartPos = 0;
+ lineEndPos = 0;
nfa[0] = END;
Clear();
}
@@ -740,7 +741,6 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
Sci::Position ep = NOTFOUND;
const char * const ap = nfa;
- bol = lp;
failure = 0;
Clear();
@@ -751,7 +751,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
ep = PMatch(ci, lp, endp, ap);
break;
case EOL: /* just searching for end of line normal path doesn't work */
- if (ap[1] == END) {
+ if (endp == lineEndPos && ap[1] == END) {
lp = endp;
ep = lp;
break;
@@ -777,8 +777,19 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
case END: /* munged automaton. fail always */
return 0;
}
- if (ep == NOTFOUND)
- return 0;
+ if (ep == NOTFOUND) {
+ /* similar to EOL, match EOW at line end */
+ if (endp == lineEndPos && *ap == EOW) {
+ if ((ap[1] == END || ((ap[1] == EOL && ap[2] == END))) && iswordc(ci.CharAt(lp - 1))) {
+ lp = endp;
+ ep = lp;
+ } else {
+ return 0;
+ }
+ } else {
+ return 0;
+ }
+ }
bopat[0] = lp;
eopat[0] = ep;
@@ -846,11 +857,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
ap += BITBLK;
break;
case BOL:
- if (lp != bol)
+ if (lp != lineStartPos)
return NOTFOUND;
break;
case EOL:
- if (lp < endp)
+ if (lp < lineEndPos)
return NOTFOUND;
break;
case BOT:
@@ -860,11 +871,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
eopat[static_cast<unsigned char>(*ap++)] = lp;
break;
case BOW:
- if ((lp!=bol && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp)))
+ if ((lp!=lineStartPos && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp)))
return NOTFOUND;
break;
case EOW:
- if (lp==bol || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp)))
+ if (lp==lineStartPos || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp)))
return NOTFOUND;
break;
case REF: {
diff --git a/src/RESearch.h b/src/RESearch.h
index b7955dc79..e3a9c8110 100644
--- a/src/RESearch.h
+++ b/src/RESearch.h
@@ -24,6 +24,10 @@ public:
void Clear();
const char *Compile(const char *pattern, Sci::Position length, bool caseSensitive, bool posix);
int Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp);
+ void SetLineRange(Sci::Position startPos, Sci::Position endPos) noexcept {
+ lineStartPos = startPos;
+ lineEndPos = endPos;
+ }
static constexpr int MAXTAG = 10;
static constexpr int NOTFOUND = -1;
@@ -47,7 +51,9 @@ private:
Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, const char *ap);
- Sci::Position bol;
+ // positions to match line start and line end
+ Sci::Position lineStartPos;
+ Sci::Position lineEndPos;
char nfa[MAXNFA]; /* automaton */
int sta;
int failure;
diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx
index 85ed3b813..025de3012 100644
--- a/test/unit/testDocument.cxx
+++ b/test/unit/testDocument.cxx
@@ -512,6 +512,136 @@ TEST_CASE("Document") {
REQUIRE(substituted == "\tb\xCE\x93y\n");
#endif
}
+
+ SECTION("RegexAssertion") {
+ DocPlus doc("ab cd ef\r\ngh ij kl", CpUtf8);
+ const Sci::Position docLength = doc.document.Length();
+ constexpr std::string_view findingBOL = "^";
+ Sci::Position lengthFinding = findingBOL.length();
+ Sci::Position location = doc.document.FindText(0, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 0);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingBOL.length();
+ location = doc.document.FindText(1, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 10);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingBOL.length();
+ location = doc.document.FindText(docLength, 0, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 10);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingBOL.length();
+ location = doc.document.FindText(docLength - 1, 0, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 10);
+ REQUIRE(lengthFinding == 0);
+
+ #ifndef NO_CXX11_REGEX
+ lengthFinding = findingBOL.length();
+ location = doc.document.FindText(0, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 0);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingBOL.length();
+ location = doc.document.FindText(1, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 10);
+ REQUIRE(lengthFinding == 0);
+ #endif
+
+ constexpr std::string_view findingEOL = "$";
+ lengthFinding = findingEOL.length();
+ location = doc.document.FindText(0, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 8);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOL.length();
+ location = doc.document.FindText(1, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 8);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOL.length();
+ location = doc.document.FindText(docLength, 0, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 18);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOL.length();
+ location = doc.document.FindText(docLength - 1, 0, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 8);
+ REQUIRE(lengthFinding == 0);
+
+ #ifndef NO_CXX11_REGEX
+ lengthFinding = findingEOL.length();
+ location = doc.document.FindText(0, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 8);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOL.length();
+ location = doc.document.FindText(1, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 8);
+ REQUIRE(lengthFinding == 0);
+ #endif
+
+ constexpr std::string_view findingBOW = "\\<";
+ lengthFinding = findingBOW.length();
+ location = doc.document.FindText(0, docLength, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 0);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingBOW.length();
+ location = doc.document.FindText(1, docLength, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 3);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingBOW.length();
+ location = doc.document.FindText(docLength, 0, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 16);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingBOW.length();
+ location = doc.document.FindText(docLength - 1, 0, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 16);
+ REQUIRE(lengthFinding == 0);
+
+ constexpr std::string_view findingEOW = "\\>";
+ lengthFinding = findingEOW.length();
+ location = doc.document.FindText(0, docLength, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 2);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOW.length();
+ location = doc.document.FindText(1, docLength, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 2);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOW.length();
+ location = doc.document.FindText(docLength, 0, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 18);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOW.length();
+ location = doc.document.FindText(docLength - 1, 0, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 15);
+ REQUIRE(lengthFinding == 0);
+
+ constexpr std::string_view findingEOWEOL = "\\>$";
+ lengthFinding = findingEOWEOL.length();
+ location = doc.document.FindText(0, docLength, findingEOWEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 8);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingEOWEOL.length();
+ location = doc.document.FindText(10, docLength, findingEOWEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding);
+ REQUIRE(location == 18);
+ REQUIRE(lengthFinding == 0);
+
+#ifndef NO_CXX11_REGEX
+ constexpr std::string_view findingWB = "\\b";
+ lengthFinding = findingWB.length();
+ location = doc.document.FindText(0, docLength, findingWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 0);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingWB.length();
+ location = doc.document.FindText(1, docLength, findingWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 1);
+ REQUIRE(lengthFinding == 0);
+
+ constexpr std::string_view findingNWB = "\\B";
+ lengthFinding = findingNWB.length();
+ location = doc.document.FindText(0, docLength, findingNWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 1);
+ REQUIRE(lengthFinding == 0);
+ lengthFinding = findingNWB.length();
+ location = doc.document.FindText(1, docLength, findingNWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding);
+ REQUIRE(location == 4);
+ REQUIRE(lengthFinding == 0);
+ #endif
+ }
}
TEST_CASE("Words") {