diff options
-rw-r--r-- | doc/ScintillaHistory.html | 5 | ||||
-rw-r--r-- | src/Document.cxx | 7 | ||||
-rw-r--r-- | src/RESearch.cxx | 29 | ||||
-rw-r--r-- | src/RESearch.h | 8 | ||||
-rw-r--r-- | test/unit/testDocument.cxx | 130 |
5 files changed, 167 insertions, 12 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index b37d6bbf0..ed14dd626 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -605,6 +605,11 @@ <a href="https://sourceforge.net/p/scintilla/feature-requests/1502/">Feature #1502</a>. </li> <li> + Fix regular expression search for "\<" matching beginning of search when not beginning of word and + for "\>" not matching line end. + <a href="https://sourceforge.net/p/scintilla/bugs/2157/">Bug #2157</a>. + </li> + <li> Fix regular expression search failure when search for "\<" followed by search for "\>". <a href="https://sourceforge.net/p/scintilla/bugs/2413/">Bug #2413</a>. </li> diff --git a/src/Document.cxx b/src/Document.cxx index a35105669..16595eafa 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -3232,8 +3232,10 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0'; const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\'); for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { - Sci::Position startOfLine = doc->LineStart(line); - Sci::Position endOfLine = doc->LineEnd(line); + const Sci::Position lineStartPos = doc->LineStart(line); + const Sci::Position lineEndPos = doc->LineEnd(line); + Sci::Position startOfLine = lineStartPos; + Sci::Position endOfLine = lineEndPos; if (resr.increment == 1) { if (line == resr.lineRangeStart) { if ((resr.startPos != startOfLine) && searchforLineStart) @@ -3259,6 +3261,7 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P } const DocumentIndexer di(doc, endOfLine); + search.SetLineRange(lineStartPos, lineEndPos); int success = search.Execute(di, startOfLine, endOfLine); if (success) { pos = search.bopat[0]; diff --git a/src/RESearch.cxx b/src/RESearch.cxx index de4dc08a9..7b2701aba 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -253,7 +253,8 @@ RESearch::RESearch(CharClassify *charClassTable) { failure = 0; charClass = charClassTable; sta = NOP; /* status of lastpat */ - bol = 0; + lineStartPos = 0; + lineEndPos = 0; nfa[0] = END; Clear(); } @@ -740,7 +741,6 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio Sci::Position ep = NOTFOUND; const char * const ap = nfa; - bol = lp; failure = 0; Clear(); @@ -751,7 +751,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio ep = PMatch(ci, lp, endp, ap); break; case EOL: /* just searching for end of line normal path doesn't work */ - if (ap[1] == END) { + if (endp == lineEndPos && ap[1] == END) { lp = endp; ep = lp; break; @@ -777,8 +777,19 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio case END: /* munged automaton. fail always */ return 0; } - if (ep == NOTFOUND) - return 0; + if (ep == NOTFOUND) { + /* similar to EOL, match EOW at line end */ + if (endp == lineEndPos && *ap == EOW) { + if ((ap[1] == END || ((ap[1] == EOL && ap[2] == END))) && iswordc(ci.CharAt(lp - 1))) { + lp = endp; + ep = lp; + } else { + return 0; + } + } else { + return 0; + } + } bopat[0] = lp; eopat[0] = ep; @@ -846,11 +857,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci ap += BITBLK; break; case BOL: - if (lp != bol) + if (lp != lineStartPos) return NOTFOUND; break; case EOL: - if (lp < endp) + if (lp < lineEndPos) return NOTFOUND; break; case BOT: @@ -860,11 +871,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci eopat[static_cast<unsigned char>(*ap++)] = lp; break; case BOW: - if ((lp!=bol && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp))) + if ((lp!=lineStartPos && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp))) return NOTFOUND; break; case EOW: - if (lp==bol || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp))) + if (lp==lineStartPos || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp))) return NOTFOUND; break; case REF: { diff --git a/src/RESearch.h b/src/RESearch.h index b7955dc79..e3a9c8110 100644 --- a/src/RESearch.h +++ b/src/RESearch.h @@ -24,6 +24,10 @@ public: void Clear(); const char *Compile(const char *pattern, Sci::Position length, bool caseSensitive, bool posix); int Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp); + void SetLineRange(Sci::Position startPos, Sci::Position endPos) noexcept { + lineStartPos = startPos; + lineEndPos = endPos; + } static constexpr int MAXTAG = 10; static constexpr int NOTFOUND = -1; @@ -47,7 +51,9 @@ private: Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, const char *ap); - Sci::Position bol; + // positions to match line start and line end + Sci::Position lineStartPos; + Sci::Position lineEndPos; char nfa[MAXNFA]; /* automaton */ int sta; int failure; diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index 85ed3b813..025de3012 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -512,6 +512,136 @@ TEST_CASE("Document") { REQUIRE(substituted == "\tb\xCE\x93y\n"); #endif } + + SECTION("RegexAssertion") { + DocPlus doc("ab cd ef\r\ngh ij kl", CpUtf8); + const Sci::Position docLength = doc.document.Length(); + constexpr std::string_view findingBOL = "^"; + Sci::Position lengthFinding = findingBOL.length(); + Sci::Position location = doc.document.FindText(0, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 0); + REQUIRE(lengthFinding == 0); + lengthFinding = findingBOL.length(); + location = doc.document.FindText(1, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 10); + REQUIRE(lengthFinding == 0); + lengthFinding = findingBOL.length(); + location = doc.document.FindText(docLength, 0, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 10); + REQUIRE(lengthFinding == 0); + lengthFinding = findingBOL.length(); + location = doc.document.FindText(docLength - 1, 0, findingBOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 10); + REQUIRE(lengthFinding == 0); + + #ifndef NO_CXX11_REGEX + lengthFinding = findingBOL.length(); + location = doc.document.FindText(0, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 0); + REQUIRE(lengthFinding == 0); + lengthFinding = findingBOL.length(); + location = doc.document.FindText(1, docLength, findingBOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 10); + REQUIRE(lengthFinding == 0); + #endif + + constexpr std::string_view findingEOL = "$"; + lengthFinding = findingEOL.length(); + location = doc.document.FindText(0, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 8); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOL.length(); + location = doc.document.FindText(1, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 8); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOL.length(); + location = doc.document.FindText(docLength, 0, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 18); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOL.length(); + location = doc.document.FindText(docLength - 1, 0, findingEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 8); + REQUIRE(lengthFinding == 0); + + #ifndef NO_CXX11_REGEX + lengthFinding = findingEOL.length(); + location = doc.document.FindText(0, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 8); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOL.length(); + location = doc.document.FindText(1, docLength, findingEOL.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 8); + REQUIRE(lengthFinding == 0); + #endif + + constexpr std::string_view findingBOW = "\\<"; + lengthFinding = findingBOW.length(); + location = doc.document.FindText(0, docLength, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 0); + REQUIRE(lengthFinding == 0); + lengthFinding = findingBOW.length(); + location = doc.document.FindText(1, docLength, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 3); + REQUIRE(lengthFinding == 0); + lengthFinding = findingBOW.length(); + location = doc.document.FindText(docLength, 0, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 16); + REQUIRE(lengthFinding == 0); + lengthFinding = findingBOW.length(); + location = doc.document.FindText(docLength - 1, 0, findingBOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 16); + REQUIRE(lengthFinding == 0); + + constexpr std::string_view findingEOW = "\\>"; + lengthFinding = findingEOW.length(); + location = doc.document.FindText(0, docLength, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 2); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOW.length(); + location = doc.document.FindText(1, docLength, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 2); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOW.length(); + location = doc.document.FindText(docLength, 0, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 18); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOW.length(); + location = doc.document.FindText(docLength - 1, 0, findingEOW.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 15); + REQUIRE(lengthFinding == 0); + + constexpr std::string_view findingEOWEOL = "\\>$"; + lengthFinding = findingEOWEOL.length(); + location = doc.document.FindText(0, docLength, findingEOWEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 8); + REQUIRE(lengthFinding == 0); + lengthFinding = findingEOWEOL.length(); + location = doc.document.FindText(10, docLength, findingEOWEOL.data(), FindOption::RegExp | FindOption::Posix, &lengthFinding); + REQUIRE(location == 18); + REQUIRE(lengthFinding == 0); + +#ifndef NO_CXX11_REGEX + constexpr std::string_view findingWB = "\\b"; + lengthFinding = findingWB.length(); + location = doc.document.FindText(0, docLength, findingWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 0); + REQUIRE(lengthFinding == 0); + lengthFinding = findingWB.length(); + location = doc.document.FindText(1, docLength, findingWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 1); + REQUIRE(lengthFinding == 0); + + constexpr std::string_view findingNWB = "\\B"; + lengthFinding = findingNWB.length(); + location = doc.document.FindText(0, docLength, findingNWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 1); + REQUIRE(lengthFinding == 0); + lengthFinding = findingNWB.length(); + location = doc.document.FindText(1, docLength, findingNWB.data(), FindOption::RegExp | FindOption::Cxx11RegEx, &lengthFinding); + REQUIRE(location == 4); + REQUIRE(lengthFinding == 0); + #endif + } } TEST_CASE("Words") { |