From e6538bb38cd509111f0f595f46e7d1ff71bcc017 Mon Sep 17 00:00:00 2001 From: Zufu Liu Date: Sat, 2 Dec 2023 08:20:55 +1100 Subject: Bug [#2157]. Fix regular expression search for word begin \< and word end \>. --- src/Document.cxx | 7 +++++-- src/RESearch.cxx | 29 ++++++++++++++++++++--------- src/RESearch.h | 8 +++++++- 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/Document.cxx b/src/Document.cxx index a35105669..16595eafa 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -3232,8 +3232,10 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0'; const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\'); for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) { - Sci::Position startOfLine = doc->LineStart(line); - Sci::Position endOfLine = doc->LineEnd(line); + const Sci::Position lineStartPos = doc->LineStart(line); + const Sci::Position lineEndPos = doc->LineEnd(line); + Sci::Position startOfLine = lineStartPos; + Sci::Position endOfLine = lineEndPos; if (resr.increment == 1) { if (line == resr.lineRangeStart) { if ((resr.startPos != startOfLine) && searchforLineStart) @@ -3259,6 +3261,7 @@ Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::P } const DocumentIndexer di(doc, endOfLine); + search.SetLineRange(lineStartPos, lineEndPos); int success = search.Execute(di, startOfLine, endOfLine); if (success) { pos = search.bopat[0]; diff --git a/src/RESearch.cxx b/src/RESearch.cxx index de4dc08a9..7b2701aba 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -253,7 +253,8 @@ RESearch::RESearch(CharClassify *charClassTable) { failure = 0; charClass = charClassTable; sta = NOP; /* status of lastpat */ - bol = 0; + lineStartPos = 0; + lineEndPos = 0; nfa[0] = END; Clear(); } @@ -740,7 +741,6 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio Sci::Position ep = NOTFOUND; const char * const ap = nfa; - bol = lp; failure = 0; Clear(); @@ -751,7 +751,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio ep = PMatch(ci, lp, endp, ap); break; case EOL: /* just searching for end of line normal path doesn't work */ - if (ap[1] == END) { + if (endp == lineEndPos && ap[1] == END) { lp = endp; ep = lp; break; @@ -777,8 +777,19 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio case END: /* munged automaton. fail always */ return 0; } - if (ep == NOTFOUND) - return 0; + if (ep == NOTFOUND) { + /* similar to EOL, match EOW at line end */ + if (endp == lineEndPos && *ap == EOW) { + if ((ap[1] == END || ((ap[1] == EOL && ap[2] == END))) && iswordc(ci.CharAt(lp - 1))) { + lp = endp; + ep = lp; + } else { + return 0; + } + } else { + return 0; + } + } bopat[0] = lp; eopat[0] = ep; @@ -846,11 +857,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci ap += BITBLK; break; case BOL: - if (lp != bol) + if (lp != lineStartPos) return NOTFOUND; break; case EOL: - if (lp < endp) + if (lp < lineEndPos) return NOTFOUND; break; case BOT: @@ -860,11 +871,11 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci eopat[static_cast(*ap++)] = lp; break; case BOW: - if ((lp!=bol && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp))) + if ((lp!=lineStartPos && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp))) return NOTFOUND; break; case EOW: - if (lp==bol || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp))) + if (lp==lineStartPos || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp))) return NOTFOUND; break; case REF: { diff --git a/src/RESearch.h b/src/RESearch.h index b7955dc79..e3a9c8110 100644 --- a/src/RESearch.h +++ b/src/RESearch.h @@ -24,6 +24,10 @@ public: void Clear(); const char *Compile(const char *pattern, Sci::Position length, bool caseSensitive, bool posix); int Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp); + void SetLineRange(Sci::Position startPos, Sci::Position endPos) noexcept { + lineStartPos = startPos; + lineEndPos = endPos; + } static constexpr int MAXTAG = 10; static constexpr int NOTFOUND = -1; @@ -47,7 +51,9 @@ private: Sci::Position PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, const char *ap); - Sci::Position bol; + // positions to match line start and line end + Sci::Position lineStartPos; + Sci::Position lineEndPos; char nfa[MAXNFA]; /* automaton */ int sta; int failure; -- cgit v1.2.3