From 8e55cc0c973cc2fbaac8cca1505524b86ce58dff Mon Sep 17 00:00:00 2001 From: Zufu Liu Date: Thu, 21 Dec 2023 16:00:00 +1100 Subject: Bug [#2405]. Avoid character fragments in regular expression search results. --- src/RESearch.cxx | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'src/RESearch.cxx') diff --git a/src/RESearch.cxx b/src/RESearch.cxx index 7b2701aba..5a509ab6e 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -769,8 +769,15 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio default: /* regular matching all the way. */ while (lp < endp) { ep = PMatch(ci, lp, endp, ap); - if (ep != NOTFOUND) - break; + if (ep != NOTFOUND) { + // fix match started from middle of character like DBCS trailing ASCII byte + const Sci::Position pos = ci.MovePositionOutsideChar(lp, -1); + if (pos != lp) { + ep = NOTFOUND; + } else { + break; + } + } lp++; } break; @@ -791,6 +798,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio } } + ep = ci.MovePositionOutsideChar(ep, 1); bopat[0] = lp; eopat[0] = ep; return 1; @@ -865,9 +873,13 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci return NOTFOUND; break; case BOT: + if (lp != ci.MovePositionOutsideChar(lp, -1)) { + return NOTFOUND; + } bopat[static_cast<unsigned char>(*ap++)] = lp; break; case EOT: + lp = ci.MovePositionOutsideChar(lp, 1); eopat[static_cast<unsigned char>(*ap++)] = lp; break; case BOW: -- cgit v1.2.3