From 9975609bf3b39f0e1cd121995ac49aea30a6c48f Mon Sep 17 00:00:00 2001 From: Zufu Liu Date: Thu, 21 Oct 2021 22:15:57 +1100 Subject: Feature [feature-requests:#1417] Use backward iteration to find space / control character and text / punctuation boundaries in SafeSegment as will be simpler and faster in almost all cases. Simplify BreakFinder::Next calling SafeSegment. --- src/PositionCache.cxx | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'src/PositionCache.cxx') diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx index 6370edb33..c9f4e8793 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -755,21 +755,20 @@ TextSegment BreakFinder::Next() { } subBreak = prev; } + // Splitting up a long run from prev to nextBreak in lots of approximately lengthEachSubdivision. - // For very long runs add extra breaks after spaces or if no spaces before low punctuation. const int startSegment = subBreak; - if ((nextBreak - subBreak) <= lengthEachSubdivision) { - subBreak = -1; - return TextSegment(startSegment, nextBreak - startSegment); + const int remaining = nextBreak - startSegment; + int lengthSegment = remaining; + if (lengthSegment > lengthEachSubdivision) { + lengthSegment = static_cast<int>(pdoc->SafeSegment(std::string_view(&ll->chars[startSegment], lengthEachSubdivision))); + } + if (lengthSegment < remaining) { + subBreak += lengthSegment; } else { - subBreak += pdoc->SafeSegment(&ll->chars[subBreak], lengthEachSubdivision); - if (subBreak >= nextBreak) { - subBreak = -1; - return TextSegment(startSegment, nextBreak - startSegment); - } else { - return TextSegment(startSegment, subBreak - startSegment); - } + subBreak = -1; } + return TextSegment(startSegment, lengthSegment); } bool BreakFinder::More() const noexcept { -- cgit v1.2.3