diff options
-rw-r--r-- | src/Document.cxx | 49 | ||||
-rw-r--r-- | src/Document.h | 1 | ||||
-rw-r--r-- | src/Editor.cxx | 8 | ||||
-rw-r--r-- | src/PositionCache.cxx | 62 | ||||
-rw-r--r-- | src/PositionCache.h | 17 |
5 files changed, 87 insertions, 50 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 08bc24ecf..7b718f272 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -713,6 +713,55 @@ bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const { return false; } +inline bool IsSpaceOrTab(int ch) { + return ch == ' ' || ch == '\t'; +} + +// Need to break text into segments near lengthSegment but taking into +// account the encoding to not break inside a UTF-8 or DBCS character +// and also trying to avoid breaking inside a pair of combining characters. +// The segment length must always be long enough (more than 4 bytes) +// so that there will be at least one whole character to make a segment. +// For UTF-8, text must consist only of valid whole characters. +// In preference order from best to worst: +// 1) Break after space +// 2) Break before punctuation +// 3) Break after whole character + +int Document::SafeSegment(const char *text, int length, int lengthSegment) { + if (length <= lengthSegment) + return length; + int lastSpaceBreak = -1; + int lastPunctuationBreak = -1; + int lastEncodingAllowedBreak = -1; + for (int j=0; j < lengthSegment;) { + unsigned char ch = static_cast<unsigned char>(text[j]); + if (j > 0) { + if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) { + lastSpaceBreak = j; + } + if (ch < 'A') { + lastPunctuationBreak = j; + } + } + lastEncodingAllowedBreak = j; + + if (dbcsCodePage == SC_CP_UTF8) { + j += (ch < 0x80) ? 1 : BytesFromLead(ch); + } else if (dbcsCodePage) { + j += IsDBCSLeadByte(ch) ? 2 : 1; + } else { + j++; + } + } + if (lastSpaceBreak >= 0) { + return lastSpaceBreak; + } else if (lastPunctuationBreak >= 0) { + return lastPunctuationBreak; + } + return lastEncodingAllowedBreak; +} + void Document::ModifiedAt(int pos) { if (endStyled > pos) endStyled = pos; diff --git a/src/Document.h b/src/Document.h index 274aa0baa..7858db727 100644 --- a/src/Document.h +++ b/src/Document.h @@ -275,6 +275,7 @@ public: bool NextCharacter(int &pos, int moveDir); // Returns true if pos changed int SCI_METHOD CodePage() const; bool SCI_METHOD IsDBCSLeadByte(char ch) const; + int SafeSegment(const char *text, int length, int lengthSegment); // Gateways to modifying document void ModifiedAt(int pos); diff --git a/src/Editor.cxx b/src/Editor.cxx index 03c7b1103..ae2d670ce 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -2185,7 +2185,7 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou } else { lastSegItalics = vstyle.styles[ll->styles[charInLine]].italic; posCache.MeasureWidths(surface, vstyle, ll->styles[charInLine], ll->chars + startseg, - lenSeg, ll->positions + startseg + 1); + lenSeg, ll->positions + startseg + 1, pdoc); } } } else { // invisible @@ -2801,7 +2801,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis ll->psel = &sel; - BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible, selBackDrawn); + BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible, selBackDrawn, pdoc); int next = bfBack.First(); // Background drawing loop @@ -2891,8 +2891,8 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis inIndentation = subLine == 0; // Do not handle indentation except on first subline. // Foreground drawing loop - BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible, - ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset)); + BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, xStartVisible, + ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset), pdoc); next = bfFore.First(); while (next < lineEnd) { diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx index 52c4326c0..e59c12630 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -391,18 +391,19 @@ static int NextBadU(const char *s, int p, int len, int &trailBytes) { return -1; } -BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart, bool breakForSelection) : +BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, + int xStart, bool breakForSelection, Document *pdoc_) : ll(ll_), lineStart(lineStart_), lineEnd(lineEnd_), posLineStart(posLineStart_), - utf8(utf8_), nextBreak(lineStart_), saeSize(0), saeLen(0), saeCurrentPos(0), saeNext(0), - subBreak(-1) { + subBreak(-1), + pdoc(pdoc_) { saeSize = 8; selAndEdge = new int[saeSize]; for (unsigned int j=0; j < saeSize; j++) { @@ -435,7 +436,7 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL Insert(ll->edgeColumn - 1); Insert(lineEnd - 1); - if (utf8) { + if (pdoc && (SC_CP_UTF8 == pdoc->dbcsCodePage)) { int trailBytes=0; for (int pos = -1;;) { pos = NextBadU(ll->chars, pos, lineEnd, trailBytes); @@ -456,10 +457,6 @@ int BreakFinder::First() const { return nextBreak; } -static bool IsTrailByte(int ch) { - return (ch >= 0x80) && (ch < (0x80 + 0x40)); -} - int BreakFinder::Next() { if (subBreak == -1) { int prev = nextBreak; @@ -490,34 +487,7 @@ int BreakFinder::Next() { subBreak = -1; return nextBreak; } else { - int lastGoodBreak = -1; - int lastOKBreak = -1; - int lastUTF8Break = -1; - int j; - for (j = subBreak + 1; j <= nextBreak; j++) { - if (IsSpaceOrTab(ll->chars[j - 1]) && !IsSpaceOrTab(ll->chars[j])) { - lastGoodBreak = j; - } - if (static_cast<unsigned char>(ll->chars[j]) < 'A') { - lastOKBreak = j; - } - if (utf8 && !IsTrailByte(static_cast<unsigned char>(ll->chars[j]))) { - lastUTF8Break = j; - } - if (((j - subBreak) >= lengthEachSubdivision) && - ((lastGoodBreak >= 0) || (lastOKBreak >= 0) || (lastUTF8Break >= 0))) { - break; - } - } - if (lastGoodBreak >= 0) { - subBreak = lastGoodBreak; - } else if (lastOKBreak >= 0) { - subBreak = lastOKBreak; - } else if (lastUTF8Break >= 0) { - subBreak = lastUTF8Break; - } else { - subBreak = nextBreak; - } + subBreak += pdoc->SafeSegment(ll->chars + subBreak, nextBreak-subBreak, lengthEachSubdivision); if (subBreak >= nextBreak) { subBreak = -1; return nextBreak; @@ -624,7 +594,8 @@ void PositionCache::SetSize(size_t size_) { } void PositionCache::MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned int styleNumber, - const char *s, unsigned int len, int *positions) { + const char *s, unsigned int len, int *positions, Document *pdoc) { + allClear = false; int probe = -1; if ((size > 0) && (len < 30)) { @@ -646,7 +617,22 @@ void PositionCache::MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned probe = probe2; } } - surface->MeasureWidths(vstyle.styles[styleNumber].font, s, len, positions); + if (len > BreakFinder::lengthStartSubdivision) { + // Break up into segments + unsigned int startSegment = 0; + int xStartSegment = 0; + while (startSegment < len) { + unsigned int lenSegment = pdoc->SafeSegment(s + startSegment, len - startSegment, BreakFinder::lengthEachSubdivision); + surface->MeasureWidths(vstyle.styles[styleNumber].font, s + startSegment, lenSegment, positions + startSegment); + for (unsigned int inSeg = 0; inSeg < lenSegment; inSeg++) { + positions[startSegment + inSeg] += xStartSegment; + } + xStartSegment = positions[startSegment + lenSegment - 1]; + startSegment += lenSegment; + } + } else { + surface->MeasureWidths(vstyle.styles[styleNumber].font, s, len, positions); + } if (probe >= 0) { clock++; if (clock > 60000) { diff --git a/src/PositionCache.h b/src/PositionCache.h index a76da574c..8bd4f1b43 100644 --- a/src/PositionCache.h +++ b/src/PositionCache.h @@ -117,16 +117,10 @@ public: // Class to break a line of text into shorter runs at sensible places. class BreakFinder { - // If a whole run is longer than lengthStartSubdivision then subdivide - // into smaller runs at spaces or punctuation. - enum { lengthStartSubdivision = 300 }; - // Try to make each subdivided run lengthEachSubdivision or shorter. - enum { lengthEachSubdivision = 100 }; LineLayout *ll; int lineStart; int lineEnd; int posLineStart; - bool utf8; int nextBreak; int *selAndEdge; unsigned int saeSize; @@ -134,9 +128,16 @@ class BreakFinder { unsigned int saeCurrentPos; int saeNext; int subBreak; + Document *pdoc; void Insert(int val); public: - BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart, bool breakForSelection); + // If a whole run is longer than lengthStartSubdivision then subdivide + // into smaller runs at spaces or punctuation. + enum { lengthStartSubdivision = 300 }; + // Try to make each subdivided run lengthEachSubdivision or shorter. + enum { lengthEachSubdivision = 100 }; + BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, + int xStart, bool breakForSelection, Document *pdoc_); ~BreakFinder(); int First() const; int Next(); @@ -154,7 +155,7 @@ public: void SetSize(size_t size_); int GetSize() const { return size; } void MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned int styleNumber, - const char *s, unsigned int len, int *positions); + const char *s, unsigned int len, int *positions, Document *pdoc); }; inline bool IsSpaceOrTab(int ch) { |