aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authornyamatongwe <devnull@localhost>2011-05-13 10:27:30 +1000
committernyamatongwe <devnull@localhost>2011-05-13 10:27:30 +1000
commit15bfd9c723b0753dcf2b6e85f6b9be351af00378 (patch)
tree14707aa7cea073ecb62849d09697a02ca38e9375
parent3166c901c7d071f2083a1a29ca710171fd7cf699 (diff)
downloadscintilla-mirror-15bfd9c723b0753dcf2b6e85f6b9be351af00378.tar.gz
Break measurement of text into reasonably sized segments similar
to drawing. Drawing will now always be broken up at a character boundary even when there is a large number of alphabetic characters. Fixes bug #3165743.
-rw-r--r--src/Document.cxx49
-rw-r--r--src/Document.h1
-rw-r--r--src/Editor.cxx8
-rw-r--r--src/PositionCache.cxx62
-rw-r--r--src/PositionCache.h17
5 files changed, 87 insertions, 50 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 08bc24ecf..7b718f272 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -713,6 +713,55 @@ bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
return false;
}
+inline bool IsSpaceOrTab(int ch) { // True when ch is a space or a horizontal tab. NOTE(review): PositionCache.h also appears to define this helper - confirm both are never visible in one translation unit
+ return ch == ' ' || ch == '\t';
+}
+
+// Need to break text into segments near lengthSegment but taking into
+// account the encoding to not break inside a UTF-8 or DBCS character
+// and also trying to avoid breaking inside a pair of combining characters.
+// The segment length must always be long enough (more than 4 bytes)
+// so that there will be at least one whole character to make a segment.
+// For UTF-8, text must consist only of valid whole characters.
+// In preference order from best to worst:
+// 1) Break after space
+// 2) Break before punctuation
+// 3) Break after whole character
+
+int Document::SafeSegment(const char *text, int length, int lengthSegment) { // Returns how many leading bytes of text form the next segment (an offset into text, at most length)
+ if (length <= lengthSegment) // Remaining text already fits in one segment: no split needed
+ return length;
+ int lastSpaceBreak = -1; // Latest offset where a non-blank byte follows a space/tab; -1 while none seen
+ int lastPunctuationBreak = -1; // Latest offset whose byte is below 'A'; -1 while none seen
+ int lastEncodingAllowedBreak = -1; // Latest character start visited; NOTE(review): stays -1 only if lengthSegment <= 0, in which case -1 is returned - confirm callers never pass that
+ for (int j=0; j < lengthSegment;) { // j is advanced at the bottom by one whole character, so it always sits on a character start
+ unsigned char ch = static_cast<unsigned char>(text[j]); // Unsigned so bytes >= 0x80 compare as large values rather than negative ones
+ if (j > 0) { // Offset 0 is skipped: text[j - 1] would be out of range and a break there would give an empty segment
+ if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) { // Preference 1: break after a run of spaces/tabs
+ lastSpaceBreak = j;
+ }
+ if (ch < 'A') { // Preference 2: break before a byte below 'A' (ASCII punctuation, but also digits, blanks and control codes)
+ lastPunctuationBreak = j;
+ }
+ }
+ lastEncodingAllowedBreak = j; // Preference 3: any character start, including offset 0
+
+ if (dbcsCodePage == SC_CP_UTF8) {
+ j += (ch < 0x80) ? 1 : BytesFromLead(ch); // ASCII is one byte; otherwise skip the count BytesFromLead reports (presumably the sequence length for this lead byte - defined elsewhere, confirm)
+ } else if (dbcsCodePage) {
+ j += IsDBCSLeadByte(ch) ? 2 : 1; // A DBCS lead byte is stepped over together with the byte that follows it
+ } else {
+ j++; // Single-byte encoding: every byte is a character
+ }
+ }
+ if (lastSpaceBreak >= 0) { // Choose the best kind of break found, regardless of how close it is to lengthSegment
+ return lastSpaceBreak;
+ } else if (lastPunctuationBreak >= 0) {
+ return lastPunctuationBreak;
+ }
+ return lastEncodingAllowedBreak; // NOTE(review): this is 0 when the first character spans at least lengthSegment bytes; a caller looping on the result would then not advance
+}
+
void Document::ModifiedAt(int pos) {
if (endStyled > pos)
endStyled = pos;
diff --git a/src/Document.h b/src/Document.h
index 274aa0baa..7858db727 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -275,6 +275,7 @@ public:
bool NextCharacter(int &pos, int moveDir); // Returns true if pos changed
int SCI_METHOD CodePage() const;
bool SCI_METHOD IsDBCSLeadByte(char ch) const;
+ int SafeSegment(const char *text, int length, int lengthSegment);
// Gateways to modifying document
void ModifiedAt(int pos);
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 03c7b1103..ae2d670ce 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -2185,7 +2185,7 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou
} else {
lastSegItalics = vstyle.styles[ll->styles[charInLine]].italic;
posCache.MeasureWidths(surface, vstyle, ll->styles[charInLine], ll->chars + startseg,
- lenSeg, ll->positions + startseg + 1);
+ lenSeg, ll->positions + startseg + 1, pdoc);
}
}
} else { // invisible
@@ -2801,7 +2801,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis
ll->psel = &sel;
- BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible, selBackDrawn);
+ BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible, selBackDrawn, pdoc);
int next = bfBack.First();
// Background drawing loop
@@ -2891,8 +2891,8 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis
inIndentation = subLine == 0; // Do not handle indentation except on first subline.
// Foreground drawing loop
- BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible,
- ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset));
+ BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, xStartVisible,
+ ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset), pdoc);
next = bfFore.First();
while (next < lineEnd) {
diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx
index 52c4326c0..e59c12630 100644
--- a/src/PositionCache.cxx
+++ b/src/PositionCache.cxx
@@ -391,18 +391,19 @@ static int NextBadU(const char *s, int p, int len, int &trailBytes) {
return -1;
}
-BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart, bool breakForSelection) :
+BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_,
+ int xStart, bool breakForSelection, Document *pdoc_) :
ll(ll_),
lineStart(lineStart_),
lineEnd(lineEnd_),
posLineStart(posLineStart_),
- utf8(utf8_),
nextBreak(lineStart_),
saeSize(0),
saeLen(0),
saeCurrentPos(0),
saeNext(0),
- subBreak(-1) {
+ subBreak(-1),
+ pdoc(pdoc_) {
saeSize = 8;
selAndEdge = new int[saeSize];
for (unsigned int j=0; j < saeSize; j++) {
@@ -435,7 +436,7 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL
Insert(ll->edgeColumn - 1);
Insert(lineEnd - 1);
- if (utf8) {
+ if (pdoc && (SC_CP_UTF8 == pdoc->dbcsCodePage)) {
int trailBytes=0;
for (int pos = -1;;) {
pos = NextBadU(ll->chars, pos, lineEnd, trailBytes);
@@ -456,10 +457,6 @@ int BreakFinder::First() const {
return nextBreak;
}
-static bool IsTrailByte(int ch) {
- return (ch >= 0x80) && (ch < (0x80 + 0x40));
-}
-
int BreakFinder::Next() {
if (subBreak == -1) {
int prev = nextBreak;
@@ -490,34 +487,7 @@ int BreakFinder::Next() {
subBreak = -1;
return nextBreak;
} else {
- int lastGoodBreak = -1;
- int lastOKBreak = -1;
- int lastUTF8Break = -1;
- int j;
- for (j = subBreak + 1; j <= nextBreak; j++) {
- if (IsSpaceOrTab(ll->chars[j - 1]) && !IsSpaceOrTab(ll->chars[j])) {
- lastGoodBreak = j;
- }
- if (static_cast<unsigned char>(ll->chars[j]) < 'A') {
- lastOKBreak = j;
- }
- if (utf8 && !IsTrailByte(static_cast<unsigned char>(ll->chars[j]))) {
- lastUTF8Break = j;
- }
- if (((j - subBreak) >= lengthEachSubdivision) &&
- ((lastGoodBreak >= 0) || (lastOKBreak >= 0) || (lastUTF8Break >= 0))) {
- break;
- }
- }
- if (lastGoodBreak >= 0) {
- subBreak = lastGoodBreak;
- } else if (lastOKBreak >= 0) {
- subBreak = lastOKBreak;
- } else if (lastUTF8Break >= 0) {
- subBreak = lastUTF8Break;
- } else {
- subBreak = nextBreak;
- }
+ subBreak += pdoc->SafeSegment(ll->chars + subBreak, nextBreak-subBreak, lengthEachSubdivision);
if (subBreak >= nextBreak) {
subBreak = -1;
return nextBreak;
@@ -624,7 +594,8 @@ void PositionCache::SetSize(size_t size_) {
}
void PositionCache::MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned int styleNumber,
- const char *s, unsigned int len, int *positions) {
+ const char *s, unsigned int len, int *positions, Document *pdoc) {
+
allClear = false;
int probe = -1;
if ((size > 0) && (len < 30)) {
@@ -646,7 +617,22 @@ void PositionCache::MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned
probe = probe2;
}
}
- surface->MeasureWidths(vstyle.styles[styleNumber].font, s, len, positions);
+ if (len > BreakFinder::lengthStartSubdivision) {
+ // Break up into segments
+ unsigned int startSegment = 0;
+ int xStartSegment = 0;
+ while (startSegment < len) {
+ unsigned int lenSegment = pdoc->SafeSegment(s + startSegment, len - startSegment, BreakFinder::lengthEachSubdivision);
+ surface->MeasureWidths(vstyle.styles[styleNumber].font, s + startSegment, lenSegment, positions + startSegment);
+ for (unsigned int inSeg = 0; inSeg < lenSegment; inSeg++) {
+ positions[startSegment + inSeg] += xStartSegment;
+ }
+ xStartSegment = positions[startSegment + lenSegment - 1];
+ startSegment += lenSegment;
+ }
+ } else {
+ surface->MeasureWidths(vstyle.styles[styleNumber].font, s, len, positions);
+ }
if (probe >= 0) {
clock++;
if (clock > 60000) {
diff --git a/src/PositionCache.h b/src/PositionCache.h
index a76da574c..8bd4f1b43 100644
--- a/src/PositionCache.h
+++ b/src/PositionCache.h
@@ -117,16 +117,10 @@ public:
// Class to break a line of text into shorter runs at sensible places.
class BreakFinder {
- // If a whole run is longer than lengthStartSubdivision then subdivide
- // into smaller runs at spaces or punctuation.
- enum { lengthStartSubdivision = 300 };
- // Try to make each subdivided run lengthEachSubdivision or shorter.
- enum { lengthEachSubdivision = 100 };
LineLayout *ll;
int lineStart;
int lineEnd;
int posLineStart;
- bool utf8;
int nextBreak;
int *selAndEdge;
unsigned int saeSize;
@@ -134,9 +128,16 @@ class BreakFinder {
unsigned int saeCurrentPos;
int saeNext;
int subBreak;
+ Document *pdoc;
void Insert(int val);
public:
- BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart, bool breakForSelection);
+ // If a whole run is longer than lengthStartSubdivision then subdivide
+ // into smaller runs at spaces or punctuation.
+ enum { lengthStartSubdivision = 300 };
+ // Try to make each subdivided run lengthEachSubdivision or shorter.
+ enum { lengthEachSubdivision = 100 };
+ BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_,
+ int xStart, bool breakForSelection, Document *pdoc_);
~BreakFinder();
int First() const;
int Next();
@@ -154,7 +155,7 @@ public:
void SetSize(size_t size_);
int GetSize() const { return size; }
void MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned int styleNumber,
- const char *s, unsigned int len, int *positions);
+ const char *s, unsigned int len, int *positions, Document *pdoc);
};
inline bool IsSpaceOrTab(int ch) {