diff options
| -rw-r--r-- | src/Document.cxx | 49 | ||||
| -rw-r--r-- | src/Document.h | 1 | ||||
| -rw-r--r-- | src/Editor.cxx | 8 | ||||
| -rw-r--r-- | src/PositionCache.cxx | 62 | ||||
| -rw-r--r-- | src/PositionCache.h | 17 | 
5 files changed, 87 insertions, 50 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index 08bc24ecf..7b718f272 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -713,6 +713,55 @@ bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {  	return false;  } +inline bool IsSpaceOrTab(int ch) { +	return ch == ' ' || ch == '\t'; +} + +// Need to break text into segments near lengthSegment but taking into +// account the encoding to not break inside a UTF-8 or DBCS character +// and also trying to avoid breaking inside a pair of combining characters. +// The segment length must always be long enough (more than 4 bytes) +// so that there will be at least one whole character to make a segment. +// For UTF-8, text must consist only of valid whole characters. +// In preference order from best to worst: +//   1) Break after space +//   2) Break before punctuation +//   3) Break after whole character + +int Document::SafeSegment(const char *text, int length, int lengthSegment) { +	if (length <= lengthSegment) +		return length; +	int lastSpaceBreak = -1; +	int lastPunctuationBreak = -1; +	int lastEncodingAllowedBreak = -1; +	for (int j=0; j < lengthSegment;) { +		unsigned char ch = static_cast<unsigned char>(text[j]); +		if (j > 0) { +			if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) { +				lastSpaceBreak = j; +			} +			if (ch < 'A') { +				lastPunctuationBreak = j; +			} +		} +		lastEncodingAllowedBreak = j; + +		if (dbcsCodePage == SC_CP_UTF8) { +			j += (ch < 0x80) ? 1 : BytesFromLead(ch); +		} else if (dbcsCodePage) { +			j += IsDBCSLeadByte(ch) ? 2 : 1; +		} else { +			j++; +		} +	} +	if (lastSpaceBreak >= 0) { +		return lastSpaceBreak; +	} else if (lastPunctuationBreak >= 0) { +		return lastPunctuationBreak; +	} +	return lastEncodingAllowedBreak; +} +  void Document::ModifiedAt(int pos) {  	if (endStyled > pos)  		endStyled = pos; diff --git a/src/Document.h b/src/Document.h index 274aa0baa..7858db727 100644 --- a/src/Document.h +++ b/src/Document.h @@ -275,6 +275,7 @@ public:  	bool NextCharacter(int &pos, int moveDir);	// Returns true if pos changed  	int SCI_METHOD CodePage() const;  	bool SCI_METHOD IsDBCSLeadByte(char ch) const; +	int SafeSegment(const char *text, int length, int lengthSegment);  	// Gateways to modifying document  	void ModifiedAt(int pos); diff --git a/src/Editor.cxx b/src/Editor.cxx index 03c7b1103..ae2d670ce 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -2185,7 +2185,7 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou  						} else {  							lastSegItalics = vstyle.styles[ll->styles[charInLine]].italic;  							posCache.MeasureWidths(surface, vstyle, ll->styles[charInLine], ll->chars + startseg, -							        lenSeg, ll->positions + startseg + 1); +							        lenSeg, ll->positions + startseg + 1, pdoc);  						}  					}  				} else {    // invisible @@ -2801,7 +2801,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis  	ll->psel = &sel; -	BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible, selBackDrawn); +	BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible, selBackDrawn, pdoc);  	int next = bfBack.First();  	// Background drawing loop @@ -2891,8 +2891,8 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis  	inIndentation = subLine == 0;	// Do not handle indentation except on first subline.  	// Foreground drawing loop -	BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible, -		((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset)); +	BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, xStartVisible, +		((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset), pdoc);  	next = bfFore.First();  	while (next < lineEnd) { diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx index 52c4326c0..e59c12630 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -391,18 +391,19 @@ static int NextBadU(const char *s, int p, int len, int &trailBytes) {  	return -1;  } -BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart, bool breakForSelection) : +BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, +	int xStart, bool breakForSelection, Document *pdoc_) :  	ll(ll_),  	lineStart(lineStart_),  	lineEnd(lineEnd_),  	posLineStart(posLineStart_), -	utf8(utf8_),  	nextBreak(lineStart_),  	saeSize(0),  	saeLen(0),  	saeCurrentPos(0),  	saeNext(0), -	subBreak(-1) { +	subBreak(-1), +	pdoc(pdoc_) {  	saeSize = 8;  	selAndEdge = new int[saeSize];  	for (unsigned int j=0; j < saeSize; j++) { @@ -435,7 +436,7 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL  	Insert(ll->edgeColumn - 1);  	Insert(lineEnd - 1); -	if (utf8) { +	if (pdoc && (SC_CP_UTF8 == pdoc->dbcsCodePage)) {  		int trailBytes=0;  		for (int pos = -1;;) {  			pos = NextBadU(ll->chars, pos, lineEnd, trailBytes); @@ -456,10 +457,6 @@ int BreakFinder::First() const {  	return nextBreak;  } -static bool IsTrailByte(int ch) { -	return (ch >= 0x80) && (ch < (0x80 + 0x40)); -} -  int BreakFinder::Next() {  	if (subBreak == -1) {  		int prev = nextBreak; @@ -490,34 +487,7 @@ int BreakFinder::Next() {  		subBreak = -1;  		return nextBreak;  	} else { -		int lastGoodBreak = -1; -		int lastOKBreak = -1; -		int lastUTF8Break = -1; -		int j; -		for (j = subBreak + 1; j <= nextBreak; j++) { -			if (IsSpaceOrTab(ll->chars[j - 1]) && !IsSpaceOrTab(ll->chars[j])) { -				lastGoodBreak = j; -			} -			if (static_cast<unsigned char>(ll->chars[j]) < 'A') { -				lastOKBreak = j; -			} -			if (utf8 && !IsTrailByte(static_cast<unsigned char>(ll->chars[j]))) { -				lastUTF8Break = j; -			} -			if (((j - subBreak) >= lengthEachSubdivision) && -				((lastGoodBreak >= 0) || (lastOKBreak >= 0) || (lastUTF8Break >= 0))) { -				break; -			} -		} -		if (lastGoodBreak >= 0) { -			subBreak = lastGoodBreak; -		} else if (lastOKBreak >= 0) { -			subBreak = lastOKBreak; -		} else if (lastUTF8Break >= 0) { -			subBreak = lastUTF8Break; -		} else { -			subBreak = nextBreak; -		} +		subBreak += pdoc->SafeSegment(ll->chars + subBreak, nextBreak-subBreak, lengthEachSubdivision);  		if (subBreak >= nextBreak) {  			subBreak = -1;  			return nextBreak; @@ -624,7 +594,8 @@ void PositionCache::SetSize(size_t size_) {  }  void PositionCache::MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned int styleNumber, -	const char *s, unsigned int len, int *positions) { +	const char *s, unsigned int len, int *positions, Document *pdoc) { +  	allClear = false;  	int probe = -1;  	if ((size > 0) && (len < 30)) { @@ -646,7 +617,22 @@ void PositionCache::MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned  			probe = probe2;  		}  	} -	surface->MeasureWidths(vstyle.styles[styleNumber].font, s, len, positions); +	if (len > BreakFinder::lengthStartSubdivision) { +		// Break up into segments +		unsigned int startSegment = 0; +		int xStartSegment = 0; +		while (startSegment < len) { +			unsigned int lenSegment = pdoc->SafeSegment(s + startSegment, len - startSegment, BreakFinder::lengthEachSubdivision); +			surface->MeasureWidths(vstyle.styles[styleNumber].font, s + startSegment, lenSegment, positions + startSegment); +			for (unsigned int inSeg = 0; inSeg < lenSegment; inSeg++) { +				positions[startSegment + inSeg] += xStartSegment; +			} +			xStartSegment = positions[startSegment + lenSegment - 1]; +			startSegment += lenSegment; +		} +	} else { +		surface->MeasureWidths(vstyle.styles[styleNumber].font, s, len, positions); +	}  	if (probe >= 0) {  		clock++;  		if (clock > 60000) { diff --git a/src/PositionCache.h b/src/PositionCache.h index a76da574c..8bd4f1b43 100644 --- a/src/PositionCache.h +++ b/src/PositionCache.h @@ -117,16 +117,10 @@ public:  // Class to break a line of text into shorter runs at sensible places.  class BreakFinder { -	// If a whole run is longer than lengthStartSubdivision then subdivide -	// into smaller runs at spaces or punctuation. -	enum { lengthStartSubdivision = 300 }; -	// Try to make each subdivided run lengthEachSubdivision or shorter. -	enum { lengthEachSubdivision = 100 };  	LineLayout *ll;  	int lineStart;  	int lineEnd;  	int posLineStart; -	bool utf8;  	int nextBreak;  	int *selAndEdge;  	unsigned int saeSize; @@ -134,9 +128,16 @@ class BreakFinder {  	unsigned int saeCurrentPos;  	int saeNext;  	int subBreak; +	Document *pdoc;  	void Insert(int val);  public: -	BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart, bool breakForSelection); +	// If a whole run is longer than lengthStartSubdivision then subdivide +	// into smaller runs at spaces or punctuation. +	enum { lengthStartSubdivision = 300 }; +	// Try to make each subdivided run lengthEachSubdivision or shorter. +	enum { lengthEachSubdivision = 100 }; +	BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_,  +		int xStart, bool breakForSelection, Document *pdoc_);  	~BreakFinder();  	int First() const;  	int Next(); @@ -154,7 +155,7 @@ public:  	void SetSize(size_t size_);  	int GetSize() const { return size; }  	void MeasureWidths(Surface *surface, ViewStyle &vstyle, unsigned int styleNumber, -		const char *s, unsigned int len, int *positions); +		const char *s, unsigned int len, int *positions, Document *pdoc);  };  inline bool IsSpaceOrTab(int ch) { | 
