diff options
| -rw-r--r-- | src/Document.cxx | 60 | ||||
| -rw-r--r-- | src/Document.h | 1 | ||||
| -rw-r--r-- | src/Editor.cxx | 118 | ||||
| -rw-r--r-- | src/PositionCache.cxx | 47 | ||||
| -rw-r--r-- | src/PositionCache.h | 7 | 
5 files changed, 201 insertions, 32 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index e2ca7a32a..ff8d0fbcf 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -287,6 +287,55 @@ int Document::LenChar(int pos) {  	}  } +static bool IsTrailByte(int ch) { +	return (ch >= 0x80) && (ch < (0x80 + 0x40)); +} + +static int BytesFromLead(int leadByte) { +	if (leadByte > 0xF4) { +		// Characters longer than 4 bytes not possible in current UTF-8 +		return 0; +	} else if (leadByte >= 0xF0) { +		return 4; +	} else if (leadByte >= 0xE0) { +		return 3; +	} else if (leadByte >= 0xC2) { +		return 2; +	} +	return 0; +} + +bool Document::InGoodUTF8(int pos, int &start, int &end) { +	int lead = pos; +	while ((lead>0) && (pos-lead < 4) && IsTrailByte(static_cast<unsigned char>(cb.CharAt(lead-1)))) +		lead--; +	start = 0; +	if (lead > 0) { +		start = lead-1; +	} +	int leadByte = static_cast<unsigned char>(cb.CharAt(start)); +	int bytes = BytesFromLead(leadByte); +	if (bytes == 0) { +		return false; +	} else { +		int trailBytes = bytes - 1; +		int len = pos - lead + 1; +		if (len > trailBytes) +			// pos too far from lead +			return false; +		// Check that there are enough trails for this lead +		int trail = pos + 1; +		while ((trail-lead<trailBytes) && (trail < Length())) { +			if (!IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail)))) { +				return false; +			} +			trail++; +		} +		end = start + bytes; +		return true; +	} +} +  // Normalise a position so that it is not halfway through a two byte character.  // This can occur in two situations -  // When lines are terminated with \r\n pairs which should be treated as one character. @@ -313,13 +362,14 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {  	if (dbcsCodePage) {  		if (SC_CP_UTF8 == dbcsCodePage) {  			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos)); -			while ((pos > 0) && (pos < Length()) && (ch >= 0x80) && (ch < (0x80 + 0x40))) { -				// ch is a trail byte +			int startUTF = pos; +			int endUTF = pos; +			if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) { +				// ch is a trail byte within a UTF-8 character  				if (moveDir > 0) -					pos++; +					pos = endUTF;  				else -					pos--; -				ch = static_cast<unsigned char>(cb.CharAt(pos)); +					pos = startUTF;  			}  		} else {  			// Anchor DBCS calculations at start of line because start of line can diff --git a/src/Document.h b/src/Document.h index 9143ec6e4..a36c4aafe 100644 --- a/src/Document.h +++ b/src/Document.h @@ -138,6 +138,7 @@ public:  	int ClampPositionIntoDocument(int pos);  	bool IsCrLf(int pos);  	int LenChar(int pos); +	bool InGoodUTF8(int pos, int &start, int &end);  	int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);  	// Gateways to modifying document diff --git a/src/Editor.cxx b/src/Editor.cxx index 372ba0809..a1a48a08b 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -1684,6 +1684,61 @@ LineLayout *Editor::RetrieveLineLayout(int lineNumber) {  	        LinesOnScreen() + 1, pdoc->LinesTotal());  } +static bool GoodTrailByte(int v) { +	return (v >= 0x80) && (v < 0xc0); +} + +bool BadUTF(const char *s, int len, int &trailBytes) { +	if (trailBytes) { +		trailBytes--; +		return false; +	} +	const unsigned char *us = reinterpret_cast<const unsigned char *>(s); +	if (*us < 0x80) { +		// Single bytes easy +		return false; +	} else if (*us > 0xF4) { +		// Characters longer than 4 bytes not possible in current UTF-8 +		return true; +	} else if (*us >= 0xF0) { +		// 4 bytes +		if (len < 4) +			return true; +		if (GoodTrailByte(us[1]) && GoodTrailByte(us[2]) && GoodTrailByte(us[3])) { +			trailBytes = 3; +			return false; +		} else { +			return true; +		} +	} else if (*us >= 0xE0) { +		// 3 bytes +		if (len < 3) +			return true; +		if (GoodTrailByte(us[1]) && GoodTrailByte(us[2])) { +			trailBytes = 2; +			return false; +		} else { +			return true; +		} +	} else if (*us >= 0xC2) { +		// 2 bytes +		if (len < 2) +			return true; +		if (GoodTrailByte(us[1])) { +			trailBytes = 1; +			return false; +		} else { +			return true; +		} +	} else if (*us >= 0xC0) { +		// Overlong encoding +		return true; +	} else { +		// Trail byte +		return true; +	} +} +  /**   * Fill in the LineLayout data for the given line.   * Copy the given @a line and its styles from the document into local arrays. @@ -1795,11 +1850,15 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou  		int ctrlCharWidth[32] = {0};  		bool isControlNext = IsControlCharacter(ll->chars[0]); +		int trailBytes = 0; +		bool isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars, numCharsInLine, trailBytes);  		for (int charInLine = 0; charInLine < numCharsInLine; charInLine++) {  			bool isControl = isControlNext;  			isControlNext = IsControlCharacter(ll->chars[charInLine + 1]); +			bool isBadUTF = isBadUTFNext; +			isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars + charInLine + 1, numCharsInLine - charInLine - 1, trailBytes);  			if ((ll->styles[charInLine] != ll->styles[charInLine + 1]) || -			        isControl || isControlNext) { +			        isControl || isControlNext || isBadUTF || isBadUTFNext) {  				ll->positions[startseg] = 0;  				if (vstyle.styles[ll->styles[charInLine]].visible) {  					if (isControl) { @@ -1820,6 +1879,11 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou  							        ll->positions + startseg + 1);  						}  						lastSegItalics = false; +					} else if (isBadUTF) { +						char hexits[3]; +						sprintf(hexits, "%2X", ll->chars[charInLine] & 0xff); +						ll->positions[charInLine + 1] = +						    surface->WidthText(ctrlCharsFont, hexits, istrlen(hexits)) + 3;  					} else {	// Regular character  						int lenSeg = charInLine - startseg + 1;  						if ((lenSeg == 1) && (' ' == ll->chars[startseg])) { @@ -2133,6 +2197,30 @@ void Editor::DrawIndicators(Surface *surface, ViewStyle &vsDraw, int line, int x  	}  } +void DrawTextBlob(Surface *surface, ViewStyle &vsDraw, PRectangle rcSegment, +				  const char *s, ColourAllocated textBack, ColourAllocated textFore, bool twoPhaseDraw) { +	if (!twoPhaseDraw) { +		surface->FillRectangle(rcSegment, textBack); +	} +	Font &ctrlCharsFont = vsDraw.styles[STYLE_CONTROLCHAR].font; +	int normalCharHeight = surface->Ascent(ctrlCharsFont) - +	        surface->InternalLeading(ctrlCharsFont); +	PRectangle rcCChar = rcSegment; +	rcCChar.left = rcCChar.left + 1; +	rcCChar.top = rcSegment.top + vsDraw.maxAscent - normalCharHeight; +	rcCChar.bottom = rcSegment.top + vsDraw.maxAscent + 1; +	PRectangle rcCentral = rcCChar; +	rcCentral.top++; +	rcCentral.bottom--; +	surface->FillRectangle(rcCentral, textFore); +	PRectangle rcChar = rcCChar; +	rcChar.left++; +	rcChar.right--; +	surface->DrawTextClipped(rcChar, ctrlCharsFont, +	        rcSegment.top + vsDraw.maxAscent, s, istrlen(s), +	        textBack, textFore); +} +  void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVisible, int xStart,          PRectangle rcLine, LineLayout *ll, int subLine) { @@ -2251,7 +2339,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis  	// Does not take margin into account but not significant  	int xStartVisible = subLineStart - xStart; -	BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible); +	BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible);  	int next = bfBack.First();  	// Background drawing loop @@ -2326,7 +2414,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis  	inIndentation = subLine == 0;	// Do not handle indentation except on first subline.  	// Foreground drawing loop -	BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, xStartVisible); +	BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible);  	next = bfFore.First();  	while (next < lineEnd) { @@ -2391,31 +2479,17 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis  				if (controlCharSymbol < 32) {  					// Draw the character  					const char *ctrlChar = ControlCharacterString(ll->chars[i]); -					if (!twoPhaseDraw) { -						surface->FillRectangle(rcSegment, textBack); -					} -					int normalCharHeight = surface->Ascent(ctrlCharsFont) - -					        surface->InternalLeading(ctrlCharsFont); -					PRectangle rcCChar = rcSegment; -					rcCChar.left = rcCChar.left + 1; -					rcCChar.top = rcSegment.top + vsDraw.maxAscent - normalCharHeight; -					rcCChar.bottom = rcSegment.top + vsDraw.maxAscent + 1; -					PRectangle rcCentral = rcCChar; -					rcCentral.top++; -					rcCentral.bottom--; -					surface->FillRectangle(rcCentral, textFore); -					PRectangle rcChar = rcCChar; -					rcChar.left++; -					rcChar.right--; -					surface->DrawTextClipped(rcChar, ctrlCharsFont, -					        rcSegment.top + vsDraw.maxAscent, ctrlChar, istrlen(ctrlChar), -					        textBack, textFore); +					DrawTextBlob(surface, vsDraw, rcSegment, ctrlChar, textBack, textFore, twoPhaseDraw);  				} else {  					char cc[2] = { static_cast<char>(controlCharSymbol), '\0' };  					surface->DrawTextNoClip(rcSegment, ctrlCharsFont,  					        rcSegment.top + vsDraw.maxAscent,  					        cc, 1, textBack, textFore);  				} +			} else if ((i == startseg) && (static_cast<unsigned char>(ll->chars[i]) >= 0x80) && IsUnicodeMode()) { +				char hexits[3]; +				sprintf(hexits, "%2X", ll->chars[i] & 0xff); +				DrawTextBlob(surface, vsDraw, rcSegment, hexits, textBack, textFore, twoPhaseDraw);  			} else {  				// Normal text display  				if (vsDraw.styles[styleMain].visible) { diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx index 1763b6530..f40a15378 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -345,12 +345,23 @@ void LineLayoutCache::Dispose(LineLayout *ll) {  }  void BreakFinder::Insert(int val) { +	// Expand if needed +	if (saeLen >= saeSize) { +		saeSize *= 2; +		int *selAndEdgeNew = new int[saeSize]; +		for (unsigned int j = 0; j<saeLen; j++) { +			selAndEdgeNew[j] = selAndEdge[j]; +		} +		delete []selAndEdge; +		selAndEdge = selAndEdgeNew; +	} +  	if (val >= nextBreak) {  		for (unsigned int j = 0; j<saeLen; j++) {  			if (val == selAndEdge[j]) {  				return;  			} if (val < selAndEdge[j]) { -				for (unsigned int k = saeLen; j>k; k--) { +				for (unsigned int k = saeLen; k>j; k--) {  					selAndEdge[k] = selAndEdge[k-1];  				}  				saeLen++; @@ -363,17 +374,32 @@ void BreakFinder::Insert(int val) {  	}  } -BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, int xStart) : +extern bool BadUTF(const char *s, int len, int &trailBytes); + +static int NextBadU(const char *s, int p, int len, int &trailBytes) { +	while (p < len) { +		p++; +		if (BadUTF(s + p, len - p, trailBytes)) +			return p; +	} +	return -1; +} + +BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart) :  	ll(ll_),  	lineStart(lineStart_),  	lineEnd(lineEnd_),  	posLineStart(posLineStart_), +	utf8(utf8_),  	nextBreak(lineStart_), +	saeSize(0),  	saeLen(0),  	saeCurrentPos(0),  	saeNext(0),  	subBreak(-1) { -	for (unsigned int j=0; j < sizeof(selAndEdge) / sizeof(selAndEdge[0]); j++) { +	saeSize = 8; +	selAndEdge = new int[saeSize]; +	for (unsigned int j=0; j < saeSize; j++) {  		selAndEdge[j] = 0;  	} @@ -392,9 +418,24 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL  	Insert(ll->edgeColumn - 1);  	Insert(lineEnd - 1); + +	if (utf8) { +		int trailBytes=0; +		for (int pos = -1;;) { +			pos = NextBadU(ll->chars, pos, lineEnd, trailBytes); +			if (pos < 0) +				break; +			Insert(pos-1); +			Insert(pos); +		} +	}  	saeNext = (saeLen > 0) ? selAndEdge[0] : -1;  } +BreakFinder::~BreakFinder() { +	delete []selAndEdge; +} +  int BreakFinder::First() {  	return nextBreak;  } diff --git a/src/PositionCache.h b/src/PositionCache.h index 764702fce..5d486cb60 100644 --- a/src/PositionCache.h +++ b/src/PositionCache.h @@ -124,15 +124,18 @@ class BreakFinder {  	int lineStart;  	int lineEnd;  	int posLineStart; +	bool utf8;  	int nextBreak; -	int selAndEdge[5]; +	int *selAndEdge; +	unsigned int saeSize;  	unsigned int saeLen;  	unsigned int saeCurrentPos;  	int saeNext;  	int subBreak;  	void Insert(int val);  public: -	BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, int xStart); +	BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart); +	~BreakFinder();  	int First();  	int Next();  }; | 
