diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Document.cxx | 60 | ||||
-rw-r--r-- | src/Document.h | 1 | ||||
-rw-r--r-- | src/Editor.cxx | 118 | ||||
-rw-r--r-- | src/PositionCache.cxx | 47 | ||||
-rw-r--r-- | src/PositionCache.h | 7 |
5 files changed, 201 insertions, 32 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index e2ca7a32a..ff8d0fbcf 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -287,6 +287,55 @@ int Document::LenChar(int pos) { } } +static bool IsTrailByte(int ch) { + return (ch >= 0x80) && (ch < (0x80 + 0x40)); +} + +static int BytesFromLead(int leadByte) { + if (leadByte > 0xF4) { + // Characters longer than 4 bytes not possible in current UTF-8 + return 0; + } else if (leadByte >= 0xF0) { + return 4; + } else if (leadByte >= 0xE0) { + return 3; + } else if (leadByte >= 0xC2) { + return 2; + } + return 0; +} + +bool Document::InGoodUTF8(int pos, int &start, int &end) { + int lead = pos; + while ((lead>0) && (pos-lead < 4) && IsTrailByte(static_cast<unsigned char>(cb.CharAt(lead-1)))) + lead--; + start = 0; + if (lead > 0) { + start = lead-1; + } + int leadByte = static_cast<unsigned char>(cb.CharAt(start)); + int bytes = BytesFromLead(leadByte); + if (bytes == 0) { + return false; + } else { + int trailBytes = bytes - 1; + int len = pos - lead + 1; + if (len > trailBytes) + // pos too far from lead + return false; + // Check that there are enough trails for this lead + int trail = pos + 1; + while ((trail-lead<trailBytes) && (trail < Length())) { + if (!IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail)))) { + return false; + } + trail++; + } + end = start + bytes; + return true; + } +} + // Normalise a position so that it is not halfway through a two byte character. // This can occur in two situations - // When lines are terminated with \r\n pairs which should be treated as one character. @@ -313,13 +362,14 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) { if (dbcsCodePage) { if (SC_CP_UTF8 == dbcsCodePage) { unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos)); - while ((pos > 0) && (pos < Length()) && (ch >= 0x80) && (ch < (0x80 + 0x40))) { - // ch is a trail byte + int startUTF = pos; + int endUTF = pos; + if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) { + // ch is a trail byte within a UTF-8 character if (moveDir > 0) - pos++; + pos = endUTF; else - pos--; - ch = static_cast<unsigned char>(cb.CharAt(pos)); + pos = startUTF; } } else { // Anchor DBCS calculations at start of line because start of line can diff --git a/src/Document.h b/src/Document.h index 9143ec6e4..a36c4aafe 100644 --- a/src/Document.h +++ b/src/Document.h @@ -138,6 +138,7 @@ public: int ClampPositionIntoDocument(int pos); bool IsCrLf(int pos); int LenChar(int pos); + bool InGoodUTF8(int pos, int &start, int &end); int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true); // Gateways to modifying document diff --git a/src/Editor.cxx b/src/Editor.cxx index 372ba0809..a1a48a08b 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -1684,6 +1684,61 @@ LineLayout *Editor::RetrieveLineLayout(int lineNumber) { LinesOnScreen() + 1, pdoc->LinesTotal()); } +static bool GoodTrailByte(int v) { + return (v >= 0x80) && (v < 0xc0); +} + +bool BadUTF(const char *s, int len, int &trailBytes) { + if (trailBytes) { + trailBytes--; + return false; + } + const unsigned char *us = reinterpret_cast<const unsigned char *>(s); + if (*us < 0x80) { + // Single bytes easy + return false; + } else if (*us > 0xF4) { + // Characters longer than 4 bytes not possible in current UTF-8 + return true; + } else if (*us >= 0xF0) { + // 4 bytes + if (len < 4) + return true; + if (GoodTrailByte(us[1]) && GoodTrailByte(us[2]) && GoodTrailByte(us[3])) { + trailBytes = 3; + return false; + } else { + return true; + } + } else if (*us >= 0xE0) { + // 3 bytes + if (len < 3) + return true; + if (GoodTrailByte(us[1]) && GoodTrailByte(us[2])) { + trailBytes = 2; + return false; + } else { + return true; + } + } else if (*us >= 0xC2) { + // 2 bytes + if (len < 2) + return true; + if (GoodTrailByte(us[1])) { + trailBytes = 1; + return false; + } else { + return true; + } + } else if (*us >= 0xC0) { + // Overlong encoding + return true; + } else { + // Trail byte + return true; + } +} + /** * Fill in the LineLayout data for the given line. * Copy the given @a line and its styles from the document into local arrays. @@ -1795,11 +1850,15 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou int ctrlCharWidth[32] = {0}; bool isControlNext = IsControlCharacter(ll->chars[0]); + int trailBytes = 0; + bool isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars, numCharsInLine, trailBytes); for (int charInLine = 0; charInLine < numCharsInLine; charInLine++) { bool isControl = isControlNext; isControlNext = IsControlCharacter(ll->chars[charInLine + 1]); + bool isBadUTF = isBadUTFNext; + isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars + charInLine + 1, numCharsInLine - charInLine - 1, trailBytes); if ((ll->styles[charInLine] != ll->styles[charInLine + 1]) || - isControl || isControlNext) { + isControl || isControlNext || isBadUTF || isBadUTFNext) { ll->positions[startseg] = 0; if (vstyle.styles[ll->styles[charInLine]].visible) { if (isControl) { @@ -1820,6 +1879,11 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou ll->positions + startseg + 1); } lastSegItalics = false; + } else if (isBadUTF) { + char hexits[3]; + sprintf(hexits, "%2X", ll->chars[charInLine] & 0xff); + ll->positions[charInLine + 1] = + surface->WidthText(ctrlCharsFont, hexits, istrlen(hexits)) + 3; } else { // Regular character int lenSeg = charInLine - startseg + 1; if ((lenSeg == 1) && (' ' == ll->chars[startseg])) { @@ -2133,6 +2197,30 @@ void Editor::DrawIndicators(Surface *surface, ViewStyle &vsDraw, int line, int x } } +void DrawTextBlob(Surface *surface, ViewStyle &vsDraw, PRectangle rcSegment, + const char *s, ColourAllocated textBack, ColourAllocated textFore, bool twoPhaseDraw) { + if (!twoPhaseDraw) { + surface->FillRectangle(rcSegment, textBack); + } + Font &ctrlCharsFont = vsDraw.styles[STYLE_CONTROLCHAR].font; + int normalCharHeight = surface->Ascent(ctrlCharsFont) - + surface->InternalLeading(ctrlCharsFont); + PRectangle rcCChar = rcSegment; + rcCChar.left = rcCChar.left + 1; + rcCChar.top = rcSegment.top + vsDraw.maxAscent - normalCharHeight; + rcCChar.bottom = rcSegment.top + vsDraw.maxAscent + 1; + PRectangle rcCentral = rcCChar; + rcCentral.top++; + rcCentral.bottom--; + surface->FillRectangle(rcCentral, textFore); + PRectangle rcChar = rcCChar; + rcChar.left++; + rcChar.right--; + surface->DrawTextClipped(rcChar, ctrlCharsFont, + rcSegment.top + vsDraw.maxAscent, s, istrlen(s), + textBack, textFore); +} + void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVisible, int xStart, PRectangle rcLine, LineLayout *ll, int subLine) { @@ -2251,7 +2339,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis // Does not take margin into account but not significant int xStartVisible = subLineStart - xStart; - BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible); + BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible); int next = bfBack.First(); // Background drawing loop @@ -2326,7 +2414,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis inIndentation = subLine == 0; // Do not handle indentation except on first subline. // Foreground drawing loop - BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, xStartVisible); + BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, IsUnicodeMode(), xStartVisible); next = bfFore.First(); while (next < lineEnd) { @@ -2391,31 +2479,17 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis if (controlCharSymbol < 32) { // Draw the character const char *ctrlChar = ControlCharacterString(ll->chars[i]); - if (!twoPhaseDraw) { - surface->FillRectangle(rcSegment, textBack); - } - int normalCharHeight = surface->Ascent(ctrlCharsFont) - - surface->InternalLeading(ctrlCharsFont); - PRectangle rcCChar = rcSegment; - rcCChar.left = rcCChar.left + 1; - rcCChar.top = rcSegment.top + vsDraw.maxAscent - normalCharHeight; - rcCChar.bottom = rcSegment.top + vsDraw.maxAscent + 1; - PRectangle rcCentral = rcCChar; - rcCentral.top++; - rcCentral.bottom--; - surface->FillRectangle(rcCentral, textFore); - PRectangle rcChar = rcCChar; - rcChar.left++; - rcChar.right--; - surface->DrawTextClipped(rcChar, ctrlCharsFont, - rcSegment.top + vsDraw.maxAscent, ctrlChar, istrlen(ctrlChar), - textBack, textFore); + DrawTextBlob(surface, vsDraw, rcSegment, ctrlChar, textBack, textFore, twoPhaseDraw); } else { char cc[2] = { static_cast<char>(controlCharSymbol), '\0' }; surface->DrawTextNoClip(rcSegment, ctrlCharsFont, rcSegment.top + vsDraw.maxAscent, cc, 1, textBack, textFore); } + } else if ((i == startseg) && (static_cast<unsigned char>(ll->chars[i]) >= 0x80) && IsUnicodeMode()) { + char hexits[3]; + sprintf(hexits, "%2X", ll->chars[i] & 0xff); + DrawTextBlob(surface, vsDraw, rcSegment, hexits, textBack, textFore, twoPhaseDraw); } else { // Normal text display if (vsDraw.styles[styleMain].visible) { diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx index 1763b6530..f40a15378 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -345,12 +345,23 @@ void LineLayoutCache::Dispose(LineLayout *ll) { } void BreakFinder::Insert(int val) { + // Expand if needed + if (saeLen >= saeSize) { + saeSize *= 2; + int *selAndEdgeNew = new int[saeSize]; + for (unsigned int j = 0; j<saeLen; j++) { + selAndEdgeNew[j] = selAndEdge[j]; + } + delete []selAndEdge; + selAndEdge = selAndEdgeNew; + } + if (val >= nextBreak) { for (unsigned int j = 0; j<saeLen; j++) { if (val == selAndEdge[j]) { return; } if (val < selAndEdge[j]) { - for (unsigned int k = saeLen; j>k; k--) { + for (unsigned int k = saeLen; k>j; k--) { selAndEdge[k] = selAndEdge[k-1]; } saeLen++; @@ -363,17 +374,32 @@ void BreakFinder::Insert(int val) { } } -BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, int xStart) : +extern bool BadUTF(const char *s, int len, int &trailBytes); + +static int NextBadU(const char *s, int p, int len, int &trailBytes) { + while (p < len) { + p++; + if (BadUTF(s + p, len - p, trailBytes)) + return p; + } + return -1; +} + +BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart) : ll(ll_), lineStart(lineStart_), lineEnd(lineEnd_), posLineStart(posLineStart_), + utf8(utf8_), nextBreak(lineStart_), + saeSize(0), saeLen(0), saeCurrentPos(0), saeNext(0), subBreak(-1) { - for (unsigned int j=0; j < sizeof(selAndEdge) / sizeof(selAndEdge[0]); j++) { + saeSize = 8; + selAndEdge = new int[saeSize]; + for (unsigned int j=0; j < saeSize; j++) { selAndEdge[j] = 0; } @@ -392,9 +418,24 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL Insert(ll->edgeColumn - 1); Insert(lineEnd - 1); + + if (utf8) { + int trailBytes=0; + for (int pos = -1;;) { + pos = NextBadU(ll->chars, pos, lineEnd, trailBytes); + if (pos < 0) + break; + Insert(pos-1); + Insert(pos); + } + } saeNext = (saeLen > 0) ? selAndEdge[0] : -1; } +BreakFinder::~BreakFinder() { + delete []selAndEdge; +} + int BreakFinder::First() { return nextBreak; } diff --git a/src/PositionCache.h b/src/PositionCache.h index 764702fce..5d486cb60 100644 --- a/src/PositionCache.h +++ b/src/PositionCache.h @@ -124,15 +124,18 @@ class BreakFinder { int lineStart; int lineEnd; int posLineStart; + bool utf8; int nextBreak; - int selAndEdge[5]; + int *selAndEdge; + unsigned int saeSize; unsigned int saeLen; unsigned int saeCurrentPos; int saeNext; int subBreak; void Insert(int val); public: - BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, int xStart); + BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, bool utf8_, int xStart); + ~BreakFinder(); int First(); int Next(); }; |