diff options
author | Neil <nyamatongwe@gmail.com> | 2013-07-22 19:36:55 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2013-07-22 19:36:55 +1000 |
commit | 5cbf336f95db427027a72699be6f5034762515aa (patch) | |
tree | fb8e4f0eccb49363752995c85b5b9217f340282b /src | |
parent | 79bd92adfc0ee27029fed81efd03fb5784c60f73 (diff) | |
download | scintilla-mirror-5cbf336f95db427027a72699be6f5034762515aa.tar.gz |
Added the character representation feature.
Diffstat (limited to 'src')
-rw-r--r-- | src/Document.cxx | 9 | ||||
-rw-r--r-- | src/Document.h | 3 | ||||
-rw-r--r-- | src/Editor.cxx | 261 | ||||
-rw-r--r-- | src/Editor.h | 2 | ||||
-rw-r--r-- | src/PositionCache.cxx | 88 | ||||
-rw-r--r-- | src/PositionCache.h | 28 | ||||
-rw-r--r-- | src/ScintillaBase.cxx | 3 | ||||
-rw-r--r-- | src/UniConversion.cxx | 5 | ||||
-rw-r--r-- | src/UniConversion.h | 4 |
9 files changed, 298 insertions, 105 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 98574fe13..578adfefd 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -854,6 +854,15 @@ int Document::SafeSegment(const char *text, int length, int lengthSegment) const return lastEncodingAllowedBreak; } +EncodingFamily Document::CodePageFamily() const { + if (SC_CP_UTF8 == dbcsCodePage) + return efUnicode; + else if (dbcsCodePage) + return efDBCS; + else + return efEightBit; +} + void Document::ModifiedAt(int pos) { if (endStyled > pos) endStyled = pos; diff --git a/src/Document.h b/src/Document.h index 5147875b1..d02025cb5 100644 --- a/src/Document.h +++ b/src/Document.h @@ -19,6 +19,8 @@ namespace Scintilla { typedef int Position; const Position invalidPosition = -1; +enum EncodingFamily { efEightBit, efUnicode, efDBCS }; + /** * The range class represents a range of text in a document. * The two values are not sorted as one end may be more significant than the other @@ -266,6 +268,7 @@ public: int SCI_METHOD CodePage() const; bool SCI_METHOD IsDBCSLeadByte(char ch) const; int SafeSegment(const char *text, int length, int lengthSegment) const; + EncodingFamily CodePageFamily() const; // Gateways to modifying document void ModifiedAt(int pos); diff --git a/src/Editor.cxx b/src/Editor.cxx index acb840fdf..acd5b3611 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -15,6 +15,9 @@ #include <string> #include <vector> #include <map> +#ifndef SCINTILLA_NO_UNORDERED_MAP +#include <unordered_map> +#endif #include <algorithm> #include <memory> @@ -244,6 +247,47 @@ void Editor::Finalise() { CancelModes(); } +void Editor::SetRepresentations() { + reprs.Clear(); + + // C0 control set + const char *reps[] = { + "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL", + "BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI", + "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", + "CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US" + }; + for (size_t j=0; j < (sizeof(reps) / sizeof(reps[0])); j++) { + char c[2] = { static_cast<char>(j), 0 }; + reprs.SetRepresentation(c, reps[j]); + } + + // C1 control set + // As well as Unicode mode, ISO-8859-1 should use these + if (IsUnicodeMode()) { + const char *repsC1[] = { + "PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA", + "HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3", + "DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA", + "SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC" + }; + for (size_t j=0; j < (sizeof(repsC1) / sizeof(repsC1[0])); j++) { + char c1[3] = { '\xc2', static_cast<char>(0x80+j), 0 }; + reprs.SetRepresentation(c1, repsC1[j]); + } + } + + // UTF-8 invalid bytes + if (IsUnicodeMode()) { + for (int k=0x80; k < 0x100; k++) { + char hiByte[2] = { static_cast<char>(k), 0 }; + char hexits[4]; + sprintf(hexits, "x%2X", k); + reprs.SetRepresentation(hiByte, hexits); + } + } +} + void Editor::DropGraphics(bool freeObjects) { if (freeObjects) { delete pixmapLine; @@ -2127,6 +2171,7 @@ LineLayout *Editor::RetrieveLineLayout(int lineNumber) { LinesOnScreen() + 1, pdoc->LinesTotal()); } +/* bool BadUTF(const char *s, int len, int &trailBytes) { // For the rules: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 if (trailBytes) { @@ -2141,6 +2186,7 @@ bool BadUTF(const char *s, int len, int &trailBytes) { return false; } } +*/ /** * Fill in the LineLayout data for the given line. @@ -2250,45 +2296,53 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou ll->positions[0] = 0; XYPOSITION tabWidth = vstyle.spaceWidth * pdoc->tabInChars; bool lastSegItalics = false; - Font &ctrlCharsFont = vstyle.styles[STYLE_CONTROLCHAR].font; - - XYPOSITION ctrlCharWidth[32] = {0}; - bool isControlNext = IsControlCharacter(ll->chars[0]); - int trailBytes = 0; - bool isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars, numCharsInLine, trailBytes); - for (int charInLine = 0; charInLine < numCharsInLine; charInLine++) { - bool isControl = isControlNext; - isControlNext = IsControlCharacter(ll->chars[charInLine + 1]); - bool isBadUTF = isBadUTFNext; - isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars + charInLine + 1, numCharsInLine - charInLine - 1, trailBytes); + + EncodingFamily encodingFamily = pdoc->CodePageFamily(); + + XYPOSITION positionsRepr[256]; // Should expand when needed + int charWidthNext = 1; + if (encodingFamily == efUnicode) + charWidthNext = UTF8DrawBytes(reinterpret_cast<unsigned char *>(ll->chars), numCharsInLine); + else if (encodingFamily == efDBCS) + charWidthNext = pdoc->IsDBCSLeadByte(ll->chars[0]) ? 2 : 1; + Representation *reprNext = reprs.RepresentationFromCharacter(ll->chars, charWidthNext); + int charWidth = 1; + + for (int charInLine = 0; charInLine < numCharsInLine; charInLine += charWidth) { + + charWidth = charWidthNext; + Representation *repr = reprNext; + + charWidthNext = 1; + if (encodingFamily == efUnicode) + charWidthNext = UTF8DrawBytes(reinterpret_cast<unsigned char *>(ll->chars+charInLine + charWidth), numCharsInLine - charInLine - charWidth); + else if (encodingFamily == efDBCS) + charWidthNext = pdoc->IsDBCSLeadByte(ll->chars[charInLine + charWidth]) ? 2 : 1; + reprNext = reprs.RepresentationFromCharacter(ll->chars+charInLine + charWidth, charWidthNext); + if ((ll->styles[charInLine] != ll->styles[charInLine + 1]) || - isControl || isControlNext || isBadUTF || isBadUTFNext || ((charInLine+1) >= numCharsBeforeEOL)) { + repr || reprNext || ((charInLine+charWidth) >= numCharsBeforeEOL)) { ll->positions[startseg] = 0; if (vstyle.styles[ll->styles[charInLine]].visible) { - if (isControl) { + if (repr) { if (ll->chars[charInLine] == '\t') { + // Tab is a special case of repr, taking a variable amount of space ll->positions[charInLine + 1] = ((static_cast<int>((startsegx + 2) / tabWidth) + 1) * tabWidth) - startsegx; } else if (controlCharSymbol < 32) { - if (ctrlCharWidth[ll->chars[charInLine]] == 0) { - const char *ctrlChar = ControlCharacterString(ll->chars[charInLine]); - ctrlCharWidth[ll->chars[charInLine]] = - surface->WidthText(ctrlCharsFont, ctrlChar, istrlen(ctrlChar)) + ctrlCharPadding; - } - ll->positions[charInLine + 1] = ctrlCharWidth[ll->chars[charInLine]]; + posCache.MeasureWidths(surface, vstyle, STYLE_CONTROLCHAR, repr->stringRep.c_str(), + static_cast<unsigned int>(repr->stringRep.length()), positionsRepr, pdoc); + XYPOSITION endRepr = positionsRepr[repr->stringRep.length()-1] + 3; + for (int ii=0; ii < charWidth; ii++) + ll->positions[startseg + 1 + ii] = endRepr; } else { char cc[2] = { static_cast<char>(controlCharSymbol), '\0' }; - surface->MeasureWidths(ctrlCharsFont, cc, 1, + surface->MeasureWidths(vstyle.styles[STYLE_CONTROLCHAR].font, cc, 1, ll->positions + startseg + 1); - } + } lastSegItalics = false; - } else if ((isBadUTF) || (charInLine >= numCharsBeforeEOL)) { - char hexits[4]; - sprintf(hexits, "x%2X", ll->chars[charInLine] & 0xff); - ll->positions[charInLine + 1] = - surface->WidthText(ctrlCharsFont, hexits, istrlen(hexits)) + 3; - } else { // Regular character - int lenSeg = charInLine - startseg + 1; + } else { + int lenSeg = charInLine - startseg + charWidth; if ((lenSeg == 1) && (' ' == ll->chars[startseg])) { lastSegItalics = false; // Over half the segments are single characters and of these about half are space characters. @@ -2300,15 +2354,15 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou } } } else { // invisible - for (int posToZero = startseg; posToZero <= (charInLine + 1); posToZero++) { + for (int posToZero = startseg; posToZero <= (charInLine + charWidth); posToZero++) { ll->positions[posToZero] = 0; } } - for (int posToIncrease = startseg; posToIncrease <= (charInLine + 1); posToIncrease++) { + for (int posToIncrease = startseg; posToIncrease <= (charInLine + charWidth); posToIncrease++) { ll->positions[posToIncrease] += startsegx; } - startsegx = ll->positions[charInLine + 1]; - startseg = charInLine + 1; + startsegx = ll->positions[charInLine + charWidth]; + startseg = charInLine + charWidth; } } // Small hack to make lines that end with italics not cut off the edge of the last character @@ -2933,7 +2987,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis ll->psel = &sel; - BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible, selBackDrawn, pdoc); + BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible, selBackDrawn, pdoc, &reprs); int next = bfBack.First(); // Background drawing loop @@ -3028,7 +3082,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis inIndentation = subLine == 0; // Do not handle indentation except on first subline. // Foreground drawing loop BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, xStartVisible, - ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset), pdoc); + ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset), pdoc, &reprs); next = bfFore.First(); while (next < lineEnd) { @@ -3087,74 +3141,71 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis DrawTabArrow(surface, rcTab, rcSegment.top + vsDraw.lineHeight / 2); } } - } else if (IsControlCharacter(ll->chars[i])) { - // Control character display - inIndentation = false; - if (controlCharSymbol < 32) { - // Draw the character - const char *ctrlChar = ControlCharacterString(ll->chars[i]); - DrawTextBlob(surface, vsDraw, rcSegment, ctrlChar, textBack, textFore, twoPhaseDraw); - } else { - char cc[2] = { static_cast<char>(controlCharSymbol), '\0' }; - surface->DrawTextNoClip(rcSegment, ctrlCharsFont, - rcSegment.top + vsDraw.maxAscent, - cc, 1, textBack, textFore); - } - } else if ((i == startseg) && (static_cast<unsigned char>(ll->chars[i]) >= 0x80) && IsUnicodeMode()) { - // A single byte >= 0x80 in UTF-8 is a bad byte and is displayed as its hex value - char hexits[4]; - sprintf(hexits, "x%2X", ll->chars[i] & 0xff); - DrawTextBlob(surface, vsDraw, rcSegment, hexits, textBack, textFore, twoPhaseDraw); } else { - // Normal text display - if (vsDraw.styles[styleMain].visible) { - if (twoPhaseDraw) { - surface->DrawTextTransparent(rcSegment, textFont, - rcSegment.top + vsDraw.maxAscent, ll->chars + startseg, - i - startseg + 1, textFore); + Representation *repr = 0; + if ((i - startseg + 1) <= 4) + repr = reprs.RepresentationFromCharacter(ll->chars + startseg, i - startseg + 1); + if (repr) { + inIndentation = false; // May need to special case ' '. + if (controlCharSymbol < 32) { + DrawTextBlob(surface, vsDraw, rcSegment, repr->stringRep.c_str(), textBack, textFore, twoPhaseDraw); } else { - surface->DrawTextNoClip(rcSegment, textFont, - rcSegment.top + vsDraw.maxAscent, ll->chars + startseg, - i - startseg + 1, textFore, textBack); + char cc[2] = { static_cast<char>(controlCharSymbol), '\0' }; + surface->DrawTextNoClip(rcSegment, ctrlCharsFont, + rcSegment.top + vsDraw.maxAscent, + cc, 1, textBack, textFore); } - } - if (vsDraw.viewWhitespace != wsInvisible || - (inIndentation && vsDraw.viewIndentationGuides != ivNone)) { - for (int cpos = 0; cpos <= i - startseg; cpos++) { - if (ll->chars[cpos + startseg] == ' ') { - if (vsDraw.viewWhitespace != wsInvisible) { - if (vsDraw.whitespaceForegroundSet) - textFore = vsDraw.whitespaceForeground; - if (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways) { - XYPOSITION xmid = (ll->positions[cpos + startseg] + ll->positions[cpos + startseg + 1]) / 2; - if (!twoPhaseDraw && drawWhitespaceBackground && - (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways)) { - textBack = vsDraw.whitespaceBackground; - PRectangle rcSpace(ll->positions[cpos + startseg] + xStart - subLineStart, - rcSegment.top, - ll->positions[cpos + startseg + 1] + xStart - subLineStart, - rcSegment.bottom); - surface->FillRectangle(rcSpace, textBack); + } else { + // Normal text display + if (vsDraw.styles[styleMain].visible) { + if (twoPhaseDraw) { + surface->DrawTextTransparent(rcSegment, textFont, + rcSegment.top + vsDraw.maxAscent, ll->chars + startseg, + i - startseg + 1, textFore); + } else { + surface->DrawTextNoClip(rcSegment, textFont, + rcSegment.top + vsDraw.maxAscent, ll->chars + startseg, + i - startseg + 1, textFore, textBack); + } + } + if (vsDraw.viewWhitespace != wsInvisible || + (inIndentation && vsDraw.viewIndentationGuides != ivNone)) { + for (int cpos = 0; cpos <= i - startseg; cpos++) { + if (ll->chars[cpos + startseg] == ' ') { + if (vsDraw.viewWhitespace != wsInvisible) { + if (vsDraw.whitespaceForegroundSet) + textFore = vsDraw.whitespaceForeground; + if (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways) { + XYPOSITION xmid = (ll->positions[cpos + startseg] + ll->positions[cpos + startseg + 1]) / 2; + if (!twoPhaseDraw && drawWhitespaceBackground && + (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways)) { + textBack = vsDraw.whitespaceBackground; + PRectangle rcSpace(ll->positions[cpos + startseg] + xStart - subLineStart, + rcSegment.top, + ll->positions[cpos + startseg + 1] + xStart - subLineStart, + rcSegment.bottom); + surface->FillRectangle(rcSpace, textBack); + } + PRectangle rcDot(xmid + xStart - subLineStart, rcSegment.top + vsDraw.lineHeight / 2, 0, 0); + rcDot.right = rcDot.left + vs.whitespaceSize; + rcDot.bottom = rcDot.top + vs.whitespaceSize; + surface->FillRectangle(rcDot, textFore); } - PRectangle rcDot(xmid + xStart - subLineStart, rcSegment.top + vsDraw.lineHeight / 2, 0, 0); - rcDot.right = rcDot.left + vs.whitespaceSize; - rcDot.bottom = rcDot.top + vs.whitespaceSize; - surface->FillRectangle(rcDot, textFore); } - } - if (inIndentation && vsDraw.viewIndentationGuides == ivReal) { - for (int indentCount = (ll->positions[cpos + startseg] + epsilon) / indentWidth; - indentCount <= (ll->positions[cpos + startseg + 1] - epsilon) / indentWidth; - indentCount++) { - if (indentCount > 0) { - int xIndent = indentCount * indentWidth; - DrawIndentGuide(surface, lineVisible, vsDraw.lineHeight, xIndent + xStart, rcSegment, - (ll->xHighlightGuide == xIndent)); + if (inIndentation && vsDraw.viewIndentationGuides == ivReal) { + for (int indentCount = (ll->positions[cpos + startseg] + epsilon) / indentWidth; + indentCount <= (ll->positions[cpos + startseg + 1] - epsilon) / indentWidth; + indentCount++) { + if (indentCount > 0) { + int xIndent = indentCount * indentWidth; + DrawIndentGuide(surface, lineVisible, vsDraw.lineHeight, xIndent + xStart, rcSegment, + (ll->xHighlightGuide == xIndent)); + } } } + } else { + inIndentation = false; } - } else { - inIndentation = false; } } } @@ -6902,6 +6953,8 @@ void Editor::SetDocPointer(Document *document) { vs.ReleaseAllExtendedStyles(); + SetRepresentations(); + // Reset the contraction state to fully shown. cs.Clear(); cs.InsertLines(0, pdoc->LinesTotal() - 1); @@ -8296,6 +8349,7 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { cs.InsertLines(0, pdoc->LinesTotal() - 1); SetAnnotationHeights(0, pdoc->LinesTotal()); InvalidateStyleRedraw(); + SetRepresentations(); } } break; @@ -9170,6 +9224,25 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { case SCI_GETCONTROLCHARSYMBOL: return controlCharSymbol; + case SCI_SETREPRESENTATION: + reprs.SetRepresentation(reinterpret_cast<const char *>(wParam), CharPtrFromSPtr(lParam)); + break; + + case SCI_GETREPRESENTATION: { + Representation *repr = reprs.RepresentationFromCharacter( + reinterpret_cast<const char *>(wParam), UTF8MaxBytes); + if (repr) { + if (lParam != 0) + strcpy(CharPtrFromSPtr(lParam), repr->stringRep.c_str()); + return repr->stringRep.size(); + } + return 0; + } + + case SCI_CLEARREPRESENTATION: + reprs.ClearRepresentation(reinterpret_cast<const char *>(wParam)); + break; + case SCI_STARTRECORD: recordingMacro = true; return 0; diff --git a/src/Editor.h b/src/Editor.h index 3a1456a96..d1879d6ec 100644 --- a/src/Editor.h +++ b/src/Editor.h @@ -231,6 +231,7 @@ protected: // ScintillaBase subclass needs access to much of Editor LineLayoutCache llc; PositionCache posCache; + SpecialRepresentations reprs; KeyMap kmap; @@ -332,6 +333,7 @@ protected: // ScintillaBase subclass needs access to much of Editor void InvalidateStyleData(); void InvalidateStyleRedraw(); void RefreshStyleData(); + void SetRepresentations(); void DropGraphics(bool freeObjects); void AllocateGraphics(); diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx index 742a226b9..d92d49338 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -13,6 +13,9 @@ #include <string> #include <vector> #include <map> +#ifndef SCINTILLA_NO_UNORDERED_MAP +#include <unordered_map> +#endif #include "Platform.h" @@ -34,6 +37,7 @@ #include "ILexer.h" #include "CaseFolder.h" #include "Document.h" +#include "UniConversion.h" #include "Selection.h" #include "PositionCache.h" @@ -336,6 +340,62 @@ void LineLayoutCache::Dispose(LineLayout *ll) { } } +// Simply pack the (maximum 4) character bytes into an int +static inline int KeyFromString(const char *charBytes, size_t len) { + PLATFORM_ASSERT(len <= 4); + int k=0; + for (size_t i=0; i<len && charBytes[i]; i++) { + k = k * 0x100; + k += charBytes[i]; + } + return k; +} + +SpecialRepresentations::SpecialRepresentations() { + std::fill(startByteHasReprs, startByteHasReprs+0x100, 0); +} + +void SpecialRepresentations::SetRepresentation(const char *charBytes, const char *value) { + MapRepresentation::iterator it = mapReprs.find(KeyFromString(charBytes, UTF8MaxBytes)); + if (it == mapReprs.end()) { + // New entry so increment for first byte + startByteHasReprs[static_cast<unsigned char>(charBytes[0])]++; + } + mapReprs[KeyFromString(charBytes, UTF8MaxBytes)] = value; +} + +void SpecialRepresentations::ClearRepresentation(const char *charBytes) { + MapRepresentation::iterator it = mapReprs.find(KeyFromString(charBytes, UTF8MaxBytes)); + if (it != mapReprs.end()) { + mapReprs.erase(it); + startByteHasReprs[static_cast<unsigned char>(charBytes[0])]--; + } +} + +Representation *SpecialRepresentations::RepresentationFromCharacter(const char *charBytes, size_t len) { + PLATFORM_ASSERT(len <= 4); + if (!startByteHasReprs[static_cast<unsigned char>(charBytes[0])]) + return 0; + MapRepresentation::iterator it = mapReprs.find(KeyFromString(charBytes, len)); + if (it != mapReprs.end()) { + return &(it->second); + } + return 0; +} + +bool SpecialRepresentations::Contains(const char *charBytes, size_t len) const { + PLATFORM_ASSERT(len <= 4); + if (!startByteHasReprs[static_cast<unsigned char>(charBytes[0])]) + return false; + MapRepresentation::const_iterator it = mapReprs.find(KeyFromString(charBytes, len)); + return it != mapReprs.end(); +} + +void SpecialRepresentations::Clear() { + mapReprs.clear(); + std::fill(startByteHasReprs, startByteHasReprs+0x100, 0); +} + void BreakFinder::Insert(int val) { if (val >= nextBreak) { for (std::vector<int>::iterator it = selAndEdge.begin(); it != selAndEdge.end(); ++it) { @@ -352,6 +412,7 @@ void BreakFinder::Insert(int val) { } } +/* extern bool BadUTF(const char *s, int len, int &trailBytes); static int NextBadU(const char *s, int p, int len, int &trailBytes) { @@ -362,9 +423,10 @@ static int NextBadU(const char *s, int p, int len, int &trailBytes) { } return -1; } +*/ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, - int xStart, bool breakForSelection, Document *pdoc_) : + int xStart, bool breakForSelection, Document *pdoc_, SpecialRepresentations *preprs_) : ll(ll_), lineStart(lineStart_), lineEnd(lineEnd_), @@ -373,7 +435,8 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL saeCurrentPos(0), saeNext(0), subBreak(-1), - pdoc(pdoc_) { + pdoc(pdoc_), + preprs(preprs_) { // Search for first visible break // First find the first visible character @@ -401,14 +464,19 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL Insert(ll->edgeColumn - 1); Insert(lineEnd - 1); - if (pdoc && (SC_CP_UTF8 == pdoc->dbcsCodePage)) { - int trailBytes=0; - for (int pos = -1;;) { - pos = NextBadU(ll->chars, pos, lineEnd, trailBytes); - if (pos < 0) - break; - Insert(pos-1); - Insert(pos); + if (pdoc && preprs) { + EncodingFamily encodingFamily = pdoc->CodePageFamily(); + for (int posRepr=0; posRepr<lineEnd;) { + int charWidth = 1; + if (encodingFamily == efUnicode) + charWidth = UTF8DrawBytes(reinterpret_cast<unsigned char *>(ll->chars) + posRepr, lineEnd - posRepr); + else if (encodingFamily == efDBCS) + charWidth = pdoc->IsDBCSLeadByte(ll->chars[posRepr]) ? 2 : 1; + if (preprs->Contains(ll->chars + posRepr, charWidth)) { + Insert(posRepr - 1); + Insert(posRepr + charWidth - 1); + } + posRepr += charWidth; } } saeNext = (!selAndEdge.empty()) ? selAndEdge[0] : -1; diff --git a/src/PositionCache.h b/src/PositionCache.h index 34f237705..bab43390a 100644 --- a/src/PositionCache.h +++ b/src/PositionCache.h @@ -113,6 +113,31 @@ public: void ResetClock(); }; +class Representation { +public: + std::string stringRep; + Representation(const char *value="") : stringRep(value) { + } +}; + +#ifdef SCINTILLA_NO_UNORDERED_MAP +typedef std::map<int, Representation> MapRepresentation; +#else +typedef std::unordered_map<int, Representation> MapRepresentation; +#endif + +class SpecialRepresentations { + MapRepresentation mapReprs; + int startByteHasReprs[0x100]; +public: + SpecialRepresentations(); + void SetRepresentation(const char *charBytes, const char *value); + void ClearRepresentation(const char *charBytes); + Representation *RepresentationFromCharacter(const char *charBytes, size_t len); + bool Contains(const char *charBytes, size_t len) const; + void Clear(); +}; + // Class to break a line of text into shorter runs at sensible places. class BreakFinder { LineLayout *ll; @@ -125,6 +150,7 @@ class BreakFinder { int saeNext; int subBreak; Document *pdoc; + SpecialRepresentations *preprs; void Insert(int val); // Private so BreakFinder objects can not be copied BreakFinder(const BreakFinder &); @@ -135,7 +161,7 @@ public: // Try to make each subdivided run lengthEachSubdivision or shorter. enum { lengthEachSubdivision = 100 }; BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_, - int xStart, bool breakForSelection, Document *pdoc_); + int xStart, bool breakForSelection, Document *pdoc_, SpecialRepresentations *preprs_); ~BreakFinder(); int First() const; int Next(); diff --git a/src/ScintillaBase.cxx b/src/ScintillaBase.cxx index 05768799d..462d90ea4 100644 --- a/src/ScintillaBase.cxx +++ b/src/ScintillaBase.cxx @@ -14,6 +14,9 @@ #include <string> #include <vector> #include <map> +#ifndef SCINTILLA_NO_UNORDERED_MAP +#include <unordered_map> +#endif #include <algorithm> #include "Platform.h" diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx index 1973dc7f2..b769250c8 100644 --- a/src/UniConversion.cxx +++ b/src/UniConversion.cxx @@ -255,6 +255,11 @@ int UTF8Classify(const unsigned char *us, int len) { } } +int UTF8DrawBytes(const unsigned char *us, int len) { + int utf8StatusNext = UTF8Classify(us, len); + return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth); +} + #ifdef SCI_NAMESPACE } #endif diff --git a/src/UniConversion.h b/src/UniConversion.h index 1c54506dd..753490bab 100644 --- a/src/UniConversion.h +++ b/src/UniConversion.h @@ -34,6 +34,10 @@ inline bool UTF8IsAscii(int ch) { enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 }; int UTF8Classify(const unsigned char *us, int len); +// Similar to UTF8Classify but returns a length of 1 for invalid bytes +// instead of setting the invalid flag +int UTF8DrawBytes(const unsigned char *us, int len); + // Line separator is U+2028 \xe2\x80\xa8 // Paragraph separator is U+2029 \xe2\x80\xa9 const int UTF8SeparatorLength = 3; |