aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Document.cxx 9
-rw-r--r-- src/Document.h 3
-rw-r--r-- src/Editor.cxx 261
-rw-r--r-- src/Editor.h 2
-rw-r--r-- src/PositionCache.cxx 88
-rw-r--r-- src/PositionCache.h 28
-rw-r--r-- src/ScintillaBase.cxx 3
-rw-r--r-- src/UniConversion.cxx 5
-rw-r--r-- src/UniConversion.h 4
9 files changed, 298 insertions, 105 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 98574fe13..578adfefd 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -854,6 +854,15 @@ int Document::SafeSegment(const char *text, int length, int lengthSegment) const
return lastEncodingAllowedBreak;
}
+// Report which broad encoding family the document's code page belongs to:
+// efUnicode when dbcsCodePage equals SC_CP_UTF8, efDBCS for any other non-zero
+// value and efEightBit when dbcsCodePage is zero.
+EncodingFamily Document::CodePageFamily() const {
+	if (dbcsCodePage == SC_CP_UTF8)
+		return efUnicode;
+	return dbcsCodePage ? efDBCS : efEightBit;
+}
+
void Document::ModifiedAt(int pos) {
if (endStyled > pos)
endStyled = pos;
diff --git a/src/Document.h b/src/Document.h
index 5147875b1..d02025cb5 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -19,6 +19,8 @@ namespace Scintilla {
typedef int Position;
const Position invalidPosition = -1;
+// Broad encoding categories, as returned by Document::CodePageFamily().
+enum EncodingFamily { efEightBit, efUnicode, efDBCS };
+
/**
* The range class represents a range of text in a document.
* The two values are not sorted as one end may be more significant than the other
@@ -266,6 +268,7 @@ public:
int SCI_METHOD CodePage() const;
bool SCI_METHOD IsDBCSLeadByte(char ch) const;
int SafeSegment(const char *text, int length, int lengthSegment) const;
+ EncodingFamily CodePageFamily() const;
// Gateways to modifying document
void ModifiedAt(int pos);
diff --git a/src/Editor.cxx b/src/Editor.cxx
index acb840fdf..acd5b3611 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -15,6 +15,9 @@
#include <string>
#include <vector>
#include <map>
+#ifndef SCINTILLA_NO_UNORDERED_MAP
+#include <unordered_map>
+#endif
#include <algorithm>
#include <memory>
@@ -244,6 +247,47 @@ void Editor::Finalise() {
CancelModes();
}
+// Rebuild reprs from empty. The C0 control characters always receive their
+// mnemonic names; when IsUnicodeMode() is true the C1 controls (as the two byte
+// sequences 0xC2 0x80 .. 0xC2 0x9F) receive theirs too and every single byte
+// 0x80 .. 0xFF is given the hexadecimal text produced by "x%2X".
+void Editor::SetRepresentations() {
+	reprs.Clear();
+
+	// C0 control set: the table index is the character code
+	const char *namesC0[] = {
+		"NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "BEL",
+		"BS", "HT", "LF", "VT", "FF", "CR", "SO", "SI",
+		"DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
+		"CAN", "EM", "SUB", "ESC", "FS", "GS", "RS", "US"
+	};
+	const size_t countC0 = sizeof(namesC0) / sizeof(namesC0[0]);
+	for (size_t code = 0; code < countC0; code++) {
+		char single[2];
+		single[0] = static_cast<char>(code);
+		single[1] = 0;
+		reprs.SetRepresentation(single, namesC0[code]);
+	}
+
+	// C1 control set
+	// As well as Unicode mode, ISO-8859-1 should use these
+	if (IsUnicodeMode()) {
+		const char *namesC1[] = {
+			"PAD", "HOP", "BPH", "NBH", "IND", "NEL", "SSA", "ESA",
+			"HTS", "HTJ", "VTS", "PLD", "PLU", "RI", "SS2", "SS3",
+			"DCS", "PU1", "PU2", "STS", "CCH", "MW", "SPA", "EPA",
+			"SOS", "SGCI", "SCI", "CSI", "ST", "OSC", "PM", "APC"
+		};
+		const size_t countC1 = sizeof(namesC1) / sizeof(namesC1[0]);
+		for (size_t offset = 0; offset < countC1; offset++) {
+			// Two byte UTF-8 form: lead byte 0xC2 followed by 0x80 + offset
+			char encoded[3];
+			encoded[0] = '\xc2';
+			encoded[1] = static_cast<char>(0x80 + offset);
+			encoded[2] = 0;
+			reprs.SetRepresentation(encoded, namesC1[offset]);
+		}
+	}
+
+	// UTF-8 invalid bytes
+	if (IsUnicodeMode()) {
+		int byteValue = 0x80;
+		while (byteValue < 0x100) {
+			char lone[2];
+			lone[0] = static_cast<char>(byteValue);
+			lone[1] = 0;
+			// 'x' plus two hex digits plus the terminator fills the buffer exactly,
+			// as every value formatted here is in 0x80 .. 0xFF
+			char hexits[4];
+			sprintf(hexits, "x%2X", byteValue);
+			reprs.SetRepresentation(lone, hexits);
+			byteValue++;
+		}
+	}
+}
+
void Editor::DropGraphics(bool freeObjects) {
if (freeObjects) {
delete pixmapLine;
@@ -2127,6 +2171,7 @@ LineLayout *Editor::RetrieveLineLayout(int lineNumber) {
LinesOnScreen() + 1, pdoc->LinesTotal());
}
+/*
bool BadUTF(const char *s, int len, int &trailBytes) {
// For the rules: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
if (trailBytes) {
@@ -2141,6 +2186,7 @@ bool BadUTF(const char *s, int len, int &trailBytes) {
return false;
}
}
+*/
/**
* Fill in the LineLayout data for the given line.
@@ -2250,45 +2296,53 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou
ll->positions[0] = 0;
XYPOSITION tabWidth = vstyle.spaceWidth * pdoc->tabInChars;
bool lastSegItalics = false;
- Font &ctrlCharsFont = vstyle.styles[STYLE_CONTROLCHAR].font;
-
- XYPOSITION ctrlCharWidth[32] = {0};
- bool isControlNext = IsControlCharacter(ll->chars[0]);
- int trailBytes = 0;
- bool isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars, numCharsInLine, trailBytes);
- for (int charInLine = 0; charInLine < numCharsInLine; charInLine++) {
- bool isControl = isControlNext;
- isControlNext = IsControlCharacter(ll->chars[charInLine + 1]);
- bool isBadUTF = isBadUTFNext;
- isBadUTFNext = IsUnicodeMode() && BadUTF(ll->chars + charInLine + 1, numCharsInLine - charInLine - 1, trailBytes);
+
+ EncodingFamily encodingFamily = pdoc->CodePageFamily();
+
+ XYPOSITION positionsRepr[256]; // Should expand when needed
+ int charWidthNext = 1;
+ if (encodingFamily == efUnicode)
+ charWidthNext = UTF8DrawBytes(reinterpret_cast<unsigned char *>(ll->chars), numCharsInLine);
+ else if (encodingFamily == efDBCS)
+ charWidthNext = pdoc->IsDBCSLeadByte(ll->chars[0]) ? 2 : 1;
+ Representation *reprNext = reprs.RepresentationFromCharacter(ll->chars, charWidthNext);
+ int charWidth = 1;
+
+ for (int charInLine = 0; charInLine < numCharsInLine; charInLine += charWidth) {
+
+ charWidth = charWidthNext;
+ Representation *repr = reprNext;
+
+ charWidthNext = 1;
+ if (encodingFamily == efUnicode)
+ charWidthNext = UTF8DrawBytes(reinterpret_cast<unsigned char *>(ll->chars+charInLine + charWidth), numCharsInLine - charInLine - charWidth);
+ else if (encodingFamily == efDBCS)
+ charWidthNext = pdoc->IsDBCSLeadByte(ll->chars[charInLine + charWidth]) ? 2 : 1;
+ reprNext = reprs.RepresentationFromCharacter(ll->chars+charInLine + charWidth, charWidthNext);
+
if ((ll->styles[charInLine] != ll->styles[charInLine + 1]) ||
- isControl || isControlNext || isBadUTF || isBadUTFNext || ((charInLine+1) >= numCharsBeforeEOL)) {
+ repr || reprNext || ((charInLine+charWidth) >= numCharsBeforeEOL)) {
ll->positions[startseg] = 0;
if (vstyle.styles[ll->styles[charInLine]].visible) {
- if (isControl) {
+ if (repr) {
if (ll->chars[charInLine] == '\t') {
+ // Tab is a special case of repr, taking a variable amount of space
ll->positions[charInLine + 1] =
((static_cast<int>((startsegx + 2) / tabWidth) + 1) * tabWidth) - startsegx;
} else if (controlCharSymbol < 32) {
- if (ctrlCharWidth[ll->chars[charInLine]] == 0) {
- const char *ctrlChar = ControlCharacterString(ll->chars[charInLine]);
- ctrlCharWidth[ll->chars[charInLine]] =
- surface->WidthText(ctrlCharsFont, ctrlChar, istrlen(ctrlChar)) + ctrlCharPadding;
- }
- ll->positions[charInLine + 1] = ctrlCharWidth[ll->chars[charInLine]];
+ posCache.MeasureWidths(surface, vstyle, STYLE_CONTROLCHAR, repr->stringRep.c_str(),
+ static_cast<unsigned int>(repr->stringRep.length()), positionsRepr, pdoc);
+ XYPOSITION endRepr = positionsRepr[repr->stringRep.length()-1] + 3;
+ for (int ii=0; ii < charWidth; ii++)
+ ll->positions[startseg + 1 + ii] = endRepr;
} else {
char cc[2] = { static_cast<char>(controlCharSymbol), '\0' };
- surface->MeasureWidths(ctrlCharsFont, cc, 1,
+ surface->MeasureWidths(vstyle.styles[STYLE_CONTROLCHAR].font, cc, 1,
ll->positions + startseg + 1);
- }
+ }
lastSegItalics = false;
- } else if ((isBadUTF) || (charInLine >= numCharsBeforeEOL)) {
- char hexits[4];
- sprintf(hexits, "x%2X", ll->chars[charInLine] & 0xff);
- ll->positions[charInLine + 1] =
- surface->WidthText(ctrlCharsFont, hexits, istrlen(hexits)) + 3;
- } else { // Regular character
- int lenSeg = charInLine - startseg + 1;
+ } else {
+ int lenSeg = charInLine - startseg + charWidth;
if ((lenSeg == 1) && (' ' == ll->chars[startseg])) {
lastSegItalics = false;
// Over half the segments are single characters and of these about half are space characters.
@@ -2300,15 +2354,15 @@ void Editor::LayoutLine(int line, Surface *surface, ViewStyle &vstyle, LineLayou
}
}
} else { // invisible
- for (int posToZero = startseg; posToZero <= (charInLine + 1); posToZero++) {
+ for (int posToZero = startseg; posToZero <= (charInLine + charWidth); posToZero++) {
ll->positions[posToZero] = 0;
}
}
- for (int posToIncrease = startseg; posToIncrease <= (charInLine + 1); posToIncrease++) {
+ for (int posToIncrease = startseg; posToIncrease <= (charInLine + charWidth); posToIncrease++) {
ll->positions[posToIncrease] += startsegx;
}
- startsegx = ll->positions[charInLine + 1];
- startseg = charInLine + 1;
+ startsegx = ll->positions[charInLine + charWidth];
+ startseg = charInLine + charWidth;
}
}
// Small hack to make lines that end with italics not cut off the edge of the last character
@@ -2933,7 +2987,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis
ll->psel = &sel;
- BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible, selBackDrawn, pdoc);
+ BreakFinder bfBack(ll, lineStart, lineEnd, posLineStart, xStartVisible, selBackDrawn, pdoc, &reprs);
int next = bfBack.First();
// Background drawing loop
@@ -3028,7 +3082,7 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis
inIndentation = subLine == 0; // Do not handle indentation except on first subline.
// Foreground drawing loop
BreakFinder bfFore(ll, lineStart, lineEnd, posLineStart, xStartVisible,
- ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset), pdoc);
+ ((!twoPhaseDraw && selBackDrawn) || vsDraw.selforeset), pdoc, &reprs);
next = bfFore.First();
while (next < lineEnd) {
@@ -3087,74 +3141,71 @@ void Editor::DrawLine(Surface *surface, ViewStyle &vsDraw, int line, int lineVis
DrawTabArrow(surface, rcTab, rcSegment.top + vsDraw.lineHeight / 2);
}
}
- } else if (IsControlCharacter(ll->chars[i])) {
- // Control character display
- inIndentation = false;
- if (controlCharSymbol < 32) {
- // Draw the character
- const char *ctrlChar = ControlCharacterString(ll->chars[i]);
- DrawTextBlob(surface, vsDraw, rcSegment, ctrlChar, textBack, textFore, twoPhaseDraw);
- } else {
- char cc[2] = { static_cast<char>(controlCharSymbol), '\0' };
- surface->DrawTextNoClip(rcSegment, ctrlCharsFont,
- rcSegment.top + vsDraw.maxAscent,
- cc, 1, textBack, textFore);
- }
- } else if ((i == startseg) && (static_cast<unsigned char>(ll->chars[i]) >= 0x80) && IsUnicodeMode()) {
- // A single byte >= 0x80 in UTF-8 is a bad byte and is displayed as its hex value
- char hexits[4];
- sprintf(hexits, "x%2X", ll->chars[i] & 0xff);
- DrawTextBlob(surface, vsDraw, rcSegment, hexits, textBack, textFore, twoPhaseDraw);
} else {
- // Normal text display
- if (vsDraw.styles[styleMain].visible) {
- if (twoPhaseDraw) {
- surface->DrawTextTransparent(rcSegment, textFont,
- rcSegment.top + vsDraw.maxAscent, ll->chars + startseg,
- i - startseg + 1, textFore);
+ Representation *repr = 0;
+ if ((i - startseg + 1) <= 4)
+ repr = reprs.RepresentationFromCharacter(ll->chars + startseg, i - startseg + 1);
+ if (repr) {
+ inIndentation = false; // May need to special case ' '.
+ if (controlCharSymbol < 32) {
+ DrawTextBlob(surface, vsDraw, rcSegment, repr->stringRep.c_str(), textBack, textFore, twoPhaseDraw);
} else {
- surface->DrawTextNoClip(rcSegment, textFont,
- rcSegment.top + vsDraw.maxAscent, ll->chars + startseg,
- i - startseg + 1, textFore, textBack);
+ char cc[2] = { static_cast<char>(controlCharSymbol), '\0' };
+ surface->DrawTextNoClip(rcSegment, ctrlCharsFont,
+ rcSegment.top + vsDraw.maxAscent,
+ cc, 1, textBack, textFore);
}
- }
- if (vsDraw.viewWhitespace != wsInvisible ||
- (inIndentation && vsDraw.viewIndentationGuides != ivNone)) {
- for (int cpos = 0; cpos <= i - startseg; cpos++) {
- if (ll->chars[cpos + startseg] == ' ') {
- if (vsDraw.viewWhitespace != wsInvisible) {
- if (vsDraw.whitespaceForegroundSet)
- textFore = vsDraw.whitespaceForeground;
- if (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways) {
- XYPOSITION xmid = (ll->positions[cpos + startseg] + ll->positions[cpos + startseg + 1]) / 2;
- if (!twoPhaseDraw && drawWhitespaceBackground &&
- (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways)) {
- textBack = vsDraw.whitespaceBackground;
- PRectangle rcSpace(ll->positions[cpos + startseg] + xStart - subLineStart,
- rcSegment.top,
- ll->positions[cpos + startseg + 1] + xStart - subLineStart,
- rcSegment.bottom);
- surface->FillRectangle(rcSpace, textBack);
+ } else {
+ // Normal text display
+ if (vsDraw.styles[styleMain].visible) {
+ if (twoPhaseDraw) {
+ surface->DrawTextTransparent(rcSegment, textFont,
+ rcSegment.top + vsDraw.maxAscent, ll->chars + startseg,
+ i - startseg + 1, textFore);
+ } else {
+ surface->DrawTextNoClip(rcSegment, textFont,
+ rcSegment.top + vsDraw.maxAscent, ll->chars + startseg,
+ i - startseg + 1, textFore, textBack);
+ }
+ }
+ if (vsDraw.viewWhitespace != wsInvisible ||
+ (inIndentation && vsDraw.viewIndentationGuides != ivNone)) {
+ for (int cpos = 0; cpos <= i - startseg; cpos++) {
+ if (ll->chars[cpos + startseg] == ' ') {
+ if (vsDraw.viewWhitespace != wsInvisible) {
+ if (vsDraw.whitespaceForegroundSet)
+ textFore = vsDraw.whitespaceForeground;
+ if (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways) {
+ XYPOSITION xmid = (ll->positions[cpos + startseg] + ll->positions[cpos + startseg + 1]) / 2;
+ if (!twoPhaseDraw && drawWhitespaceBackground &&
+ (!inIndentation || vsDraw.viewWhitespace == wsVisibleAlways)) {
+ textBack = vsDraw.whitespaceBackground;
+ PRectangle rcSpace(ll->positions[cpos + startseg] + xStart - subLineStart,
+ rcSegment.top,
+ ll->positions[cpos + startseg + 1] + xStart - subLineStart,
+ rcSegment.bottom);
+ surface->FillRectangle(rcSpace, textBack);
+ }
+ PRectangle rcDot(xmid + xStart - subLineStart, rcSegment.top + vsDraw.lineHeight / 2, 0, 0);
+ rcDot.right = rcDot.left + vs.whitespaceSize;
+ rcDot.bottom = rcDot.top + vs.whitespaceSize;
+ surface->FillRectangle(rcDot, textFore);
}
- PRectangle rcDot(xmid + xStart - subLineStart, rcSegment.top + vsDraw.lineHeight / 2, 0, 0);
- rcDot.right = rcDot.left + vs.whitespaceSize;
- rcDot.bottom = rcDot.top + vs.whitespaceSize;
- surface->FillRectangle(rcDot, textFore);
}
- }
- if (inIndentation && vsDraw.viewIndentationGuides == ivReal) {
- for (int indentCount = (ll->positions[cpos + startseg] + epsilon) / indentWidth;
- indentCount <= (ll->positions[cpos + startseg + 1] - epsilon) / indentWidth;
- indentCount++) {
- if (indentCount > 0) {
- int xIndent = indentCount * indentWidth;
- DrawIndentGuide(surface, lineVisible, vsDraw.lineHeight, xIndent + xStart, rcSegment,
- (ll->xHighlightGuide == xIndent));
+ if (inIndentation && vsDraw.viewIndentationGuides == ivReal) {
+ for (int indentCount = (ll->positions[cpos + startseg] + epsilon) / indentWidth;
+ indentCount <= (ll->positions[cpos + startseg + 1] - epsilon) / indentWidth;
+ indentCount++) {
+ if (indentCount > 0) {
+ int xIndent = indentCount * indentWidth;
+ DrawIndentGuide(surface, lineVisible, vsDraw.lineHeight, xIndent + xStart, rcSegment,
+ (ll->xHighlightGuide == xIndent));
+ }
}
}
+ } else {
+ inIndentation = false;
}
- } else {
- inIndentation = false;
}
}
}
@@ -6902,6 +6953,8 @@ void Editor::SetDocPointer(Document *document) {
vs.ReleaseAllExtendedStyles();
+ SetRepresentations();
+
// Reset the contraction state to fully shown.
cs.Clear();
cs.InsertLines(0, pdoc->LinesTotal() - 1);
@@ -8296,6 +8349,7 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
cs.InsertLines(0, pdoc->LinesTotal() - 1);
SetAnnotationHeights(0, pdoc->LinesTotal());
InvalidateStyleRedraw();
+ SetRepresentations();
}
}
break;
@@ -9170,6 +9224,25 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
case SCI_GETCONTROLCHARSYMBOL:
return controlCharSymbol;
+ case SCI_SETREPRESENTATION:
+ reprs.SetRepresentation(reinterpret_cast<const char *>(wParam), CharPtrFromSPtr(lParam));
+ break;
+
+ case SCI_GETREPRESENTATION: {
+ Representation *repr = reprs.RepresentationFromCharacter(
+ reinterpret_cast<const char *>(wParam), UTF8MaxBytes);
+ if (repr) {
+ if (lParam != 0)
+ strcpy(CharPtrFromSPtr(lParam), repr->stringRep.c_str());
+ return repr->stringRep.size();
+ }
+ return 0;
+ }
+
+ case SCI_CLEARREPRESENTATION:
+ reprs.ClearRepresentation(reinterpret_cast<const char *>(wParam));
+ break;
+
case SCI_STARTRECORD:
recordingMacro = true;
return 0;
diff --git a/src/Editor.h b/src/Editor.h
index 3a1456a96..d1879d6ec 100644
--- a/src/Editor.h
+++ b/src/Editor.h
@@ -231,6 +231,7 @@ protected: // ScintillaBase subclass needs access to much of Editor
LineLayoutCache llc;
PositionCache posCache;
+ SpecialRepresentations reprs;
KeyMap kmap;
@@ -332,6 +333,7 @@ protected: // ScintillaBase subclass needs access to much of Editor
void InvalidateStyleData();
void InvalidateStyleRedraw();
void RefreshStyleData();
+ void SetRepresentations();
void DropGraphics(bool freeObjects);
void AllocateGraphics();
diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx
index 742a226b9..d92d49338 100644
--- a/src/PositionCache.cxx
+++ b/src/PositionCache.cxx
@@ -13,6 +13,9 @@
#include <string>
#include <vector>
#include <map>
+#ifndef SCINTILLA_NO_UNORDERED_MAP
+#include <unordered_map>
+#endif
#include "Platform.h"
@@ -34,6 +37,7 @@
#include "ILexer.h"
#include "CaseFolder.h"
#include "Document.h"
+#include "UniConversion.h"
#include "Selection.h"
#include "PositionCache.h"
@@ -336,6 +340,62 @@ void LineLayoutCache::Dispose(LineLayout *ll) {
}
}
+// Pack the (maximum 4) character bytes into an int for use as a map key.
+// Packing stops at the first NUL byte, so a string that starts with NUL yields 0.
+// Each byte is widened through unsigned char and accumulated in an unsigned int so
+// that the key is the same whether plain char is signed or unsigned and so that a
+// four byte sequence with a lead byte >= 0x80 cannot overflow a signed int.
+static inline int KeyFromString(const char *charBytes, size_t len) {
+	PLATFORM_ASSERT(len <= 4);
+	unsigned int k=0;
+	for (size_t i=0; i<len && charBytes[i]; i++) {
+		k = k * 0x100;
+		k += static_cast<unsigned char>(charBytes[i]);
+	}
+	// Only identity of the key matters to the map, so reuse the bit pattern as an int
+	return static_cast<int>(k);
+}
+
+// Start with no representations: the count kept for every lead byte is zero.
+SpecialRepresentations::SpecialRepresentations() {
+	for (size_t leadByte = 0; leadByte < 0x100; leadByte++)
+		startByteHasReprs[leadByte] = 0;
+}
+
+// Associate the display string value with the character whose bytes start at
+// charBytes (at most UTF8MaxBytes of them are used), replacing any existing entry.
+// startByteHasReprs counts entries per lead byte, so it only changes for new keys.
+void SpecialRepresentations::SetRepresentation(const char *charBytes, const char *value) {
+	// Build the key once rather than once for the search and again for the store
+	const int key = KeyFromString(charBytes, UTF8MaxBytes);
+	MapRepresentation::iterator it = mapReprs.find(key);
+	if (it == mapReprs.end()) {
+		// New entry: insert first so the count is not raised if the insertion throws
+		mapReprs[key] = value;
+		startByteHasReprs[static_cast<unsigned char>(charBytes[0])]++;
+	} else {
+		// Existing entry: overwrite through the iterator, avoiding a second lookup
+		it->second = value;
+	}
+}
+
+// Remove the representation for the character at charBytes, if one exists, and
+// decrement the entry count kept for its lead byte. Does nothing when absent.
+void SpecialRepresentations::ClearRepresentation(const char *charBytes) {
+	const int key = KeyFromString(charBytes, UTF8MaxBytes);
+	MapRepresentation::iterator found = mapReprs.find(key);
+	if (found == mapReprs.end())
+		return;
+	mapReprs.erase(found);
+	startByteHasReprs[static_cast<unsigned char>(charBytes[0])]--;
+}
+
+// Look up the representation for the len byte character at charBytes.
+// Returns a pointer to the stored entry, or 0 when there is none.
+Representation *SpecialRepresentations::RepresentationFromCharacter(const char *charBytes, size_t len) {
+	PLATFORM_ASSERT(len <= 4);
+	// Cheap rejection before touching the map: no entry begins with this byte
+	const unsigned char leadByte = static_cast<unsigned char>(charBytes[0]);
+	if (startByteHasReprs[leadByte] == 0)
+		return 0;
+	MapRepresentation::iterator found = mapReprs.find(KeyFromString(charBytes, len));
+	return (found == mapReprs.end()) ? 0 : &(found->second);
+}
+
+// Report whether a representation is stored for the len byte character at charBytes.
+bool SpecialRepresentations::Contains(const char *charBytes, size_t len) const {
+	PLATFORM_ASSERT(len <= 4);
+	const unsigned char leadByte = static_cast<unsigned char>(charBytes[0]);
+	// The map is only searched when some entry begins with this lead byte
+	return (startByteHasReprs[leadByte] != 0) &&
+		(mapReprs.find(KeyFromString(charBytes, len)) != mapReprs.end());
+}
+
+// Forget every representation and zero the count kept for each lead byte.
+void SpecialRepresentations::Clear() {
+	for (size_t leadByte = 0; leadByte < 0x100; leadByte++)
+		startByteHasReprs[leadByte] = 0;
+	mapReprs.clear();
+}
+
void BreakFinder::Insert(int val) {
if (val >= nextBreak) {
for (std::vector<int>::iterator it = selAndEdge.begin(); it != selAndEdge.end(); ++it) {
@@ -352,6 +412,7 @@ void BreakFinder::Insert(int val) {
}
}
+/*
extern bool BadUTF(const char *s, int len, int &trailBytes);
static int NextBadU(const char *s, int p, int len, int &trailBytes) {
@@ -362,9 +423,10 @@ static int NextBadU(const char *s, int p, int len, int &trailBytes) {
}
return -1;
}
+*/
BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_,
- int xStart, bool breakForSelection, Document *pdoc_) :
+ int xStart, bool breakForSelection, Document *pdoc_, SpecialRepresentations *preprs_) :
ll(ll_),
lineStart(lineStart_),
lineEnd(lineEnd_),
@@ -373,7 +435,8 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL
saeCurrentPos(0),
saeNext(0),
subBreak(-1),
- pdoc(pdoc_) {
+ pdoc(pdoc_),
+ preprs(preprs_) {
// Search for first visible break
// First find the first visible character
@@ -401,14 +464,19 @@ BreakFinder::BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posL
Insert(ll->edgeColumn - 1);
Insert(lineEnd - 1);
- if (pdoc && (SC_CP_UTF8 == pdoc->dbcsCodePage)) {
- int trailBytes=0;
- for (int pos = -1;;) {
- pos = NextBadU(ll->chars, pos, lineEnd, trailBytes);
- if (pos < 0)
- break;
- Insert(pos-1);
- Insert(pos);
+ if (pdoc && preprs) {
+ EncodingFamily encodingFamily = pdoc->CodePageFamily();
+ for (int posRepr=0; posRepr<lineEnd;) {
+ int charWidth = 1;
+ if (encodingFamily == efUnicode)
+ charWidth = UTF8DrawBytes(reinterpret_cast<unsigned char *>(ll->chars) + posRepr, lineEnd - posRepr);
+ else if (encodingFamily == efDBCS)
+ charWidth = pdoc->IsDBCSLeadByte(ll->chars[posRepr]) ? 2 : 1;
+ if (preprs->Contains(ll->chars + posRepr, charWidth)) {
+ Insert(posRepr - 1);
+ Insert(posRepr + charWidth - 1);
+ }
+ posRepr += charWidth;
}
}
saeNext = (!selAndEdge.empty()) ? selAndEdge[0] : -1;
diff --git a/src/PositionCache.h b/src/PositionCache.h
index 34f237705..bab43390a 100644
--- a/src/PositionCache.h
+++ b/src/PositionCache.h
@@ -113,6 +113,31 @@ public:
void ResetClock();
};
+// The text shown in place of a character that has a special representation.
+class Representation {
+public:
+	std::string stringRep;
+	// Converting constructor; defaults to an empty string. It is left implicit
+	// because SpecialRepresentations::SetRepresentation assigns a const char *
+	// directly to a map entry.
+	Representation(const char *value="")
+		: stringRep(value) {}
+};
+
+// Container mapping an int key to a Representation. std::map is used in place of
+// std::unordered_map when SCINTILLA_NO_UNORDERED_MAP is defined.
+#ifdef SCINTILLA_NO_UNORDERED_MAP
+typedef std::map<int, Representation> MapRepresentation;
+#else
+typedef std::unordered_map<int, Representation> MapRepresentation;
+#endif
+
+// Table of representations keyed by the bytes of a character.
+class SpecialRepresentations {
+private:
+	// Stored representations, keyed by an int built from the character bytes
+	MapRepresentation mapReprs;
+	// Indexed by the first byte of a character
+	int startByteHasReprs[0x100];
+
+public:
+	SpecialRepresentations();
+
+	// Modification
+	void SetRepresentation(const char *charBytes, const char *value);
+	void ClearRepresentation(const char *charBytes);
+
+	// Lookup of the len byte character starting at charBytes
+	Representation *RepresentationFromCharacter(const char *charBytes, size_t len);
+	bool Contains(const char *charBytes, size_t len) const;
+
+	void Clear();
+};
+
// Class to break a line of text into shorter runs at sensible places.
class BreakFinder {
LineLayout *ll;
@@ -125,6 +150,7 @@ class BreakFinder {
int saeNext;
int subBreak;
Document *pdoc;
+ SpecialRepresentations *preprs;
void Insert(int val);
// Private so BreakFinder objects can not be copied
BreakFinder(const BreakFinder &);
@@ -135,7 +161,7 @@ public:
// Try to make each subdivided run lengthEachSubdivision or shorter.
enum { lengthEachSubdivision = 100 };
BreakFinder(LineLayout *ll_, int lineStart_, int lineEnd_, int posLineStart_,
- int xStart, bool breakForSelection, Document *pdoc_);
+ int xStart, bool breakForSelection, Document *pdoc_, SpecialRepresentations *preprs_);
~BreakFinder();
int First() const;
int Next();
diff --git a/src/ScintillaBase.cxx b/src/ScintillaBase.cxx
index 05768799d..462d90ea4 100644
--- a/src/ScintillaBase.cxx
+++ b/src/ScintillaBase.cxx
@@ -14,6 +14,9 @@
#include <string>
#include <vector>
#include <map>
+#ifndef SCINTILLA_NO_UNORDERED_MAP
+#include <unordered_map>
+#endif
#include <algorithm>
#include "Platform.h"
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 1973dc7f2..b769250c8 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -255,6 +255,11 @@ int UTF8Classify(const unsigned char *us, int len) {
}
}
+// Number of bytes to treat as one unit when drawing: 1 when UTF8Classify sets
+// UTF8MaskInvalid for the bytes at us, otherwise the width bits of its result.
+int UTF8DrawBytes(const unsigned char *us, int len) {
+	const int status = UTF8Classify(us, len);
+	if (status & UTF8MaskInvalid)
+		return 1;
+	return status & UTF8MaskWidth;
+}
+
#ifdef SCI_NAMESPACE
}
#endif
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 1c54506dd..753490bab 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -34,6 +34,10 @@ inline bool UTF8IsAscii(int ch) {
enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
int UTF8Classify(const unsigned char *us, int len);
+// Similar to UTF8Classify but returns a length of 1 for invalid bytes
+// instead of setting the invalid flag
+int UTF8DrawBytes(const unsigned char *us, int len);
+
// Line separator is U+2028 \xe2\x80\xa8
// Paragraph separator is U+2029 \xe2\x80\xa9
const int UTF8SeparatorLength = 3;