aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexlib/StyleContext.h
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2013-01-19 12:33:20 +1100
committer nyamatongwe <unknown> 2013-01-19 12:33:20 +1100
commit 5d17740fdedcea321a23ffd3350aa7adbf4c2329 (patch)
tree 1512465a2bbf066e96eb1ae2d10fdf3fc7dbd42b /lexlib/StyleContext.h
parent f46c96ecb682ad736453f78f6709fca6c6911886 (diff)
download scintilla-mirror-5d17740fdedcea321a23ffd3350aa7adbf4c2329.tar.gz
Implement generic support for Unicode line ends and sub styles in lexer support classes.
Diffstat (limited to 'lexlib/StyleContext.h')
-rw-r--r-- lexlib/StyleContext.h 97
1 files changed, 83 insertions, 14 deletions
diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h
index c2d223e3f..9f1818f21 100644
--- a/lexlib/StyleContext.h
+++ b/lexlib/StyleContext.h
@@ -19,6 +19,30 @@ static inline int MakeLowerCase(int ch) {
return ch - 'A' + 'a';
}
+inline int UnicodeCodePoint(const unsigned char *us) { // Decodes the UTF-8 sequence starting at us[0]; reads up to us[3] depending on the lead byte. Trail bytes are masked, not validated.
+ if (us[0] < 0xC2) { // lead below 0xC2: the byte value itself is returned
+ return us[0];
+ } else if (us[0] < 0xE0) { // lead 0xC2..0xDF: two bytes, 5 + 6 payload bits
+ return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
+ } else if (us[0] < 0xF0) { // lead 0xE0..0xEF: three bytes, 4 + 6 + 6 payload bits
+ return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
+ } else if (us[0] < 0xF5) { // lead 0xF0..0xF4: four bytes, 3 + 6 + 6 + 6 payload bits
+ return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
+ }
+ return us[0]; // lead 0xF5 or above: the byte value itself is returned
+}
+
+inline int BytesInUnicodeCodePoint(int codePoint) { // Returns 1, 2, 3 or 4 according to which range codePoint falls in.
+ if (codePoint < 0x80) // anything below 0x80 (negative values included) -> 1
+ return 1;
+ else if (codePoint < 0x800) // 0x80..0x7FF -> 2
+ return 2;
+ else if (codePoint < 0x10000) // 0x800..0xFFFF -> 3
+ return 3;
+ else // 0x10000 and above -> 4
+ return 4;
+}
+
// All languages handled so far can treat all characters >= 0x80 as one class
// which just continues the current token or starts an identifier if in default.
// DBCS treated specially as the second character can be < 0x80 and hence
@@ -27,22 +51,40 @@ class StyleContext {
LexAccessor &styler;
unsigned int endPos;
StyleContext &operator=(const StyleContext &);
+
void GetNextChar(unsigned int pos) { // Loads chNext from the byte(s) following pos, then recomputes atLineEnd from pos and lineStartNext.
chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1));
- if (styler.IsLeadByte(static_cast<char>(chNext))) {
- chNext = chNext << 8;
- chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2));
+ if (styler.Encoding() == encUnicode) {
+ if (chNext >= 0x80) { // byte >= 0x80: gather following trail bytes and decode them together
+ unsigned char bytes[4] = { static_cast<unsigned char>(chNext), 0, 0, 0 };
+ for (int trail=1; trail<4; trail++) { // read up to 3 trail bytes so bytes[3] is populated for 4-byte sequences
+ bytes[trail] = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1+trail));
+ if (!((bytes[trail] >= 0x80) && (bytes[trail] < 0xc0))) { // stop at the first byte outside 0x80..0xBF and leave the rest zero
+ bytes[trail] = 0;
+ break;
+ }
+ }
+ chNext = UnicodeCodePoint(bytes);
+ }
+ } else if (styler.Encoding() == encDBCS) {
+ if (styler.IsLeadByte(static_cast<char>(chNext))) { // DBCS lead byte: pack lead and following byte into one value
+ chNext = chNext << 8;
+ chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2));
+ }
}
// End of line?
// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win)
// or on LF alone (Unix). Avoid triggering two times on Dos/Win.
- atLineEnd = (ch == '\r' && chNext != '\n') ||
- (ch == '\n') ||
- (currentPos >= endPos);
+ if (lineStartNext < styler.Length()) // a further line exists: at line end once pos reaches the position just before its start
+ atLineEnd = static_cast<int>(pos) >= (lineStartNext-1);
+ else // Last line
+ atLineEnd = static_cast<int>(pos) >= lineStartNext;
}
public:
unsigned int currentPos;
+ int currentLine;
+ int lineStartNext;
bool atLineStart;
bool atLineEnd;
int state;
@@ -55,6 +97,8 @@ public:
styler(styler_),
endPos(startPos + length),
currentPos(startPos),
+ currentLine(-1),
+ lineStartNext(-1),
atLineEnd(false),
state(initStyle & chMask), // Mask off all bits which aren't in the chMask.
chPrev(0),
@@ -62,13 +106,22 @@ public:
chNext(0) {
styler.StartAt(startPos, chMask);
styler.StartSegment(startPos);
- atLineStart = static_cast<unsigned int>(styler.LineStart(styler.GetLine(startPos))) == startPos;
+ currentLine = styler.GetLine(startPos);
+ lineStartNext = styler.LineStart(currentLine+1);
+ atLineStart = static_cast<unsigned int>(styler.LineStart(currentLine)) == startPos;
unsigned int pos = currentPos;
ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
- if (styler.IsLeadByte(static_cast<char>(ch))) {
- pos++;
- ch = ch << 8;
- ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos));
+ if (styler.Encoding() == encUnicode) {
+ // Get the current char
+ GetNextChar(pos-1);
+ ch = chNext;
+ pos += BytesInUnicodeCodePoint(ch) - 1;
+ } else if (styler.Encoding() == encDBCS) {
+ if (styler.IsLeadByte(static_cast<char>(ch))) {
+ pos++;
+ ch = ch << 8;
+ ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos));
+ }
}
GetNextChar(pos);
}
@@ -82,12 +135,28 @@ public:
void Forward() {
if (currentPos < endPos) {
atLineStart = atLineEnd;
+ if (atLineStart) {
+ currentLine++;
+ lineStartNext = styler.LineStart(currentLine+1);
+ }
chPrev = ch;
- currentPos++;
- if (ch >= 0x100)
+ if (styler.Encoding() == encUnicode) {
+ currentPos += BytesInUnicodeCodePoint(ch);
+ } else if (styler.Encoding() == encDBCS) {
+ currentPos++;
+ if (ch >= 0x100)
+ currentPos++;
+ } else {
currentPos++;
+ }
ch = chNext;
- GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0));
+ if (styler.Encoding() == encUnicode) {
+ GetNextChar(currentPos + BytesInUnicodeCodePoint(ch)-1);
+ } else if (styler.Encoding() == encDBCS) {
+ GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0));
+ } else {
+ GetNextChar(currentPos);
+ }
} else {
atLineStart = false;
chPrev = ' ';