diff options
Diffstat (limited to 'lexlib/StyleContext.h')
-rw-r--r-- | lexlib/StyleContext.h | 97 |
1 files changed, 83 insertions, 14 deletions
// lexlib/StyleContext.h, patch c2d223e3f..9f1818f21: UTF-8 helpers added after MakeLowerCase.

/**
 * Decode the UTF-8 sequence that starts at us[0] into a single code point.
 *
 * The lead byte alone decides how many bytes are combined:
 *   lead < 0xC2          -> the lead byte itself is returned
 *   0xC2 <= lead < 0xE0  -> 2 bytes
 *   0xE0 <= lead < 0xF0  -> 3 bytes
 *   0xF0 <= lead < 0xF5  -> 4 bytes
 *   lead >= 0xF5         -> the lead byte itself is returned
 *
 * Trail bytes are only masked to their low 6 bits, they are not validated,
 * so @a us must point at as many readable bytes as the lead byte implies.
 */
inline int UnicodeCodePoint(const unsigned char *us) {
	const int lead = us[0];
	if (lead < 0xC2 || lead >= 0xF5) {
		// Single byte, stray trail byte, or a lead byte outside the handled range.
		return lead;
	}
	if (lead < 0xE0) {
		return ((lead & 0x1F) << 6) | (us[1] & 0x3F);
	}
	if (lead < 0xF0) {
		return ((lead & 0xF) << 12) | ((us[1] & 0x3F) << 6) | (us[2] & 0x3F);
	}
	return ((lead & 0x7) << 18) | ((us[1] & 0x3F) << 12) | ((us[2] & 0x3F) << 6) | (us[3] & 0x3F);
}

/**
 * Number of bytes UTF-8 uses for @a codePoint, judged purely by magnitude:
 * 1 below 0x80, 2 below 0x800, 3 below 0x10000, otherwise 4.
 */
inline int BytesInUnicodeCodePoint(int codePoint) {
	if (codePoint < 0x80)
		return 1;
	if (codePoint < 0x800)
		return 2;
	if (codePoint < 0x10000)
		return 3;
	return 4;
}

// All languages handled so far can treat all characters >= 0x80 as one class
// which just continues the current token or starts an identifier if in default.
// DBCS treated specially as the second character can be < 0x80 and hence @@ -27,22 +51,40 @@ class StyleContext { LexAccessor &styler; unsigned int endPos; StyleContext &operator=(const StyleContext &); + void GetNextChar(unsigned int pos) { chNext = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1)); - if (styler.IsLeadByte(static_cast<char>(chNext))) { - chNext = chNext << 8; - chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2)); + if (styler.Encoding() == encUnicode) { + if (chNext >= 0x80) { + unsigned char bytes[4] = { static_cast<unsigned char>(chNext), 0, 0, 0 }; + for (int trail=1; trail<3; trail++) { + bytes[trail] = static_cast<unsigned char>(styler.SafeGetCharAt(pos+1+trail)); + if (!((bytes[trail] >= 0x80) && (bytes[trail] < 0xc0))) { + bytes[trail] = 0; + break; + } + } + chNext = UnicodeCodePoint(bytes); + } + } else if (styler.Encoding() == encDBCS) { + if (styler.IsLeadByte(static_cast<char>(chNext))) { + chNext = chNext << 8; + chNext |= static_cast<unsigned char>(styler.SafeGetCharAt(pos+2)); + } } // End of line? // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) // or on LF alone (Unix). Avoid triggering two times on Dos/Win. - atLineEnd = (ch == '\r' && chNext != '\n') || - (ch == '\n') || - (currentPos >= endPos); + if (lineStartNext < styler.Length()) + atLineEnd = static_cast<int>(pos) >= (lineStartNext-1); + else // Last line + atLineEnd = static_cast<int>(pos) >= lineStartNext; } public: unsigned int currentPos; + int currentLine; + int lineStartNext; bool atLineStart; bool atLineEnd; int state; @@ -55,6 +97,8 @@ public: styler(styler_), endPos(startPos + length), currentPos(startPos), + currentLine(-1), + lineStartNext(-1), atLineEnd(false), state(initStyle & chMask), // Mask off all bits which aren't in the chMask. 
chPrev(0), @@ -62,13 +106,22 @@ public: chNext(0) { styler.StartAt(startPos, chMask); styler.StartSegment(startPos); - atLineStart = static_cast<unsigned int>(styler.LineStart(styler.GetLine(startPos))) == startPos; + currentLine = styler.GetLine(startPos); + lineStartNext = styler.LineStart(currentLine+1); + atLineStart = static_cast<unsigned int>(styler.LineStart(currentLine)) == startPos; unsigned int pos = currentPos; ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos)); - if (styler.IsLeadByte(static_cast<char>(ch))) { - pos++; - ch = ch << 8; - ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos)); + if (styler.Encoding() == encUnicode) { + // Get the current char + GetNextChar(pos-1); + ch = chNext; + pos += BytesInUnicodeCodePoint(ch) - 1; + } else if (styler.Encoding() == encDBCS) { + if (styler.IsLeadByte(static_cast<char>(ch))) { + pos++; + ch = ch << 8; + ch |= static_cast<unsigned char>(styler.SafeGetCharAt(pos)); + } } GetNextChar(pos); } @@ -82,12 +135,28 @@ public: void Forward() { if (currentPos < endPos) { atLineStart = atLineEnd; + if (atLineStart) { + currentLine++; + lineStartNext = styler.LineStart(currentLine+1); + } chPrev = ch; - currentPos++; - if (ch >= 0x100) + if (styler.Encoding() == encUnicode) { + currentPos += BytesInUnicodeCodePoint(ch); + } else if (styler.Encoding() == encDBCS) { + currentPos++; + if (ch >= 0x100) + currentPos++; + } else { currentPos++; + } ch = chNext; - GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0)); + if (styler.Encoding() == encUnicode) { + GetNextChar(currentPos + BytesInUnicodeCodePoint(ch)-1); + } else if (styler.Encoding() == encDBCS) { + GetNextChar(currentPos + ((ch >= 0x100) ? 1 : 0)); + } else { + GetNextChar(currentPos); + } } else { atLineStart = false; chPrev = ' '; |