diff options
author | Neil <nyamatongwe@gmail.com> | 2022-11-12 20:37:31 +1100 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2022-11-12 20:37:31 +1100 |
commit | 386b3dc9ddb38992ebc1c05b034b3dd2d8dcc2d9 (patch) | |
tree | 106f4ad4308aabef8de78b2121698e4a87f118f3 /src/Document.cxx | |
parent | 5ae14dd681c7f78af6d184286f3c2a94dac9a40b (diff) | |
download | scintilla-mirror-386b3dc9ddb38992ebc1c05b034b3dd2d8dcc2d9.tar.gz |
Hoist common conversion from UTF-8 byte string into CharacterExtracted
constructor.
Move CharacterExtracted out of Document so it can be more widely used.
Diffstat (limited to 'src/Document.cxx')
-rw-r--r-- | src/Document.cxx | 44 |
1 files changed, 19 insertions, 25 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 10c8e9ce5..87cace721 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -125,6 +125,18 @@ size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcep return std::lround(secondsAllowed / Duration()); } +CharacterExtracted::CharacterExtracted(const unsigned char *charBytes, size_t widthCharBytes) noexcept { + const int utf8status = UTF8Classify(charBytes, widthCharBytes); + if (utf8status & UTF8MaskInvalid) { + // Treat as invalid and use up just one byte + character = unicodeReplacementChar; + widthBytes = 1; + } else { + character = UnicodeFromUTF8(charBytes); + widthBytes = utf8status & UTF8MaskWidth; + } +} + Document::Document(DocumentOption options) : cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)), durationStyleOneByte(0.000001, 0.0000001, 0.00001) { @@ -917,7 +929,7 @@ bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { } } -Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { +CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { if (position >= LengthNoExcept()) { return CharacterExtracted(unicodeReplacementChar, 0); } @@ -931,13 +943,7 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b<widthCharBytes; b++) charBytes[b] = cb.UCharAt(position + b); - const int utf8status = UTF8Classify(charBytes, widthCharBytes); - if (utf8status & UTF8MaskInvalid) { - // Treat as invalid and use up just one byte - return CharacterExtracted(unicodeReplacementChar, 1); - } else { - return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); - } + return CharacterExtracted(charBytes, widthCharBytes); } else { if (IsDBCSLeadByteNoExcept(leadByte)) { const unsigned char trailByte = cb.UCharAt(position + 1); @@ -949,7 +955,7 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co } } -Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept { +CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept { if (position <= 0) { return CharacterExtracted(unicodeReplacementChar, 0); } @@ -972,13 +978,7 @@ Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) c unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 }; for (Sci::Position b = 0; b<widthCharBytes; b++) charBytes[b] = cb.UCharAt(startUTF + b); - const int utf8status = UTF8Classify(charBytes, widthCharBytes); - if (utf8status & UTF8MaskInvalid) { - // Treat as invalid and use up just one byte - return CharacterExtracted(unicodeReplacementChar, 1); - } else { - return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); - } + return CharacterExtracted(charBytes, widthCharBytes); } // Else invalid UTF-8 so return position of isolated trail byte } @@ -2037,7 +2037,7 @@ void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept { pcf = std::move(pcf_); } -Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept { +CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept { const unsigned char leadByte = cb.UCharAt(position); if (UTF8IsAscii(leadByte)) { // Common case: ASCII character @@ -2047,13 +2047,7 @@ Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b=1; b<widthCharBytes; b++) charBytes[b] = cb.UCharAt(position + b); - const int utf8status = UTF8Classify(charBytes, widthCharBytes); - if (utf8status & UTF8MaskInvalid) { - // Treat as invalid and use up just one byte - return CharacterExtracted(unicodeReplacementChar, 1); - } else { - return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); - } + return CharacterExtracted(charBytes, widthCharBytes); } namespace { @@ -3040,7 +3034,7 @@ public: } private: void ReadCharacter() noexcept { - const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position); + const CharacterExtracted charExtracted = doc->ExtractCharacter(position); lenBytes = charExtracted.widthBytes; if (charExtracted.character == unicodeReplacementChar) { lenCharacters = 1; |