aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Document.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/Document.cxx')
-rw-r--r--src/Document.cxx44
1 files changed, 19 insertions, 25 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 10c8e9ce5..87cace721 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -125,6 +125,18 @@ size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcep
return std::lround(secondsAllowed / Duration());
}
+CharacterExtracted::CharacterExtracted(const unsigned char *charBytes, size_t widthCharBytes) noexcept {
+ const int utf8status = UTF8Classify(charBytes, widthCharBytes);
+ if (utf8status & UTF8MaskInvalid) {
+ // Treat as invalid and use up just one byte
+ character = unicodeReplacementChar;
+ widthBytes = 1;
+ } else {
+ character = UnicodeFromUTF8(charBytes);
+ widthBytes = utf8status & UTF8MaskWidth;
+ }
+}
+
Document::Document(DocumentOption options) :
cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),
durationStyleOneByte(0.000001, 0.0000001, 0.00001) {
@@ -917,7 +929,7 @@ bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
}
}
-Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {
+CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {
if (position >= LengthNoExcept()) {
return CharacterExtracted(unicodeReplacementChar, 0);
}
@@ -931,13 +943,7 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co
unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
for (int b = 1; b<widthCharBytes; b++)
charBytes[b] = cb.UCharAt(position + b);
- const int utf8status = UTF8Classify(charBytes, widthCharBytes);
- if (utf8status & UTF8MaskInvalid) {
- // Treat as invalid and use up just one byte
- return CharacterExtracted(unicodeReplacementChar, 1);
- } else {
- return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
- }
+ return CharacterExtracted(charBytes, widthCharBytes);
} else {
if (IsDBCSLeadByteNoExcept(leadByte)) {
const unsigned char trailByte = cb.UCharAt(position + 1);
@@ -949,7 +955,7 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co
}
}
-Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {
+CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {
if (position <= 0) {
return CharacterExtracted(unicodeReplacementChar, 0);
}
@@ -972,13 +978,7 @@ Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) c
unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
for (Sci::Position b = 0; b<widthCharBytes; b++)
charBytes[b] = cb.UCharAt(startUTF + b);
- const int utf8status = UTF8Classify(charBytes, widthCharBytes);
- if (utf8status & UTF8MaskInvalid) {
- // Treat as invalid and use up just one byte
- return CharacterExtracted(unicodeReplacementChar, 1);
- } else {
- return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
- }
+ return CharacterExtracted(charBytes, widthCharBytes);
}
// Else invalid UTF-8 so return position of isolated trail byte
}
@@ -2037,7 +2037,7 @@ void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {
pcf = std::move(pcf_);
}
-Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
+CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
const unsigned char leadByte = cb.UCharAt(position);
if (UTF8IsAscii(leadByte)) {
// Common case: ASCII character
@@ -2047,13 +2047,7 @@ Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position)
unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
for (int b=1; b<widthCharBytes; b++)
charBytes[b] = cb.UCharAt(position + b);
- const int utf8status = UTF8Classify(charBytes, widthCharBytes);
- if (utf8status & UTF8MaskInvalid) {
- // Treat as invalid and use up just one byte
- return CharacterExtracted(unicodeReplacementChar, 1);
- } else {
- return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
- }
+ return CharacterExtracted(charBytes, widthCharBytes);
}
namespace {
@@ -3040,7 +3034,7 @@ public:
}
private:
void ReadCharacter() noexcept {
- const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
+ const CharacterExtracted charExtracted = doc->ExtractCharacter(position);
lenBytes = charExtracted.widthBytes;
if (charExtracted.character == unicodeReplacementChar) {
lenCharacters = 1;