diff options
Diffstat (limited to 'src/Document.cxx')
| -rw-r--r-- | src/Document.cxx | 44 | 
1 files changed, 19 insertions, 25 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 10c8e9ce5..87cace721 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -125,6 +125,18 @@ size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcep  	return std::lround(secondsAllowed / Duration());  } +CharacterExtracted::CharacterExtracted(const unsigned char *charBytes, size_t widthCharBytes) noexcept { +	const int utf8status = UTF8Classify(charBytes, widthCharBytes); +	if (utf8status & UTF8MaskInvalid) { +		// Treat as invalid and use up just one byte +		character = unicodeReplacementChar; +		widthBytes = 1; +	} else { +		character = UnicodeFromUTF8(charBytes); +		widthBytes = utf8status & UTF8MaskWidth; +	} +} +  Document::Document(DocumentOption options) :  	cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),  	durationStyleOneByte(0.000001, 0.0000001, 0.00001) { @@ -917,7 +929,7 @@ bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {  	}  } -Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { +CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {  	if (position >= LengthNoExcept()) {  		return CharacterExtracted(unicodeReplacementChar, 0);  	} @@ -931,13 +943,7 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co  		unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };  		for (int b = 1; b<widthCharBytes; b++)  			charBytes[b] = cb.UCharAt(position + b); -		const int utf8status = UTF8Classify(charBytes, widthCharBytes); -		if (utf8status & UTF8MaskInvalid) { -			// Treat as invalid and use up just one byte -			return CharacterExtracted(unicodeReplacementChar, 1); -		} else { -			return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); -		} +		return CharacterExtracted(charBytes, widthCharBytes);  	} else {  		if (IsDBCSLeadByteNoExcept(leadByte)) {  			const unsigned char trailByte = cb.UCharAt(position + 1); @@ -949,7 +955,7 @@ Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) co  	}  } -Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept { +CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {  	if (position <= 0) {  		return CharacterExtracted(unicodeReplacementChar, 0);  	} @@ -972,13 +978,7 @@ Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) c  				unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };  				for (Sci::Position b = 0; b<widthCharBytes; b++)  					charBytes[b] = cb.UCharAt(startUTF + b); -				const int utf8status = UTF8Classify(charBytes, widthCharBytes); -				if (utf8status & UTF8MaskInvalid) { -					// Treat as invalid and use up just one byte -					return CharacterExtracted(unicodeReplacementChar, 1); -				} else { -					return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); -				} +				return CharacterExtracted(charBytes, widthCharBytes);  			}  			// Else invalid UTF-8 so return position of isolated trail byte  		} @@ -2037,7 +2037,7 @@ void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {  	pcf = std::move(pcf_);  } -Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept { +CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {  	const unsigned char leadByte = cb.UCharAt(position);  	if (UTF8IsAscii(leadByte)) {  		// Common case: ASCII character @@ -2047,13 +2047,7 @@ Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position)  	unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };  	for (int b=1; b<widthCharBytes; b++)  		charBytes[b] = cb.UCharAt(position + b); -	const int utf8status = UTF8Classify(charBytes, widthCharBytes); -	if (utf8status & UTF8MaskInvalid) { -		// Treat as invalid and use up just one byte -		return CharacterExtracted(unicodeReplacementChar, 1); -	} else { -		return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth); -	} +	return CharacterExtracted(charBytes, widthCharBytes);  }  namespace { @@ -3040,7 +3034,7 @@ public:  	}  private:  	void ReadCharacter() noexcept { -		const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position); +		const CharacterExtracted charExtracted = doc->ExtractCharacter(position);  		lenBytes = charExtracted.widthBytes;  		if (charExtracted.character == unicodeReplacementChar) {  			lenCharacters = 1;  | 
