diff options
| author | Zufu Liu <unknown> | 2021-07-03 13:21:38 +1000 | 
|---|---|---|
| committer | Zufu Liu <unknown> | 2021-07-03 13:21:38 +1000 | 
| commit | a2d23bd463e65f532301b682b64cd02b8a57716b (patch) | |
| tree | 1d8129cc58281e1134ebf3884273f458bfeebef4 /src | |
| parent | 9cda372c64c8920d2e910825161a8ed882b417b3 (diff) | |
| download | scintilla-mirror-a2d23bd463e65f532301b682b64cd02b8a57716b.tar.gz | |
Feature [feature-requests:#1408] Simplify code, remove IsDBCSTrailByteInvalid.
Drop temporary test for IsDBCSTrailByteNoExcept.
Diffstat (limited to 'src')
| -rw-r--r-- | src/Document.cxx | 76 |
| -rw-r--r-- | src/Document.h | 1 |
2 files changed, 13 insertions, 64 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 739b6266a..0ec598bc2 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -985,27 +985,22 @@ Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sc  }  int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { -	int character;  	int bytesInCharacter = 1;  	const unsigned char leadByte = cb.UCharAt(position); -	if (dbcsCodePage) { +	int character = leadByte; +	if (dbcsCodePage && !UTF8IsAscii(leadByte)) {  		if (CpUtf8 == dbcsCodePage) { -			if (UTF8IsAscii(leadByte)) { -				// Single byte character or invalid -				character =  leadByte; +			const int widthCharBytes = UTF8BytesOfLead[leadByte]; +			unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; +			for (int b=1; b<widthCharBytes; b++) +				charBytes[b] = cb.UCharAt(position+b); +			const int utf8status = UTF8Classify(charBytes, widthCharBytes); +			if (utf8status & UTF8MaskInvalid) { +				// Report as singleton surrogate values which are invalid Unicode +				character =  0xDC80 + leadByte;  			} else { -				const int widthCharBytes = UTF8BytesOfLead[leadByte]; -				unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; -				for (int b=1; b<widthCharBytes; b++) -					charBytes[b] = cb.UCharAt(position+b); -				const int utf8status = UTF8Classify(charBytes, widthCharBytes); -				if (utf8status & UTF8MaskInvalid) { -					// Report as singleton surrogate values which are invalid Unicode -					character =  0xDC80 + leadByte; -				} else { -					bytesInCharacter = utf8status & UTF8MaskWidth; -					character = UnicodeFromUTF8(charBytes); -				} +				bytesInCharacter = utf8status & UTF8MaskWidth; +				character = UnicodeFromUTF8(charBytes);  			}  		} else {  			if (IsDBCSLeadByteNoExcept(leadByte)) { @@ -1013,15 +1008,9 @@ int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Positio  				if (IsDBCSTrailByteNoExcept(trailByte)) {  					bytesInCharacter = 2;  					
character = (leadByte << 8) | trailByte; -				} else { -					character = leadByte;  				} -			} else { -				character = leadByte;  			}  		} -	} else { -		character = leadByte;  	}  	if (pWidth) {  		*pWidth = bytesInCharacter; @@ -1135,51 +1124,12 @@ bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept {  	return false;  } -bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept { -	const unsigned char trail = ch; -	switch (dbcsCodePage) { -	case 932: -		// Shift_jis -		return -			(trail <= 0x3F) || -			(trail == 0x7F) || -			(trail >= 0xFD); -	case 936: -		// GBK -		return -			(trail <= 0x3F) || -			(trail == 0x7F) || -			(trail == 0xFF); -	case 949: -		// Korean Wansung KS C-5601-1987 -		return -			(trail <= 0x40) || -			((trail >= 0x5B) && (trail <= 0x60)) || -			((trail >= 0x7B) && (trail <= 0x80)) || -			(trail == 0xFF); -	case 950: -		// Big5 -		return -			(trail <= 0x3F) || -			((trail >= 0x7F) && (trail <= 0xA0)) || -			(trail == 0xFF); -	case 1361: -		// Korean Johab KS C-5601-1992 -		return -			(trail <= 0x30) || -			(trail == 0x7F) || -			(trail == 0x80) || -			(trail == 0xFF); -	} -	return false; -} -  int Document::DBCSDrawBytes(std::string_view text) const noexcept {  	if (text.length() <= 1) {  		return static_cast<int>(text.length());  	}  	if (IsDBCSLeadByteNoExcept(text[0])) { -		return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2; +		return IsDBCSTrailByteNoExcept(text[1]) ? 
2 : 1;  	} else {  		return 1;  	} diff --git a/src/Document.h b/src/Document.h index 88392c1a2..c40ce2a44 100644 --- a/src/Document.h +++ b/src/Document.h @@ -331,7 +331,6 @@ public:  	bool IsDBCSLeadByteNoExcept(char ch) const noexcept;  	bool IsDBCSTrailByteNoExcept(char ch) const noexcept;  	bool IsDBCSLeadByteInvalid(char ch) const noexcept; -	bool IsDBCSTrailByteInvalid(char ch) const noexcept;  	int DBCSDrawBytes(std::string_view text) const noexcept;  	int SafeSegment(const char *text, int length, int lengthSegment) const noexcept;  	EncodingFamily CodePageFamily() const noexcept;  | 
