diff options
Diffstat (limited to 'src/Document.cxx')
| -rw-r--r-- | src/Document.cxx | 65 | 
1 files changed, 65 insertions, 0 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index 8523a00fa..472567068 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -699,6 +699,71 @@ bool Document::NextCharacter(int &pos, int moveDir) const {  	}  } +static inline int UnicodeFromBytes(const unsigned char *us) { +	if (us[0] < 0xC2) { +		return us[0]; +	} else if (us[0] < 0xE0) { +		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F); +	} else if (us[0] < 0xF0) { +		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F); +	} else if (us[0] < 0xF5) { +		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F); +	} +	return us[0]; +} + +// Return -1  on out-of-bounds +int SCI_METHOD Document::GetRelativePosition(int start, int characterOffset, int *character, int *width) const { +	int pos = start; +	if (dbcsCodePage) { +		const int increment = (characterOffset > 0) ? 1 : -1; +		while (characterOffset != 0) { +			const int posNext = NextPosition(pos, increment); +			if (posNext == pos) +				return -1; +			pos = posNext; +			characterOffset -= increment; +		} +		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos)); +		if (SC_CP_UTF8 == dbcsCodePage) { +			if (UTF8IsAscii(leadByte)) { +				// Single byte character or invalid +				*character = leadByte; +				*width = 1; +			} else { +				const int widthCharBytes = UTF8BytesOfLead[leadByte]; +				unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; +				for (int b=1; b<widthCharBytes; b++) +					charBytes[b] = static_cast<unsigned char>(cb.CharAt(pos+b)); +				int utf8status = UTF8Classify(charBytes, widthCharBytes); +				if (utf8status & UTF8MaskInvalid) { +					// Report as singleton surrogate values which are invalid in Unicode +					*character = 0xDC80 + leadByte; +					*width = 1; +				} else { +					*character = UnicodeFromBytes(charBytes); +					*width = utf8status & UTF8MaskWidth; +				} +			} +		} else if (dbcsCodePage) { +			if (IsDBCSLeadByte(leadByte)) { +				*character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(pos+1)); +				*width = 2; +			} else { +				*character = leadByte; +				*width = 1; +			} +		} +	} else { +		pos = start + characterOffset; +		if ((pos < 0) || (pos > Length())) +			return -1; +		*character = cb.CharAt(pos); +		*width = 1; +	} +	return pos; +} +  int SCI_METHOD Document::CodePage() const {  	return dbcsCodePage;  }  | 
