diff options
| -rw-r--r-- | src/Document.cxx | 13 | ||||
| -rw-r--r-- | src/Document.h | 2 | ||||
| -rw-r--r-- | test/unit/testDocument.cxx | 67 | 
3 files changed, 70 insertions, 12 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index e5022ad64..6dc14238f 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -674,7 +674,7 @@ int Document::LenChar(Sci::Position pos) const noexcept {  			return utf8status & UTF8MaskWidth;  		}  	} else { -		if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) { +		if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) {  			return 2;  		} else {  			return 1; @@ -709,7 +709,7 @@ bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position  	}  } -// Normalise a position so that it is not halfway through a two byte character. +// Normalise a position so that it is not part way through a multi-byte character.  // This can occur in two situations -  // When lines are terminated with \r\n pairs which should be treated as one character.  // When displaying DBCS text such as Japanese. @@ -760,7 +760,7 @@ Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position  			// Check from known start of character.  			while (posCheck < pos) { -				const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1; +				const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1;  				if (posCheck + mbsize == pos) {  					return pos;  				} else if (posCheck + mbsize > pos) { @@ -825,7 +825,7 @@ Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexc  			}  		} else {  			if (moveDir > 0) { -				const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1; +				const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1;  				pos += mbsize;  				if (pos > cb.Length())  					pos = cb.Length(); @@ -1098,6 +1098,11 @@ int Document::DBCSDrawBytes(std::string_view text) const noexcept {  	}  } +bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept { +	return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) +		&& IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1)); +} +  static constexpr bool IsSpaceOrTab(int ch) noexcept {  	return ch == ' ' || ch == '\t';  } diff --git a/src/Document.h b/src/Document.h index c40ce2a44..fe27f4936 100644 --- a/src/Document.h +++ b/src/Document.h @@ -330,8 +330,8 @@ public:  	bool SCI_METHOD IsDBCSLeadByte(char ch) const override;  	bool IsDBCSLeadByteNoExcept(char ch) const noexcept;  	bool IsDBCSTrailByteNoExcept(char ch) const noexcept; -	bool IsDBCSLeadByteInvalid(char ch) const noexcept;  	int DBCSDrawBytes(std::string_view text) const noexcept; +	bool IsDBCSDualByteAt(Sci::Position pos) const noexcept;  	int SafeSegment(const char *text, int length, int lengthSegment) const noexcept;  	EncodingFamily CodePageFamily() const noexcept; diff --git a/test/unit/testDocument.cxx b/test/unit/testDocument.cxx index e07f99997..c91868165 100644 --- a/test/unit/testDocument.cxx +++ b/test/unit/testDocument.cxx @@ -316,25 +316,78 @@ TEST_CASE("Document") {  		// Can not test case mapping of double byte text as folder available here does not implement this   	} -	SECTION("GetCharacterAndWidth") { +	SECTION("GetCharacterAndWidth DBCS") {  		Document doc(DocumentOption::Default);  		doc.SetDBCSCodePage(932);  		REQUIRE(doc.CodePage() == 932); -		const Sci::Position length = doc.InsertString(0, "\x84\xff=", 3); -		REQUIRE(3 == length); -		REQUIRE(3 == doc.Length()); +		const Sci::Position length = doc.InsertString(0, "H\x84\xff\x84H", 5); +		// This text is invalid in code page 932. +		// A reasonable interpretation is as 4 items: 2 characters and 2 character fragments +		// The last item is a 2-byte CYRILLIC CAPITAL LETTER ZE character  +		// H [84] [FF] ZE +		REQUIRE(5 == length); +		REQUIRE(5 == doc.Length());  		Sci::Position width = 0; +		// test GetCharacterAndWidth()  		int ch = doc.GetCharacterAndWidth(0, &width);  		REQUIRE(width == 1); +		REQUIRE(ch == 'H'); +		ch = doc.GetCharacterAndWidth(1, &width); +		REQUIRE(width == 1);  		REQUIRE(ch == 0x84);  		width = 0; -		ch = doc.GetCharacterAndWidth(1, &width); +		ch = doc.GetCharacterAndWidth(2, &width);  		REQUIRE(width == 1);  		REQUIRE(ch == 0xff);  		width = 0; -		ch = doc.GetCharacterAndWidth(2, &width); +		ch = doc.GetCharacterAndWidth(3, &width); +		REQUIRE(width == 2); +		REQUIRE(ch == 0x8448); +		// test LenChar() +		width = doc.LenChar(0); +		REQUIRE(width == 1); +		width = doc.LenChar(1); +		REQUIRE(width == 1); +		width = doc.LenChar(2);  		REQUIRE(width == 1); -		REQUIRE(ch == '='); +		width = doc.LenChar(3); +		REQUIRE(width == 2); +		// test MovePositionOutsideChar() +		Sci::Position pos = doc.MovePositionOutsideChar(1, 1); +		REQUIRE(pos == 1); +		pos = doc.MovePositionOutsideChar(2, 1); +		REQUIRE(pos == 2); +		pos = doc.MovePositionOutsideChar(3, 1); +		REQUIRE(pos == 3); +		pos = doc.MovePositionOutsideChar(4, 1); +		REQUIRE(pos == 5); +		pos = doc.MovePositionOutsideChar(1, -1); +		REQUIRE(pos == 1); +		pos = doc.MovePositionOutsideChar(2, -1); +		REQUIRE(pos == 2); +		pos = doc.MovePositionOutsideChar(3, -1); +		REQUIRE(pos == 3); +		pos = doc.MovePositionOutsideChar(4, -1); +		REQUIRE(pos == 3); +		// test NextPosition() +		pos = doc.NextPosition(0, 1); +		REQUIRE(pos == 1); +		pos = doc.NextPosition(1, 1); +		REQUIRE(pos == 2); +		pos = doc.NextPosition(2, 1); +		REQUIRE(pos == 3); +		pos = doc.NextPosition(3, 1); +		REQUIRE(pos == 5); +		pos = doc.NextPosition(1, -1); +		REQUIRE(pos == 0); +		// The next two tests are commented out because the implementation of NextPosition +		// cannot yet handle character fragments correctly when moving backwards. +		//pos = doc.NextPosition(2, -1); +		//REQUIRE(pos == 1); +		//pos = doc.NextPosition(3, -1); +		//REQUIRE(pos == 2); +		pos = doc.NextPosition(5, -1); +		REQUIRE(pos == 3);  	}  } | 
