diff options
Diffstat (limited to 'src/Document.cxx')
| -rw-r--r-- | src/Document.cxx | 119 | 
1 files changed, 79 insertions, 40 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index 7a30d7fd1..650c0ced2 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -202,18 +202,23 @@ bool Document::IsCrLf(int pos) {  bool Document::IsDBCS(int pos) {  #if PLAT_WIN  	if (dbcsCodePage) { -		// Anchor DBCS calculations at start of line because start of line can -		// not be a DBCS trail byte. -		int startLine = pos; -		while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n') -			startLine--; -		while (startLine <= pos) { -			if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine))) { +		if (SC_CP_UTF8 == dbcsCodePage) { +			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos)); +			return ch >= 0x80; +		} else { +			// Anchor DBCS calculations at start of line because start of line can +			// not be a DBCS trail byte. +			int startLine = pos; +			while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n') +				startLine--; +			while (startLine <= pos) { +				if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine))) { +					startLine++; +					if (startLine >= pos) +						return true; +				}  				startLine++; -				if (startLine >= pos) -					return true;  			} -			startLine++;  		}  	}  	return false; @@ -222,6 +227,28 @@ bool Document::IsDBCS(int pos) {  #endif  } +int Document::LenChar(int pos) { +	if (IsCrLf(pos)) { +		return 2; +	} else if (SC_CP_UTF8 == dbcsCodePage) { +		unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos)); +		if (ch < 0x80) +			return 1; +		int len = 2; +		if (ch >= (0x80+0x40+0x20)) +			len = 3; +		int lengthDoc = Length(); +		if ((pos + len) > lengthDoc) +			return lengthDoc-pos; +		else  +			return len; +	} else if (IsDBCS(pos)) { +		return 2; +	} else { +		return 1; +	} +} +  // Normalise a position so that it is not halfway through a two byte character.  // This can occur in two situations -  // When lines are terminated with \r\n pairs which should be treated as one character. @@ -253,29 +280,41 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {  #if PLAT_WIN  	if (dbcsCodePage) { -		// Anchor DBCS calculations at start of line because start of line can -		// not be a DBCS trail byte. -		int startLine = pos; -		while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n') -			startLine--; -		bool atLeadByte = false; -		while (startLine < pos) { -			if (atLeadByte) -				atLeadByte = false; -			else if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine))) -				atLeadByte = true; -			else -				atLeadByte = false; -			startLine++; -			//Platform::DebugPrintf("DBCS %s\n", atlead ? "D" : "-"); -		} +		if (SC_CP_UTF8 == dbcsCodePage) { +			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos)); +			while ((pos > 0) && (pos < Length()) && (ch >= 0x80) && (ch < (0x80 + 0x40))) { +				// ch is a trail byte +				if (moveDir > 0) +					pos++; +				else  +					pos--; +				ch = static_cast<unsigned char>(cb.CharAt(pos)); +			} +		} else { +			// Anchor DBCS calculations at start of line because start of line can +			// not be a DBCS trail byte. +			int startLine = pos; +			while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n') +				startLine--; +			bool atLeadByte = false; +			while (startLine < pos) { +				if (atLeadByte) +					atLeadByte = false; +				else if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine))) +					atLeadByte = true; +				else +					atLeadByte = false; +				startLine++; +				//Platform::DebugPrintf("DBCS %s\n", atlead ? "D" : "-"); +			} -		if (atLeadByte) { -			// Position is between a lead byte and a trail byte -			if (moveDir > 0) -				return pos + 1; -			else -				return pos - 1; +			if (atLeadByte) { +				// Position is between a lead byte and a trail byte +				if (moveDir > 0) +					return pos + 1; +				else +					return pos - 1; +			}  		}  	}  #endif @@ -440,13 +479,7 @@ void Document::ChangeChar(int pos, char ch) {  }  void Document::DelChar(int pos) { -	if (IsCrLf(pos)) { -		DeleteChars(pos, 2); -	} else if (IsDBCS(pos)) { -		DeleteChars(pos, 2); -	} else if (pos < Length()) { -		DeleteChars(pos, 1); -	} +	DeleteChars(pos, LenChar(pos));  }  int Document::DelCharBack(int pos) { @@ -455,6 +488,10 @@ int Document::DelCharBack(int pos) {  	} else if (IsCrLf(pos - 2)) {  		DeleteChars(pos - 2, 2);  		return pos - 2; +	} else if (SC_CP_UTF8 == dbcsCodePage) { +		int startChar = MovePositionOutsideChar(pos-1, -1, false); +		DeleteChars(startChar, pos - startChar); +		return startChar;  	} else if (IsDBCS(pos - 1)) {  		DeleteChars(pos - 2, 2);  		return pos - 2; @@ -529,6 +566,8 @@ void Document::ConvertLineEnds(int eolModeSet) {  }  bool Document::IsWordChar(unsigned char ch) { +	if ((SC_CP_UTF8 == dbcsCodePage) && (ch >0x80)) +		return true;  	return wordchars[ch];  } @@ -653,7 +692,7 @@ void Document::ChangeCase(Range r, bool makeUpperCase) {  	for (int pos=r.start; pos<r.end; pos++) {  		char ch = CharAt(pos);  		if (dbcsCodePage && IsDBCS(pos)) { -			pos++; +			pos += LenChar(pos);  		} else {  			if (makeUpperCase) {  				if (islower(ch)) { | 
