diff options
| -rw-r--r-- | src/Document.cxx | 70 | ||||
| -rw-r--r-- | src/Document.h | 1 | 
2 files changed, 71 insertions, 0 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 07031462b..f9a1fede8 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -505,6 +505,76 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
 	return pos;
 }
 
+// NextPosition moves from one valid position to the next (moveDir > 0) or the previous
+// one (otherwise), clamped to [0, Length()]. It can not handle a position in the middle of a
+// multi-byte character. It iterates through text more efficiently than MovePositionOutsideChar.
+int Document::NextPosition(int pos, int moveDir) {
+	// If out of range, just return minimum/maximum value.
+	int increment = (moveDir > 0) ? 1 : -1;
+	if (pos + increment <= 0)
+		return 0;
+	if (pos + increment >= Length())
+		return Length();
+
+	// Step over a CR LF pair (as reported by IsCrLf) in a single move.
+	if (moveDir > 0) {
+		if (IsCrLf(pos))
+			return pos + 2;
+	} else {
+		if ((pos >= 2) && IsCrLf(pos-2))
+			return pos - 2;
+	}
+
+	if (dbcsCodePage) {
+		if (SC_CP_UTF8 == dbcsCodePage) {
+			pos += increment;
+			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
+			int startUTF = pos;
+			int endUTF = pos;
+			if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
+				// ch is a trail byte within a UTF-8 character
+				if (moveDir > 0)
+					pos = endUTF;
+				else
+					pos = startUTF;
+			}
+		} else {
+			if (moveDir > 0) {
+				int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
+				pos += mbsize;
+				if (pos > Length())
+					pos = Length();
+			} else {
+				// Anchor DBCS calculations at start of line because start of line can
+				// not be a DBCS trail byte.
+				int posStartLine = LineStart(LineFromPosition(pos));
+				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
+				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
+				if ((pos - 1) <= posStartLine) {
+					// Return pos - 1, not posStartLine: when pos is itself the line
+					// start, returning posStartLine would not move backwards at all.
+					return pos - 1;
+				} else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
+					// Must actually be trail byte
+					return pos - 2;
+				} else {
+					// Otherwise, step back until a non-lead-byte is found.
+					int posTemp = pos - 1;
+					while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
+						;
+					// Now posTemp+1 must point to the beginning of a character, so whether we went back
+					// an even or an odd number of bytes decides between going back 1 or 2 bytes.
+					return (pos - 1 - ((pos - posTemp) & 1));
+				}
+			}
+		}
+	} else {
+		pos += increment;
+	}
+
+	return pos;
+}
+
 int SCI_METHOD Document::CodePage() const {
 	return dbcsCodePage;
 }
diff --git a/src/Document.h b/src/Document.h
index ffadbade3..d87840872 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -230,6 +230,7 @@ public:
 	int LenChar(int pos);
 	bool InGoodUTF8(int pos, int &start, int &end);
 	int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);
+	int NextPosition(int pos, int moveDir);
 	int SCI_METHOD CodePage() const;
 	bool SCI_METHOD IsDBCSLeadByte(char ch) const;
 
