diff options
| author | Neil <nyamatongwe@gmail.com> | 2013-06-29 20:32:52 +1000 | 
|---|---|---|
| committer | Neil <nyamatongwe@gmail.com> | 2013-06-29 20:32:52 +1000 | 
| commit | 1a05a259558efd7dffb118ca9b12257a1346d2ea (patch) | |
| tree | ba1add8f0a55423febdb76ee76814d3c78d62fb3 | |
| parent | 557cf43ce45fff3d9af697bc19754428f1b5a6a6 (diff) | |
| download | scintilla-mirror-1a05a259558efd7dffb118ca9b12257a1346d2ea.tar.gz | |
Bug: [#1483]. Split GetRelativePosition into 2 calls: one for moving between character
positions and the other for retrieving a character and its width.
| -rw-r--r-- | doc/ScintillaDoc.html | 15 | ||||
| -rw-r--r-- | include/ILexer.h | 3 | ||||
| -rw-r--r-- | lexlib/LexAccessor.h | 21 | ||||
| -rw-r--r-- | lexlib/StyleContext.h | 25 | ||||
| -rw-r--r-- | src/Document.cxx | 54 | ||||
| -rw-r--r-- | src/Document.h | 3 | 
6 files changed, 65 insertions, 56 deletions
diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html index abec92a5b..19829cbd8 100644 --- a/doc/ScintillaDoc.html +++ b/doc/ScintillaDoc.html @@ -82,7 +82,7 @@      <h1>Scintilla Documentation</h1> -    <p>Last edited 5/May/2013 NH</p> +    <p>Last edited 29/June/2013 NH</p>      <p>There is <a class="jump" href="Design.html">an overview of the internal design of      Scintilla</a>.<br /> @@ -1344,7 +1344,7 @@ struct Sci_TextToFind {      <p><b id="SCI_GETLINESELSTARTPOSITION">SCI_GETLINESELSTARTPOSITION(int line)</b><br />      <b id="SCI_GETLINESELENDPOSITION">SCI_GETLINESELENDPOSITION(int line)</b><br />      Retrieve the position of the start and end of the selection at the given line with -    INVALID_POSITION returned if no selection on this line.</p> +    <code>INVALID_POSITION</code> returned if no selection on this line.</p>      <p><b id="SCI_MOVECARETINSIDEVIEW">SCI_MOVECARETINSIDEVIEW</b><br />       If the caret is off the top or bottom of the view, it is moved to the nearest line that is @@ -6322,17 +6322,22 @@ exception options.</p>  <p>  To allow lexers to determine the end position of a line and thus more easily support Unicode line ends  <code>IDocument</code> is extended to <code>IDocumentWithLineEnd</code>.</p> -<p>The <code>GetRelativePosition</code> method allows navigating the document by whole characters and provides a standard +<p><code>GetRelativePosition</code> navigates the document by whole characters, +returning <code>INVALID_POSITION</code> for movement beyond the start and end of the document.</p> +<p><code>GetCharacterAndWidth</code> provides a standard  conversion from UTF-8 bytes to a UTF-32 character or from DBCS to a 16 bit value. -Invalid UTF-8 is reported as a character for each byte with values 0xDC80+byteValue, which are +Bytes in invalid UTF-8 are reported individually with values 0xDC80+byteValue, which are  not valid Unicode code points. 
+The <code>pWidth</code> argument can be NULL if the caller does not need to know the number of +bytes in the character.  </p>  <div class="highlighted">  <span class="S5">class</span><span class="S0"> </span>IDocumentWithLineEnd<span class="S0"> </span><span class="S10">:</span><span class="S0"> </span><span class="S5">public</span><span class="S0"> </span>IDocument<span class="S0"> </span><span class="S10">{</span><br />  <span class="S5">public</span><span class="S10">:</span><br />  <span class="S0">        </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>LineEnd<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>line<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br /> -<span class="S0">        </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>start<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>character<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>width<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br /> +<span class="S0">        </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> 
</span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>positionStart<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br /> +<span class="S0">        </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetCharacterAndWidth<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>position<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>pWidth<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />  <span class="S10">};</span><br />  </div> diff --git a/include/ILexer.h b/include/ILexer.h index 9f9225ef2..e93de819a 100644 --- a/include/ILexer.h +++ b/include/ILexer.h @@ -48,7 +48,8 @@ public:  class IDocumentWithLineEnd : public IDocument {  public:  	virtual int SCI_METHOD LineEnd(int line) const = 0; -	virtual int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const = 0; +	virtual int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const = 0; +	virtual int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const = 0;  };  enum { lvOriginal=0, lvSubStyles=1 }; diff --git a/lexlib/LexAccessor.h b/lexlib/LexAccessor.h index 92e719360..e29bbc923 100644 --- a/lexlib/LexAccessor.h +++ b/lexlib/LexAccessor.h @@ -79,6 +79,12 @@ public:  		}  		return buf[position - startPos];  	} +	
IDocumentWithLineEnd *MultiByteAccess() const { +		if (documentVersion >= dvLineEnd) { +			return static_cast<IDocumentWithLineEnd *>(pAccess); +		} +		return 0; +	}  	/** Safe version of operator[], returning a defined value for invalid position. */  	char SafeGetCharAt(int position, char chDefault=' ') {  		if (position < startPos || position >= endPos) { @@ -126,21 +132,6 @@ public:  				return startNext - 1;  		}  	} -	int GetRelativePosition(int start, int characterOffset, int *character, int *width) { -		if (documentVersion >= dvLineEnd) { -			return (static_cast<IDocumentWithLineEnd *>(pAccess))->GetRelativePosition( -				start, characterOffset, character, width); -		} else { -			// Old version -> byte-oriented only -			// Handle doc range overflow -			int posNew = start + characterOffset; -			if ((posNew < 0) || (posNew > Length())) -				return -1; -			*character = SafeGetCharAt(posNew, 0); -			*width = 1; -			return start + characterOffset; -		} -	}  	int LevelAt(int line) const {  		return pAccess->GetLevel(line);  	} diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h index 0b5dee379..fc6c60d2f 100644 --- a/lexlib/StyleContext.h +++ b/lexlib/StyleContext.h @@ -49,6 +49,7 @@ inline int BytesInUnicodeCodePoint(int codePoint) {  // syntactically significant. 
UTF-8 avoids this as all trail bytes are >= 0x80  class StyleContext {  	LexAccessor &styler; +	IDocumentWithLineEnd *multiByteAccess;  	unsigned int endPos;  	unsigned int lengthDocument; @@ -60,11 +61,11 @@ class StyleContext {  	StyleContext &operator=(const StyleContext &);  	void GetNextChar() { -		if (styler.Encoding() == enc8bit) { +		if (multiByteAccess) { +			chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext); +		} else {  			chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0));  			widthNext = 1; -		} else { -			styler.GetRelativePosition(currentPos+width, 0, &chNext, &widthNext);  		}  		// End of line determined from line end position, allowing CR, LF,   		// CRLF and Unicode line ends as set by document. @@ -91,6 +92,7 @@ public:  	StyleContext(unsigned int startPos, unsigned int length,                          int initStyle, LexAccessor &styler_, char chMask=31) :  		styler(styler_), +		multiByteAccess(0),  		endPos(startPos + length),  		posRelative(0),  		currentPosLastRelative(0x7FFFFFFF), @@ -105,6 +107,9 @@ public:  		width(0),  		chNext(0),  		widthNext(1) { +		if (styler.Encoding() != enc8bit) { +			multiByteAccess = styler.MultiByteAccess(); +		}  		styler.StartAt(startPos, chMask);  		styler.StartSegment(startPos);  		currentLine = styler.GetLine(startPos); @@ -182,13 +187,7 @@ public:  	int GetRelativeCharacter(int n) {  		if (n == 0)  			return ch; -		if (styler.Encoding() == enc8bit) { -			// fast version for single byte encodings -			return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0)); -		} else { -			int ch = 0; -			int width = 0; -			//styler.GetRelativePosition(currentPos, n, &ch, &width); +		if (multiByteAccess) {  			if ((currentPosLastRelative != currentPos) ||  				((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) ||  				((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) { @@ -196,11 +195,15 @@ public:  				offsetRelative = 0;  			}  			
int diffRelative = n - offsetRelative; -			int posNew = styler.GetRelativePosition(posRelative, diffRelative, &ch, &width); +			int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative); +			int ch = multiByteAccess->GetCharacterAndWidth(posNew, 0);  			posRelative = posNew;  			currentPosLastRelative = currentPos;  			offsetRelative = n;  			return ch; +		} else { +			// fast version for single byte encodings +			return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));  		}  	}  	bool Match(char ch0) const { diff --git a/src/Document.cxx b/src/Document.cxx index 472567068..a00fc9fc2 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -713,55 +713,63 @@ static inline int UnicodeFromBytes(const unsigned char *us) {  }  // Return -1  on out-of-bounds -int SCI_METHOD Document::GetRelativePosition(int start, int characterOffset, int *character, int *width) const { -	int pos = start; +int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const { +	int pos = positionStart;  	if (dbcsCodePage) {  		const int increment = (characterOffset > 0) ? 
1 : -1;  		while (characterOffset != 0) {  			const int posNext = NextPosition(pos, increment);  			if (posNext == pos) -				return -1; +				return INVALID_POSITION;  			pos = posNext;  			characterOffset -= increment;  		} -		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos)); +	} else { +		pos = positionStart + characterOffset; +		if ((pos < 0) || (pos > Length())) +			return INVALID_POSITION; +	} +	return pos; +} + +int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const { +	int character; +	int bytesInCharacter = 1; +	if (dbcsCodePage) { +		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));  		if (SC_CP_UTF8 == dbcsCodePage) {  			if (UTF8IsAscii(leadByte)) {  				// Single byte character or invalid -				*character = leadByte; -				*width = 1; +				character =  leadByte;  			} else {  				const int widthCharBytes = UTF8BytesOfLead[leadByte];  				unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};  				for (int b=1; b<widthCharBytes; b++) -					charBytes[b] = static_cast<unsigned char>(cb.CharAt(pos+b)); +					charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));  				int utf8status = UTF8Classify(charBytes, widthCharBytes);  				if (utf8status & UTF8MaskInvalid) { -					// Report as singleton surrogate values which are invalid in Unicode -					*character = 0xDC80 + leadByte; -					*width = 1; +					// Report as singleton surrogate values which are invalid Unicode +					character =  0xDC80 + leadByte;  				} else { -					*character = UnicodeFromBytes(charBytes); -					*width = utf8status & UTF8MaskWidth; +					bytesInCharacter = utf8status & UTF8MaskWidth; +					character = UnicodeFromBytes(charBytes);  				}  			} -		} else if (dbcsCodePage) { +		} else {  			if (IsDBCSLeadByte(leadByte)) { -				*character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(pos+1)); -				*width = 2; +				bytesInCharacter = 2; +				character = (leadByte << 8) | static_cast<unsigned 
char>(cb.CharAt(position+1));  			} else { -				*character = leadByte; -				*width = 1; +				character = leadByte;  			}  		}  	} else { -		pos = start + characterOffset; -		if ((pos < 0) || (pos > Length())) -			return -1; -		*character = cb.CharAt(pos); -		*width = 1; +		character = cb.CharAt(position);  	} -	return pos; +	if (pWidth) { +		*pWidth = bytesInCharacter; +	} +	return character;  }  int SCI_METHOD Document::CodePage() const { diff --git a/src/Document.h b/src/Document.h index 8eb8db74a..5c7e8f8a0 100644 --- a/src/Document.h +++ b/src/Document.h @@ -279,7 +279,8 @@ public:  	int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);  	int NextPosition(int pos, int moveDir) const;  	bool NextCharacter(int &pos, int moveDir) const;	// Returns true if pos changed -	int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const; +	int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const; +	int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const;  	int SCI_METHOD CodePage() const;  	bool SCI_METHOD IsDBCSLeadByte(char ch) const;  	int SafeSegment(const char *text, int length, int lengthSegment) const;  | 
