From 1a05a259558efd7dffb118ca9b12257a1346d2ea Mon Sep 17 00:00:00 2001
From: Neil <nyamatongwe@gmail.com>
Date: Sat, 29 Jun 2013 20:32:52 +1000
Subject: Bug: [#1483]. Split GetRelativePosition into 2 calls: one for moving
 between character positions and the other for retrieving a character and
 its width.

---
 doc/ScintillaDoc.html | 15 +++++++++-----
 include/ILexer.h      |  3 ++-
 lexlib/LexAccessor.h  | 21 ++++++--------------
 lexlib/StyleContext.h | 25 +++++++++++++-----------
 src/Document.cxx      | 54 +++++++++++++++++++++++++++++----------------------
 src/Document.h        |  3 ++-
 6 files changed, 65 insertions(+), 56 deletions(-)
diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html
index abec92a5b..19829cbd8 100644
--- a/doc/ScintillaDoc.html
+++ b/doc/ScintillaDoc.html
@@ -82,7 +82,7 @@
 
     <h1>Scintilla Documentation</h1>
 
-    <p>Last edited 5/May/2013 NH</p>
+    <p>Last edited 29/June/2013 NH</p>
 
     <p>There is <a class="jump" href="Design.html">an overview of the internal design of
     Scintilla</a>.<br />
@@ -1344,7 +1344,7 @@ struct Sci_TextToFind {
     <p><b id="SCI_GETLINESELSTARTPOSITION">SCI_GETLINESELSTARTPOSITION(int line)</b><br />
     <b id="SCI_GETLINESELENDPOSITION">SCI_GETLINESELENDPOSITION(int line)</b><br />
     Retrieve the position of the start and end of the selection at the given line with
-    INVALID_POSITION returned if no selection on this line.</p>
+    <code>INVALID_POSITION</code> returned if no selection on this line.</p>
 
     <p><b id="SCI_MOVECARETINSIDEVIEW">SCI_MOVECARETINSIDEVIEW</b><br />
      If the caret is off the top or bottom of the view, it is moved to the nearest line that is
@@ -6322,17 +6322,22 @@ exception options.</p>
 <p>
 To allow lexers to determine the end position of a line and thus more easily support Unicode line ends
 <code>IDocument</code> is extended to <code>IDocumentWithLineEnd</code>.</p>
-<p>The <code>GetRelativePosition</code> method allows navigating the document by whole characters and provides a standard
+<p><code>GetRelativePosition</code> navigates the document by whole characters,
+returning <code>INVALID_POSITION</code> for movement beyond the start and end of the document.</p>
+<p><code>GetCharacterAndWidth</code> provides a standard
 conversion from UTF-8 bytes to a UTF-32 character or from DBCS to a 16 bit value.
-Invalid UTF-8 is reported as a character for each byte with values 0xDC80+byteValue, which are
+Bytes in invalid UTF-8 are reported individually with values 0xDC80+byteValue, which are
 not valid Unicode code points.
+The <code>pWidth</code> argument can be NULL if the caller does not need to know the number of
+bytes in the character.
 </p>
 
 <div class="highlighted">
 <span class="S5">class</span><span class="S0"> </span>IDocumentWithLineEnd<span class="S0"> </span><span class="S10">:</span><span class="S0"> </span><span class="S5">public</span><span class="S0"> </span>IDocument<span class="S0"> </span><span class="S10">{</span><br />
 <span class="S5">public</span><span class="S10">:</span><br />
 <span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>LineEnd<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>line<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
-<span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>start<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>character<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>width<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
+<span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>positionStart<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
+<span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetCharacterAndWidth<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>position<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>pWidth<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
 <span class="S10">};</span><br />
 </div>
 
diff --git a/include/ILexer.h b/include/ILexer.h
index 9f9225ef2..e93de819a 100644
--- a/include/ILexer.h
+++ b/include/ILexer.h
@@ -48,7 +48,8 @@ public:
 class IDocumentWithLineEnd : public IDocument {
 public:
 	virtual int SCI_METHOD LineEnd(int line) const = 0;
-	virtual int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const = 0;
+	virtual int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const = 0;
+	virtual int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const = 0;
 };
 
 enum { lvOriginal=0, lvSubStyles=1 };
diff --git a/lexlib/LexAccessor.h b/lexlib/LexAccessor.h
index 92e719360..e29bbc923 100644
--- a/lexlib/LexAccessor.h
+++ b/lexlib/LexAccessor.h
@@ -79,6 +79,12 @@ public:
 		}
 		return buf[position - startPos];
 	}
+	IDocumentWithLineEnd *MultiByteAccess() const {	// Extended document interface, or 0 when unavailable
+		if (documentVersion >= dvLineEnd) {	// downcast only when the version check passes
+			return static_cast<IDocumentWithLineEnd *>(pAccess);
+		}
+		return 0;	// callers must fall back to byte-oriented access
+	}
 	/** Safe version of operator[], returning a defined value for invalid position. */
 	char SafeGetCharAt(int position, char chDefault=' ') {
 		if (position < startPos || position >= endPos) {
@@ -126,21 +132,6 @@ public:
 				return startNext - 1;
 		}
 	}
-	int GetRelativePosition(int start, int characterOffset, int *character, int *width) {
-		if (documentVersion >= dvLineEnd) {
-			return (static_cast<IDocumentWithLineEnd *>(pAccess))->GetRelativePosition(
-				start, characterOffset, character, width);
-		} else {
-			// Old version -> byte-oriented only
-			// Handle doc range overflow
-			int posNew = start + characterOffset;
-			if ((posNew < 0) || (posNew > Length()))
-				return -1;
-			*character = SafeGetCharAt(posNew, 0);
-			*width = 1;
-			return start + characterOffset;
-		}
-	}
 	int LevelAt(int line) const {
 		return pAccess->GetLevel(line);
 	}
diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h
index 0b5dee379..fc6c60d2f 100644
--- a/lexlib/StyleContext.h
+++ b/lexlib/StyleContext.h
@@ -49,6 +49,7 @@ inline int BytesInUnicodeCodePoint(int codePoint) {
 // syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80
 class StyleContext {
 	LexAccessor &styler;
+	IDocumentWithLineEnd *multiByteAccess;
 	unsigned int endPos;
 	unsigned int lengthDocument;
 	
@@ -60,11 +61,11 @@ class StyleContext {
 	StyleContext &operator=(const StyleContext &);
 
 	void GetNextChar() {
-		if (styler.Encoding() == enc8bit) {
+		if (multiByteAccess) {
+			chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext);
+		} else {
 			chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0));
 			widthNext = 1;
-		} else {
-			styler.GetRelativePosition(currentPos+width, 0, &chNext, &widthNext);
 		}
 		// End of line determined from line end position, allowing CR, LF, 
 		// CRLF and Unicode line ends as set by document.
@@ -91,6 +92,7 @@ public:
 	StyleContext(unsigned int startPos, unsigned int length,
                         int initStyle, LexAccessor &styler_, char chMask=31) :
 		styler(styler_),
+		multiByteAccess(0),
 		endPos(startPos + length),
 		posRelative(0),
 		currentPosLastRelative(0x7FFFFFFF),
@@ -105,6 +107,9 @@ public:
 		width(0),
 		chNext(0),
 		widthNext(1) {
+		if (styler.Encoding() != enc8bit) {
+			multiByteAccess = styler.MultiByteAccess();
+		}
 		styler.StartAt(startPos, chMask);
 		styler.StartSegment(startPos);
 		currentLine = styler.GetLine(startPos);
@@ -182,13 +187,7 @@ public:
 	int GetRelativeCharacter(int n) {
 		if (n == 0)
 			return ch;
-		if (styler.Encoding() == enc8bit) {
-			// fast version for single byte encodings
-			return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
-		} else {
-			int ch = 0;
-			int width = 0;
-			//styler.GetRelativePosition(currentPos, n, &ch, &width);
+		if (multiByteAccess) {
 			if ((currentPosLastRelative != currentPos) ||
 				((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) ||
 				((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) {
@@ -196,11 +195,20 @@ public:
 				offsetRelative = 0;
 			}
 			int diffRelative = n - offsetRelative;
-			int posNew = styler.GetRelativePosition(posRelative, diffRelative, &ch, &width);
+			int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative);
+			if (posNew < 0) {
+				// Moved beyond the document (INVALID_POSITION): report no character and
+				// keep the cached position so later lookups do not start from an invalid one.
+				return 0;
+			}
+			int ch = multiByteAccess->GetCharacterAndWidth(posNew, 0);
 			posRelative = posNew;
 			currentPosLastRelative = currentPos;
 			offsetRelative = n;
 			return ch;
+		} else {
+			// fast version for single byte encodings
+			return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
 		}
 	}
 	bool Match(char ch0) const {
diff --git a/src/Document.cxx b/src/Document.cxx
index 472567068..a00fc9fc2 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -713,55 +713,63 @@ static inline int UnicodeFromBytes(const unsigned char *us) {
 }
 
 // Return -1  on out-of-bounds
-int SCI_METHOD Document::GetRelativePosition(int start, int characterOffset, int *character, int *width) const {
-	int pos = start;
+int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
+	int pos = positionStart;
 	if (dbcsCodePage) {
 		const int increment = (characterOffset > 0) ? 1 : -1;
 		while (characterOffset != 0) {
 			const int posNext = NextPosition(pos, increment);
 			if (posNext == pos)
-				return -1;
+				return INVALID_POSITION;
 			pos = posNext;
 			characterOffset -= increment;
 		}
-		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
+	} else {
+		pos = positionStart + characterOffset;
+		if ((pos < 0) || (pos > Length()))
+			return INVALID_POSITION;
+	}
+	return pos;
+}
+
+int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
+	int character;
+	int bytesInCharacter = 1;
+	if (dbcsCodePage) {
+		const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 		if (SC_CP_UTF8 == dbcsCodePage) {
 			if (UTF8IsAscii(leadByte)) {
 				// Single byte character or invalid
-				*character = leadByte;
-				*width = 1;
+				character =  leadByte;
 			} else {
 				const int widthCharBytes = UTF8BytesOfLead[leadByte];
 				unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 				for (int b=1; b<widthCharBytes; b++)
-					charBytes[b] = static_cast<unsigned char>(cb.CharAt(pos+b));
+					charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 				int utf8status = UTF8Classify(charBytes, widthCharBytes);
 				if (utf8status & UTF8MaskInvalid) {
-					// Report as singleton surrogate values which are invalid in Unicode
-					*character = 0xDC80 + leadByte;
-					*width = 1;
+					// Report as singleton surrogate values which are invalid Unicode
+					character =  0xDC80 + leadByte;
 				} else {
-					*character = UnicodeFromBytes(charBytes);
-					*width = utf8status & UTF8MaskWidth;
+					bytesInCharacter = utf8status & UTF8MaskWidth;
+					character = UnicodeFromBytes(charBytes);
 				}
 			}
-		} else if (dbcsCodePage) {
+		} else {
 			if (IsDBCSLeadByte(leadByte)) {
-				*character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(pos+1));
-				*width = 2;
+				bytesInCharacter = 2;
+				character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 			} else {
-				*character = leadByte;
-				*width = 1;
+				character = leadByte;
 			}
 		}
 	} else {
-		pos = start + characterOffset;
-		if ((pos < 0) || (pos > Length()))
-			return -1;
-		*character = cb.CharAt(pos);
-		*width = 1;
+		character = cb.CharAt(position);
 	}
-	return pos;
+	if (pWidth) {
+		*pWidth = bytesInCharacter;
+	}
+	return character;
 }
 
 int SCI_METHOD Document::CodePage() const {
diff --git a/src/Document.h b/src/Document.h
index 8eb8db74a..5c7e8f8a0 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -279,7 +279,8 @@ public:
 	int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);
 	int NextPosition(int pos, int moveDir) const;
 	bool NextCharacter(int &pos, int moveDir) const;	// Returns true if pos changed
-	int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const;
+	int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const;
+	int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const;
 	int SCI_METHOD CodePage() const;
 	bool SCI_METHOD IsDBCSLeadByte(char ch) const;
 	int SafeSegment(const char *text, int length, int lengthSegment) const;
-- 
cgit v1.2.3