diff options
author | Neil <nyamatongwe@gmail.com> | 2013-06-29 20:32:52 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2013-06-29 20:32:52 +1000 |
commit | d9f1a1b042f9458f2d4a905f20b9547c4ac2be70 (patch) | |
tree | 4db1263c22fcca231b754d8de86f1f5bb484fb87 | |
parent | 05cdbbd2ec9de91787c9306798669532c8779248 (diff) | |
download | scintilla-mirror-d9f1a1b042f9458f2d4a905f20b9547c4ac2be70.tar.gz |
Bug: [#1483]. Split GetRelativePosition into 2 calls: one for moving between character
positions and the other for retrieving a character and its width.
-rw-r--r-- | doc/ScintillaDoc.html | 15 | ||||
-rw-r--r-- | include/ILexer.h | 3 | ||||
-rw-r--r-- | lexlib/LexAccessor.h | 21 | ||||
-rw-r--r-- | lexlib/StyleContext.h | 25 | ||||
-rw-r--r-- | src/Document.cxx | 54 | ||||
-rw-r--r-- | src/Document.h | 3 |
6 files changed, 65 insertions, 56 deletions
diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html index abec92a5b..19829cbd8 100644 --- a/doc/ScintillaDoc.html +++ b/doc/ScintillaDoc.html @@ -82,7 +82,7 @@ <h1>Scintilla Documentation</h1> - <p>Last edited 5/May/2013 NH</p> + <p>Last edited 29/June/2013 NH</p> <p>There is <a class="jump" href="Design.html">an overview of the internal design of Scintilla</a>.<br /> @@ -1344,7 +1344,7 @@ struct Sci_TextToFind { <p><b id="SCI_GETLINESELSTARTPOSITION">SCI_GETLINESELSTARTPOSITION(int line)</b><br /> <b id="SCI_GETLINESELENDPOSITION">SCI_GETLINESELENDPOSITION(int line)</b><br /> Retrieve the position of the start and end of the selection at the given line with - INVALID_POSITION returned if no selection on this line.</p> + <code>INVALID_POSITION</code> returned if no selection on this line.</p> <p><b id="SCI_MOVECARETINSIDEVIEW">SCI_MOVECARETINSIDEVIEW</b><br /> If the caret is off the top or bottom of the view, it is moved to the nearest line that is @@ -6322,17 +6322,22 @@ exception options.</p> <p> To allow lexers to determine the end position of a line and thus more easily support Unicode line ends <code>IDocument</code> is extended to <code>IDocumentWithLineEnd</code>.</p> -<p>The <code>GetRelativePosition</code> method allows navigating the document by whole characters and provides a standard +<p><code>GetRelativePosition</code> navigates the document by whole characters, +returning <code>INVALID_POSITION</code> for movement beyond the start and end of the document.</p> +<p><code>GetCharacterAndWidth</code> provides a standard conversion from UTF-8 bytes to a UTF-32 character or from DBCS to a 16 bit value. -Invalid UTF-8 is reported as a character for each byte with values 0xDC80+byteValue, which are +Bytes in invalid UTF-8 are reported individually with values 0xDC80+byteValue, which are not valid Unicode code points. +The <code>pWidth</code> argument can be NULL if the caller does not need to know the number of +bytes in the character. 
</p> <div class="highlighted"> <span class="S5">class</span><span class="S0"> </span>IDocumentWithLineEnd<span class="S0"> </span><span class="S10">:</span><span class="S0"> </span><span class="S5">public</span><span class="S0"> </span>IDocument<span class="S0"> </span><span class="S10">{</span><br /> <span class="S5">public</span><span class="S10">:</span><br /> <span class="S0"> </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>LineEnd<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>line<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br /> -<span class="S0"> </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>start<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>character<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>width<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br /> +<span class="S0"> </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>positionStart<span 
class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br /> +<span class="S0"> </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetCharacterAndWidth<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>position<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>pWidth<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br /> <span class="S10">};</span><br /> </div> diff --git a/include/ILexer.h b/include/ILexer.h index 9f9225ef2..e93de819a 100644 --- a/include/ILexer.h +++ b/include/ILexer.h @@ -48,7 +48,8 @@ public: class IDocumentWithLineEnd : public IDocument { public: virtual int SCI_METHOD LineEnd(int line) const = 0; - virtual int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const = 0; + virtual int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const = 0; + virtual int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const = 0; }; enum { lvOriginal=0, lvSubStyles=1 }; diff --git a/lexlib/LexAccessor.h b/lexlib/LexAccessor.h index 92e719360..e29bbc923 100644 --- a/lexlib/LexAccessor.h +++ b/lexlib/LexAccessor.h @@ -79,6 +79,12 @@ public: } return buf[position - startPos]; } + IDocumentWithLineEnd *MultiByteAccess() const { + if (documentVersion >= dvLineEnd) { + return static_cast<IDocumentWithLineEnd *>(pAccess); + } + return 0; + } /** Safe version of operator[], 
returning a defined value for invalid position. */ char SafeGetCharAt(int position, char chDefault=' ') { if (position < startPos || position >= endPos) { @@ -126,21 +132,6 @@ public: return startNext - 1; } } - int GetRelativePosition(int start, int characterOffset, int *character, int *width) { - if (documentVersion >= dvLineEnd) { - return (static_cast<IDocumentWithLineEnd *>(pAccess))->GetRelativePosition( - start, characterOffset, character, width); - } else { - // Old version -> byte-oriented only - // Handle doc range overflow - int posNew = start + characterOffset; - if ((posNew < 0) || (posNew > Length())) - return -1; - *character = SafeGetCharAt(posNew, 0); - *width = 1; - return start + characterOffset; - } - } int LevelAt(int line) const { return pAccess->GetLevel(line); } diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h index 0b5dee379..fc6c60d2f 100644 --- a/lexlib/StyleContext.h +++ b/lexlib/StyleContext.h @@ -49,6 +49,7 @@ inline int BytesInUnicodeCodePoint(int codePoint) { // syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80 class StyleContext { LexAccessor &styler; + IDocumentWithLineEnd *multiByteAccess; unsigned int endPos; unsigned int lengthDocument; @@ -60,11 +61,11 @@ class StyleContext { StyleContext &operator=(const StyleContext &); void GetNextChar() { - if (styler.Encoding() == enc8bit) { + if (multiByteAccess) { + chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext); + } else { chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0)); widthNext = 1; - } else { - styler.GetRelativePosition(currentPos+width, 0, &chNext, &widthNext); } // End of line determined from line end position, allowing CR, LF, // CRLF and Unicode line ends as set by document. 
@@ -91,6 +92,7 @@ public: StyleContext(unsigned int startPos, unsigned int length, int initStyle, LexAccessor &styler_, char chMask=31) : styler(styler_), + multiByteAccess(0), endPos(startPos + length), posRelative(0), currentPosLastRelative(0x7FFFFFFF), @@ -105,6 +107,9 @@ public: width(0), chNext(0), widthNext(1) { + if (styler.Encoding() != enc8bit) { + multiByteAccess = styler.MultiByteAccess(); + } styler.StartAt(startPos, chMask); styler.StartSegment(startPos); currentLine = styler.GetLine(startPos); @@ -182,13 +187,7 @@ public: int GetRelativeCharacter(int n) { if (n == 0) return ch; - if (styler.Encoding() == enc8bit) { - // fast version for single byte encodings - return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0)); - } else { - int ch = 0; - int width = 0; - //styler.GetRelativePosition(currentPos, n, &ch, &width); + if (multiByteAccess) { if ((currentPosLastRelative != currentPos) || ((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) || ((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) { @@ -196,11 +195,15 @@ public: offsetRelative = 0; } int diffRelative = n - offsetRelative; - int posNew = styler.GetRelativePosition(posRelative, diffRelative, &ch, &width); + int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative); + int ch = multiByteAccess->GetCharacterAndWidth(posNew, 0); posRelative = posNew; currentPosLastRelative = currentPos; offsetRelative = n; return ch; + } else { + // fast version for single byte encodings + return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0)); } } bool Match(char ch0) const { diff --git a/src/Document.cxx b/src/Document.cxx index 472567068..a00fc9fc2 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -713,55 +713,63 @@ static inline int UnicodeFromBytes(const unsigned char *us) { } // Return -1 on out-of-bounds -int SCI_METHOD Document::GetRelativePosition(int start, int characterOffset, int *character, int *width) const { - int pos = 
start; +int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const { + int pos = positionStart; if (dbcsCodePage) { const int increment = (characterOffset > 0) ? 1 : -1; while (characterOffset != 0) { const int posNext = NextPosition(pos, increment); if (posNext == pos) - return -1; + return INVALID_POSITION; pos = posNext; characterOffset -= increment; } - const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos)); + } else { + pos = positionStart + characterOffset; + if ((pos < 0) || (pos > Length())) + return INVALID_POSITION; + } + return pos; +} + +int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const { + int character; + int bytesInCharacter = 1; + if (dbcsCodePage) { + const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position)); if (SC_CP_UTF8 == dbcsCodePage) { if (UTF8IsAscii(leadByte)) { // Single byte character or invalid - *character = leadByte; - *width = 1; + character = leadByte; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b<widthCharBytes; b++) - charBytes[b] = static_cast<unsigned char>(cb.CharAt(pos+b)); + charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b)); int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { - // Report as singleton surrogate values which are invalid in Unicode - *character = 0xDC80 + leadByte; - *width = 1; + // Report as singleton surrogate values which are invalid Unicode + character = 0xDC80 + leadByte; } else { - *character = UnicodeFromBytes(charBytes); - *width = utf8status & UTF8MaskWidth; + bytesInCharacter = utf8status & UTF8MaskWidth; + character = UnicodeFromBytes(charBytes); } } - } else if (dbcsCodePage) { + } else { if (IsDBCSLeadByte(leadByte)) { - *character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(pos+1)); - *width = 2; + bytesInCharacter = 2; + character = 
(leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1)); } else { - *character = leadByte; - *width = 1; + character = leadByte; } } } else { - pos = start + characterOffset; - if ((pos < 0) || (pos > Length())) - return -1; - *character = cb.CharAt(pos); - *width = 1; + character = cb.CharAt(position); } - return pos; + if (pWidth) { + *pWidth = bytesInCharacter; + } + return character; } int SCI_METHOD Document::CodePage() const { diff --git a/src/Document.h b/src/Document.h index 8eb8db74a..5c7e8f8a0 100644 --- a/src/Document.h +++ b/src/Document.h @@ -279,7 +279,8 @@ public: int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true); int NextPosition(int pos, int moveDir) const; bool NextCharacter(int &pos, int moveDir) const; // Returns true if pos changed - int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const; + int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const; + int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const; int SCI_METHOD CodePage() const; bool SCI_METHOD IsDBCSLeadByte(char ch) const; int SafeSegment(const char *text, int length, int lengthSegment) const; |