aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--doc/ScintillaDoc.html15
-rw-r--r--include/ILexer.h3
-rw-r--r--lexlib/LexAccessor.h21
-rw-r--r--lexlib/StyleContext.h25
-rw-r--r--src/Document.cxx54
-rw-r--r--src/Document.h3
6 files changed, 65 insertions, 56 deletions
diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html
index abec92a5b..19829cbd8 100644
--- a/doc/ScintillaDoc.html
+++ b/doc/ScintillaDoc.html
@@ -82,7 +82,7 @@
<h1>Scintilla Documentation</h1>
- <p>Last edited 5/May/2013 NH</p>
+ <p>Last edited 29/June/2013 NH</p>
<p>There is <a class="jump" href="Design.html">an overview of the internal design of
Scintilla</a>.<br />
@@ -1344,7 +1344,7 @@ struct Sci_TextToFind {
<p><b id="SCI_GETLINESELSTARTPOSITION">SCI_GETLINESELSTARTPOSITION(int line)</b><br />
<b id="SCI_GETLINESELENDPOSITION">SCI_GETLINESELENDPOSITION(int line)</b><br />
Retrieve the position of the start and end of the selection at the given line with
- INVALID_POSITION returned if no selection on this line.</p>
+ <code>INVALID_POSITION</code> returned if no selection on this line.</p>
<p><b id="SCI_MOVECARETINSIDEVIEW">SCI_MOVECARETINSIDEVIEW</b><br />
If the caret is off the top or bottom of the view, it is moved to the nearest line that is
@@ -6322,17 +6322,22 @@ exception options.</p>
<p>
To allow lexers to determine the end position of a line and thus more easily support Unicode line ends
<code>IDocument</code> is extended to <code>IDocumentWithLineEnd</code>.</p>
-<p>The <code>GetRelativePosition</code> method allows navigating the document by whole characters and provides a standard
+<p><code>GetRelativePosition</code> navigates the document by whole characters,
+returning <code>INVALID_POSITION</code> for movement beyond the start or end of the document.</p>
+<p><code>GetCharacterAndWidth</code> provides a standard
conversion from UTF-8 bytes to a UTF-32 character or from DBCS to a 16 bit value.
-Invalid UTF-8 is reported as a character for each byte with values 0xDC80+byteValue, which are
+Bytes in invalid UTF-8 are reported individually with values 0xDC80+byteValue, which are
not valid Unicode code points.
+The <code>pWidth</code> argument can be NULL if the caller does not need to know the number of
+bytes in the character.
</p>
<div class="highlighted">
<span class="S5">class</span><span class="S0"> </span>IDocumentWithLineEnd<span class="S0"> </span><span class="S10">:</span><span class="S0"> </span><span class="S5">public</span><span class="S0"> </span>IDocument<span class="S0"> </span><span class="S10">{</span><br />
<span class="S5">public</span><span class="S10">:</span><br />
<span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>LineEnd<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>line<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
-<span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>start<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>character<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>width<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
+<span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetRelativePosition<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>positionStart<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>characterOffset<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
+<span class="S0">&nbsp; &nbsp; &nbsp; &nbsp; </span><span class="S5">virtual</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span>SCI_METHOD<span class="S0"> </span>GetCharacterAndWidth<span class="S10">(</span><span class="S5">int</span><span class="S0"> </span>position<span class="S10">,</span><span class="S0"> </span><span class="S5">int</span><span class="S0"> </span><span class="S10">*</span>pWidth<span class="S10">)</span><span class="S0"> </span><span class="S5">const</span><span class="S0"> </span><span class="S10">=</span><span class="S0"> </span><span class="S4">0</span><span class="S10">;</span><br />
<span class="S10">};</span><br />
</div>
diff --git a/include/ILexer.h b/include/ILexer.h
index 9f9225ef2..e93de819a 100644
--- a/include/ILexer.h
+++ b/include/ILexer.h
@@ -48,7 +48,8 @@ public:
class IDocumentWithLineEnd : public IDocument {
public:
virtual int SCI_METHOD LineEnd(int line) const = 0;
- virtual int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const = 0;
+ virtual int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const = 0;
+ virtual int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const = 0;
};
enum { lvOriginal=0, lvSubStyles=1 };
diff --git a/lexlib/LexAccessor.h b/lexlib/LexAccessor.h
index 92e719360..e29bbc923 100644
--- a/lexlib/LexAccessor.h
+++ b/lexlib/LexAccessor.h
@@ -79,6 +79,12 @@ public:
}
return buf[position - startPos];
}
+ IDocumentWithLineEnd *MultiByteAccess() const {
+ if (documentVersion >= dvLineEnd) {
+ return static_cast<IDocumentWithLineEnd *>(pAccess);
+ }
+ return 0;
+ }
/** Safe version of operator[], returning a defined value for invalid position. */
char SafeGetCharAt(int position, char chDefault=' ') {
if (position < startPos || position >= endPos) {
@@ -126,21 +132,6 @@ public:
return startNext - 1;
}
}
- int GetRelativePosition(int start, int characterOffset, int *character, int *width) {
- if (documentVersion >= dvLineEnd) {
- return (static_cast<IDocumentWithLineEnd *>(pAccess))->GetRelativePosition(
- start, characterOffset, character, width);
- } else {
- // Old version -> byte-oriented only
- // Handle doc range overflow
- int posNew = start + characterOffset;
- if ((posNew < 0) || (posNew > Length()))
- return -1;
- *character = SafeGetCharAt(posNew, 0);
- *width = 1;
- return start + characterOffset;
- }
- }
int LevelAt(int line) const {
return pAccess->GetLevel(line);
}
diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h
index 0b5dee379..fc6c60d2f 100644
--- a/lexlib/StyleContext.h
+++ b/lexlib/StyleContext.h
@@ -49,6 +49,7 @@ inline int BytesInUnicodeCodePoint(int codePoint) {
// syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80
class StyleContext {
LexAccessor &styler;
+ IDocumentWithLineEnd *multiByteAccess;
unsigned int endPos;
unsigned int lengthDocument;
@@ -60,11 +61,11 @@ class StyleContext {
StyleContext &operator=(const StyleContext &);
void GetNextChar() {
- if (styler.Encoding() == enc8bit) {
+ if (multiByteAccess) {
+ chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext);
+ } else {
chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0));
widthNext = 1;
- } else {
- styler.GetRelativePosition(currentPos+width, 0, &chNext, &widthNext);
}
// End of line determined from line end position, allowing CR, LF,
// CRLF and Unicode line ends as set by document.
@@ -91,6 +92,7 @@ public:
StyleContext(unsigned int startPos, unsigned int length,
int initStyle, LexAccessor &styler_, char chMask=31) :
styler(styler_),
+ multiByteAccess(0),
endPos(startPos + length),
posRelative(0),
currentPosLastRelative(0x7FFFFFFF),
@@ -105,6 +107,9 @@ public:
width(0),
chNext(0),
widthNext(1) {
+ if (styler.Encoding() != enc8bit) {
+ multiByteAccess = styler.MultiByteAccess();
+ }
styler.StartAt(startPos, chMask);
styler.StartSegment(startPos);
currentLine = styler.GetLine(startPos);
@@ -182,13 +187,7 @@ public:
int GetRelativeCharacter(int n) {
if (n == 0)
return ch;
- if (styler.Encoding() == enc8bit) {
- // fast version for single byte encodings
- return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
- } else {
- int ch = 0;
- int width = 0;
- //styler.GetRelativePosition(currentPos, n, &ch, &width);
+ if (multiByteAccess) {
if ((currentPosLastRelative != currentPos) ||
((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) ||
((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) {
@@ -196,11 +195,15 @@ public:
offsetRelative = 0;
}
int diffRelative = n - offsetRelative;
- int posNew = styler.GetRelativePosition(posRelative, diffRelative, &ch, &width);
+ int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative);
+ int ch = multiByteAccess->GetCharacterAndWidth(posNew, 0);
posRelative = posNew;
currentPosLastRelative = currentPos;
offsetRelative = n;
return ch;
+ } else {
+ // fast version for single byte encodings
+ return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
}
}
bool Match(char ch0) const {
diff --git a/src/Document.cxx b/src/Document.cxx
index 472567068..a00fc9fc2 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -713,55 +713,63 @@ static inline int UnicodeFromBytes(const unsigned char *us) {
}
// Return INVALID_POSITION on out-of-bounds
-int SCI_METHOD Document::GetRelativePosition(int start, int characterOffset, int *character, int *width) const {
- int pos = start;
+int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
+ int pos = positionStart;
if (dbcsCodePage) {
const int increment = (characterOffset > 0) ? 1 : -1;
while (characterOffset != 0) {
const int posNext = NextPosition(pos, increment);
if (posNext == pos)
- return -1;
+ return INVALID_POSITION;
pos = posNext;
characterOffset -= increment;
}
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
+ } else {
+ pos = positionStart + characterOffset;
+ if ((pos < 0) || (pos > Length()))
+ return INVALID_POSITION;
+ }
+ return pos;
+}
+
+int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
+ int character;
+ int bytesInCharacter = 1;
+ if (dbcsCodePage) {
+ const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
if (SC_CP_UTF8 == dbcsCodePage) {
if (UTF8IsAscii(leadByte)) {
// Single byte character or invalid
- *character = leadByte;
- *width = 1;
+ character = leadByte;
} else {
const int widthCharBytes = UTF8BytesOfLead[leadByte];
unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
for (int b=1; b<widthCharBytes; b++)
- charBytes[b] = static_cast<unsigned char>(cb.CharAt(pos+b));
+ charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
int utf8status = UTF8Classify(charBytes, widthCharBytes);
if (utf8status & UTF8MaskInvalid) {
- // Report as singleton surrogate values which are invalid in Unicode
- *character = 0xDC80 + leadByte;
- *width = 1;
+ // Report as singleton surrogate values which are invalid Unicode
+ character = 0xDC80 + leadByte;
} else {
- *character = UnicodeFromBytes(charBytes);
- *width = utf8status & UTF8MaskWidth;
+ bytesInCharacter = utf8status & UTF8MaskWidth;
+ character = UnicodeFromBytes(charBytes);
}
}
- } else if (dbcsCodePage) {
+ } else {
if (IsDBCSLeadByte(leadByte)) {
- *character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(pos+1));
- *width = 2;
+ bytesInCharacter = 2;
+ character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
} else {
- *character = leadByte;
- *width = 1;
+ character = leadByte;
}
}
} else {
- pos = start + characterOffset;
- if ((pos < 0) || (pos > Length()))
- return -1;
- *character = cb.CharAt(pos);
- *width = 1;
+ character = cb.CharAt(position);
}
- return pos;
+ if (pWidth) {
+ *pWidth = bytesInCharacter;
+ }
+ return character;
}
int SCI_METHOD Document::CodePage() const {
diff --git a/src/Document.h b/src/Document.h
index 8eb8db74a..5c7e8f8a0 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -279,7 +279,8 @@ public:
int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);
int NextPosition(int pos, int moveDir) const;
bool NextCharacter(int &pos, int moveDir) const; // Returns true if pos changed
- int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const;
+ int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const;
+ int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const;
int SCI_METHOD CodePage() const;
bool SCI_METHOD IsDBCSLeadByte(char ch) const;
int SafeSegment(const char *text, int length, int lengthSegment) const;