From 1a05a259558efd7dffb118ca9b12257a1346d2ea Mon Sep 17 00:00:00 2001
From: Neil
Date: Sat, 29 Jun 2013 20:32:52 +1000
Subject: Bug: [#1483]. Split GetRelativePosition into 2 calls one for moving
between character positions and the other for retrieving a character and
width.
---
doc/ScintillaDoc.html | 15 +++++++++-----
include/ILexer.h | 3 ++-
lexlib/LexAccessor.h | 21 ++++++--------------
lexlib/StyleContext.h | 25 +++++++++++++-----------
src/Document.cxx | 54 +++++++++++++++++++++++++++++----------------------
src/Document.h | 3 ++-
6 files changed, 65 insertions(+), 56 deletions(-)
diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html
index abec92a5b..19829cbd8 100644
--- a/doc/ScintillaDoc.html
+++ b/doc/ScintillaDoc.html
@@ -82,7 +82,7 @@
Scintilla Documentation
- Last edited 5/May/2013 NH
+ Last edited 29/June/2013 NH
There is an overview of the internal design of
Scintilla.
@@ -1344,7 +1344,7 @@ struct Sci_TextToFind {
SCI_GETLINESELSTARTPOSITION(int line)
SCI_GETLINESELENDPOSITION(int line)
Retrieve the position of the start and end of the selection at the given line with
- INVALID_POSITION returned if no selection on this line.
+ INVALID_POSITION returned if no selection on this line.
SCI_MOVECARETINSIDEVIEW
If the caret is off the top or bottom of the view, it is moved to the nearest line that is
@@ -6322,17 +6322,22 @@ exception options.
To allow lexers to determine the end position of a line and thus more easily support Unicode line ends
IDocument is extended to IDocumentWithLineEnd.
-The GetRelativePosition method allows navigating the document by whole characters and provides a standard
+GetRelativePosition navigates the document by whole characters,
+returning INVALID_POSITION for movement beyond the start and end of the document.
+GetCharacterAndWidth provides a standard
conversion from UTF-8 bytes to a UTF-32 character or from DBCS to a 16 bit value.
-Invalid UTF-8 is reported as a character for each byte with values 0xDC80+byteValue, which are
+Bytes in invalid UTF-8 are reported individually with values 0xDC80+byteValue, which are
not valid Unicode code points.
+The pWidth argument can be NULL if the caller does not need to know the number of
+bytes in the character.
class IDocumentWithLineEnd : public IDocument {
public:
virtual int SCI_METHOD LineEnd(int line) const = 0;
- virtual int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const = 0;
+ virtual int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const = 0;
+ virtual int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const = 0;
};
diff --git a/include/ILexer.h b/include/ILexer.h
index 9f9225ef2..e93de819a 100644
--- a/include/ILexer.h
+++ b/include/ILexer.h
@@ -48,7 +48,8 @@ public:
class IDocumentWithLineEnd : public IDocument {
public:
virtual int SCI_METHOD LineEnd(int line) const = 0;
- virtual int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const = 0;
+ virtual int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const = 0;
+ virtual int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const = 0;
};
enum { lvOriginal=0, lvSubStyles=1 };
diff --git a/lexlib/LexAccessor.h b/lexlib/LexAccessor.h
index 92e719360..e29bbc923 100644
--- a/lexlib/LexAccessor.h
+++ b/lexlib/LexAccessor.h
@@ -79,6 +79,12 @@ public:
}
return buf[position - startPos];
}
+ IDocumentWithLineEnd *MultiByteAccess() const {
+ if (documentVersion >= dvLineEnd) {
+ return static_cast<IDocumentWithLineEnd *>(pAccess);
+ }
+ return 0;
+ }
/** Safe version of operator[], returning a defined value for invalid position. */
char SafeGetCharAt(int position, char chDefault=' ') {
if (position < startPos || position >= endPos) {
@@ -126,21 +132,6 @@ public:
return startNext - 1;
}
}
- int GetRelativePosition(int start, int characterOffset, int *character, int *width) {
- if (documentVersion >= dvLineEnd) {
- return (static_cast<IDocumentWithLineEnd *>(pAccess))->GetRelativePosition(
- start, characterOffset, character, width);
- } else {
- // Old version -> byte-oriented only
- // Handle doc range overflow
- int posNew = start + characterOffset;
- if ((posNew < 0) || (posNew > Length()))
- return -1;
- *character = SafeGetCharAt(posNew, 0);
- *width = 1;
- return start + characterOffset;
- }
- }
int LevelAt(int line) const {
return pAccess->GetLevel(line);
}
diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h
index 0b5dee379..fc6c60d2f 100644
--- a/lexlib/StyleContext.h
+++ b/lexlib/StyleContext.h
@@ -49,6 +49,7 @@ inline int BytesInUnicodeCodePoint(int codePoint) {
// syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80
class StyleContext {
LexAccessor &styler;
+ IDocumentWithLineEnd *multiByteAccess;
unsigned int endPos;
unsigned int lengthDocument;
@@ -60,11 +61,11 @@ class StyleContext {
StyleContext &operator=(const StyleContext &);
void GetNextChar() {
- if (styler.Encoding() == enc8bit) {
+ if (multiByteAccess) {
+ chNext = multiByteAccess->GetCharacterAndWidth(currentPos+width, &widthNext);
+ } else {
chNext = static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+width, 0));
widthNext = 1;
- } else {
- styler.GetRelativePosition(currentPos+width, 0, &chNext, &widthNext);
}
// End of line determined from line end position, allowing CR, LF,
// CRLF and Unicode line ends as set by document.
@@ -91,6 +92,7 @@ public:
StyleContext(unsigned int startPos, unsigned int length,
int initStyle, LexAccessor &styler_, char chMask=31) :
styler(styler_),
+ multiByteAccess(0),
endPos(startPos + length),
posRelative(0),
currentPosLastRelative(0x7FFFFFFF),
@@ -105,6 +107,9 @@ public:
width(0),
chNext(0),
widthNext(1) {
+ if (styler.Encoding() != enc8bit) {
+ multiByteAccess = styler.MultiByteAccess();
+ }
styler.StartAt(startPos, chMask);
styler.StartSegment(startPos);
currentLine = styler.GetLine(startPos);
@@ -182,13 +187,7 @@ public:
int GetRelativeCharacter(int n) {
if (n == 0)
return ch;
- if (styler.Encoding() == enc8bit) {
- // fast version for single byte encodings
- return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
- } else {
- int ch = 0;
- int width = 0;
- //styler.GetRelativePosition(currentPos, n, &ch, &width);
+ if (multiByteAccess) {
if ((currentPosLastRelative != currentPos) ||
((n > 0) && ((offsetRelative < 0) || (n < offsetRelative))) ||
((n < 0) && ((offsetRelative > 0) || (n > offsetRelative)))) {
@@ -196,11 +195,15 @@ public:
offsetRelative = 0;
}
int diffRelative = n - offsetRelative;
- int posNew = styler.GetRelativePosition(posRelative, diffRelative, &ch, &width);
+ int posNew = multiByteAccess->GetRelativePosition(posRelative, diffRelative);
+ int ch = multiByteAccess->GetCharacterAndWidth(posNew, 0);
posRelative = posNew;
currentPosLastRelative = currentPos;
offsetRelative = n;
return ch;
+ } else {
+ // fast version for single byte encodings
+ return static_cast<unsigned char>(styler.SafeGetCharAt(currentPos + n, 0));
}
}
bool Match(char ch0) const {
diff --git a/src/Document.cxx b/src/Document.cxx
index 472567068..a00fc9fc2 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -713,55 +713,63 @@ static inline int UnicodeFromBytes(const unsigned char *us) {
}
// Return -1 on out-of-bounds
-int SCI_METHOD Document::GetRelativePosition(int start, int characterOffset, int *character, int *width) const {
- int pos = start;
+int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
+ int pos = positionStart;
if (dbcsCodePage) {
const int increment = (characterOffset > 0) ? 1 : -1;
while (characterOffset != 0) {
const int posNext = NextPosition(pos, increment);
if (posNext == pos)
- return -1;
+ return INVALID_POSITION;
pos = posNext;
characterOffset -= increment;
}
- const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
+ } else {
+ pos = positionStart + characterOffset;
+ if ((pos < 0) || (pos > Length()))
+ return INVALID_POSITION;
+ }
+ return pos;
+}
+
+int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
+ int character;
+ int bytesInCharacter = 1;
+ if (dbcsCodePage) {
+ const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
if (SC_CP_UTF8 == dbcsCodePage) {
if (UTF8IsAscii(leadByte)) {
// Single byte character or invalid
- *character = leadByte;
- *width = 1;
+ character = leadByte;
} else {
const int widthCharBytes = UTF8BytesOfLead[leadByte];
unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
for (int b=1; b<widthCharBytes; b++)
- charBytes[b] = static_cast<unsigned char>(cb.CharAt(pos+b));
+ charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
int utf8status = UTF8Classify(charBytes, widthCharBytes);
if (utf8status & UTF8MaskInvalid) {
- // Report as singleton surrogate values which are invalid in Unicode
- *character = 0xDC80 + leadByte;
- *width = 1;
+ // Report as singleton surrogate values which are invalid Unicode
+ character = 0xDC80 + leadByte;
} else {
- *character = UnicodeFromBytes(charBytes);
- *width = utf8status & UTF8MaskWidth;
+ bytesInCharacter = utf8status & UTF8MaskWidth;
+ character = UnicodeFromBytes(charBytes);
}
}
- } else if (dbcsCodePage) {
+ } else {
if (IsDBCSLeadByte(leadByte)) {
- *character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(pos+1));
- *width = 2;
+ bytesInCharacter = 2;
+ character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
} else {
- *character = leadByte;
- *width = 1;
+ character = leadByte;
}
}
} else {
- pos = start + characterOffset;
- if ((pos < 0) || (pos > Length()))
- return -1;
- *character = cb.CharAt(pos);
- *width = 1;
+ character = cb.CharAt(position);
}
- return pos;
+ if (pWidth) {
+ *pWidth = bytesInCharacter;
+ }
+ return character;
}
int SCI_METHOD Document::CodePage() const {
diff --git a/src/Document.h b/src/Document.h
index 8eb8db74a..5c7e8f8a0 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -279,7 +279,8 @@ public:
int MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd=true);
int NextPosition(int pos, int moveDir) const;
bool NextCharacter(int &pos, int moveDir) const; // Returns true if pos changed
- int SCI_METHOD GetRelativePosition(int start, int characterOffset, int *character, int *width) const;
+ int SCI_METHOD GetRelativePosition(int positionStart, int characterOffset) const;
+ int SCI_METHOD GetCharacterAndWidth(int position, int *pWidth) const;
int SCI_METHOD CodePage() const;
bool SCI_METHOD IsDBCSLeadByte(char ch) const;
int SafeSegment(const char *text, int length, int lengthSegment) const;
--
cgit v1.2.3