about summary refs log tree commit diff homepage
path: root/src/Document.cxx
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2000-04-06 08:43:00 +0000
committer nyamatongwe <unknown> 2000-04-06 08:43:00 +0000
commit f129e6862882879aed3c8338243dbb43c17721ed (patch)
tree 1352720978356ddc213962ccf1dc248c3b061491 /src/Document.cxx
parent dae4eea6225cd2f3ed9f2e4dfe46606cc9a0743f (diff)
download scintilla-mirror-f129e6862882879aed3c8338243dbb43c17721ed.tar.gz
Initial Unicode support code.
Diffstat (limited to 'src/Document.cxx')
-rw-r--r-- src/Document.cxx 119
1 files changed, 79 insertions, 40 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index 7a30d7fd1..650c0ced2 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -202,18 +202,23 @@ bool Document::IsCrLf(int pos) {
bool Document::IsDBCS(int pos) {
#if PLAT_WIN
if (dbcsCodePage) {
- // Anchor DBCS calculations at start of line because start of line can
- // not be a DBCS trail byte.
- int startLine = pos;
- while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n')
- startLine--;
- while (startLine <= pos) {
- if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine))) {
+ if (SC_CP_UTF8 == dbcsCodePage) {
+ unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
+ return ch >= 0x80;
+ } else {
+ // Anchor DBCS calculations at start of line because start of line can
+ // not be a DBCS trail byte.
+ int startLine = pos;
+ while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n')
+ startLine--;
+ while (startLine <= pos) {
+ if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine))) {
+ startLine++;
+ if (startLine >= pos)
+ return true;
+ }
startLine++;
- if (startLine >= pos)
- return true;
}
- startLine++;
}
}
return false;
@@ -222,6 +227,28 @@ bool Document::IsDBCS(int pos) {
#endif
}
+// Return the number of bytes occupied by the character that starts at pos.
+// A \r\n pair counts as a single two byte character. In UTF-8 mode the
+// length is derived from the lead byte and limited to the bytes remaining
+// in the document; otherwise a DBCS character is 2 bytes and anything
+// else is 1 byte.
+int Document::LenChar(int pos) {
+	if (IsCrLf(pos)) {
+		return 2;
+	} else if (SC_CP_UTF8 == dbcsCodePage) {
+		unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
+		if (ch < 0x80)
+			return 1;
+		// Lead bytes from 0xE0 start a 3 byte sequence and lead bytes from
+		// 0xF0 start a 4 byte sequence; any other non-ASCII byte is treated
+		// as the start of a 2 byte sequence.
+		int len = 2;
+		if (ch >= (0x80+0x40+0x20+0x10))
+			len = 4;
+		else if (ch >= (0x80+0x40+0x20))
+			len = 3;
+		// Never report more bytes than the document still holds.
+		int lengthDoc = Length();
+		if ((pos + len) > lengthDoc)
+			return lengthDoc-pos;
+		else
+			return len;
+	} else if (IsDBCS(pos)) {
+		return 2;
+	} else {
+		return 1;
+	}
+}
+
// Normalise a position so that it is not halfway through a two byte character.
// This can occur in two situations -
// When lines are terminated with \r\n pairs which should be treated as one character.
@@ -253,29 +280,41 @@ int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
#if PLAT_WIN
if (dbcsCodePage) {
- // Anchor DBCS calculations at start of line because start of line can
- // not be a DBCS trail byte.
- int startLine = pos;
- while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n')
- startLine--;
- bool atLeadByte = false;
- while (startLine < pos) {
- if (atLeadByte)
- atLeadByte = false;
- else if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine)))
- atLeadByte = true;
- else
- atLeadByte = false;
- startLine++;
- //Platform::DebugPrintf("DBCS %s\n", atlead ? "D" : "-");
- }
+ if (SC_CP_UTF8 == dbcsCodePage) {
+ unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
+ while ((pos > 0) && (pos < Length()) && (ch >= 0x80) && (ch < (0x80 + 0x40))) {
+ // ch is a trail byte
+ if (moveDir > 0)
+ pos++;
+ else
+ pos--;
+ ch = static_cast<unsigned char>(cb.CharAt(pos));
+ }
+ } else {
+ // Anchor DBCS calculations at start of line because start of line can
+ // not be a DBCS trail byte.
+ int startLine = pos;
+ while (startLine > 0 && cb.CharAt(startLine) != '\r' && cb.CharAt(startLine) != '\n')
+ startLine--;
+ bool atLeadByte = false;
+ while (startLine < pos) {
+ if (atLeadByte)
+ atLeadByte = false;
+ else if (IsDBCSLeadByteEx(dbcsCodePage, cb.CharAt(startLine)))
+ atLeadByte = true;
+ else
+ atLeadByte = false;
+ startLine++;
+ //Platform::DebugPrintf("DBCS %s\n", atlead ? "D" : "-");
+ }
- if (atLeadByte) {
- // Position is between a lead byte and a trail byte
- if (moveDir > 0)
- return pos + 1;
- else
- return pos - 1;
+ if (atLeadByte) {
+ // Position is between a lead byte and a trail byte
+ if (moveDir > 0)
+ return pos + 1;
+ else
+ return pos - 1;
+ }
}
}
#endif
@@ -440,13 +479,7 @@ void Document::ChangeChar(int pos, char ch) {
}
+// Delete the whole character that starts at pos, using LenChar to decide
+// how many bytes it occupies. Does nothing when pos is at or beyond the end
+// of the document.
void Document::DelChar(int pos) {
- if (IsCrLf(pos)) {
- DeleteChars(pos, 2);
- } else if (IsDBCS(pos)) {
- DeleteChars(pos, 2);
- } else if (pos < Length()) {
- DeleteChars(pos, 1);
- }
+	// Keep the end-of-document guard that the removed code had: LenChar
+	// returns at least 1 for a plain byte, which would otherwise request a
+	// deletion past the last byte.
+	if (pos < Length()) {
+		DeleteChars(pos, LenChar(pos));
+	}
}
int Document::DelCharBack(int pos) {
@@ -455,6 +488,10 @@ int Document::DelCharBack(int pos) {
} else if (IsCrLf(pos - 2)) {
DeleteChars(pos - 2, 2);
return pos - 2;
+ } else if (SC_CP_UTF8 == dbcsCodePage) {
+ int startChar = MovePositionOutsideChar(pos-1, -1, false);
+ DeleteChars(startChar, pos - startChar);
+ return startChar;
} else if (IsDBCS(pos - 1)) {
DeleteChars(pos - 2, 2);
return pos - 2;
@@ -529,6 +566,8 @@ void Document::ConvertLineEnds(int eolModeSet) {
}
+// Report whether the byte ch counts as part of a word. In UTF-8 mode every
+// byte of a multi-byte sequence (any byte >= 0x80, lead or trail) is a word
+// byte; all other bytes are looked up in wordchars.
bool Document::IsWordChar(unsigned char ch) {
+	// Use >= so that the trail byte 0x80 is not separated from the rest of
+	// its character when scanning for word boundaries.
+	if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80))
+		return true;
return wordchars[ch];
}
@@ -653,7 +692,7 @@ void Document::ChangeCase(Range r, bool makeUpperCase) {
for (int pos=r.start; pos<r.end; pos++) {
char ch = CharAt(pos);
if (dbcsCodePage && IsDBCS(pos)) {
- pos++;
+ pos += LenChar(pos);
} else {
if (makeUpperCase) {
if (islower(ch)) {