aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2001-10-25 23:50:45 +0000
committer nyamatongwe <unknown> 2001-10-25 23:50:45 +0000
commit 6c9f4d9c8fd046f52a308d7072ce9292cd8645c9 (patch)
tree def446f644219cf0c3800b38254d11e7e962fd1e
parent e7333cbfe272f9a08782ff889d70efd94d291da6 (diff)
download scintilla-mirror-6c9f4d9c8fd046f52a308d7072ce9292cd8645c9.tar.gz
Changed definition of word to either be a sequence of word characters or
a sequence of punctuation. Punctuation is defined as those characters that are not in the set of word characters and are not new line (\r or \n) characters or space characters. Space characters are ' ' and control characters. Word boundaries occur between sequences of these four (word, punctuation, space and newline) classes although there is some fiddling to ensure spaces are correctly associated with their adjacent words when performing some operations such as delete to end of word.
-rw-r--r-- src/Document.cxx 81
-rw-r--r-- src/Document.h 5
2 files changed, 52 insertions, 34 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index abbb87d5d..7ffb651ba 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -34,9 +34,7 @@ Document::Document() {
stylingBits = 5;
stylingBitsMask = 0x1F;
stylingMask = 0;
- for (int ch = 0; ch < 256; ch++) {
- wordchars[ch] = isalnum(ch) || ch == '_';
- }
+ SetWordChars(0);
endStyled = 0;
enteredCount = 0;
enteredReadOnlyCount = 0;
@@ -700,72 +698,85 @@ void Document::ConvertLineEnds(int eolModeSet) {
EndUndoAction();
}
-bool Document::IsWordChar(unsigned char ch) {
+Document::charClassification Document::WordCharClass(unsigned char ch) {
if ((SC_CP_UTF8 == dbcsCodePage) && (ch > 0x80))
- return true;
- return wordchars[ch];
+ return ccWord;
+ return charClass[ch];
}
+/**
+ * Used by commands that want to select whole words.
+ * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
+ */
int Document::ExtendWordSelect(int pos, int delta) {
if (delta < 0) {
- while (pos > 0 && IsWordChar(cb.CharAt(pos - 1)))
+ charClassification ccStart = WordCharClass(cb.CharAt(pos-1));
+ while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
pos--;
} else {
- while (pos < (Length()) && IsWordChar(cb.CharAt(pos)))
+ charClassification ccStart = WordCharClass(cb.CharAt(pos));
+ while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
pos++;
}
return pos;
}
+/**
+ * Find the start of the next word in either a forward (delta >= 0) or backwards direction
+ * (delta < 0).
+ * This is looking for a transition between character classes although there is also some
+ * additional movement to transit white space.
+ * Used by cursor movement by word commands.
+ */
int Document::NextWordStart(int pos, int delta) {
if (delta < 0) {
- while (pos > 0 && (cb.CharAt(pos - 1) == ' ' || cb.CharAt(pos - 1) == '\t'))
+ while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccSpace))
pos--;
- if (isspacechar(cb.CharAt(pos - 1))) { // Back up to previous line
- while (pos > 0 && isspacechar(cb.CharAt(pos - 1)))
- pos--;
- } else {
- bool startAtWordChar = IsWordChar(cb.CharAt(pos - 1));
- while (pos > 0 && !isspacechar(cb.CharAt(pos - 1)) && (startAtWordChar == IsWordChar(cb.CharAt(pos - 1))))
+ if (pos > 0) {
+ charClassification ccStart = WordCharClass(cb.CharAt(pos-1));
+ while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
pos--;
+ }
}
} else {
- bool startAtWordChar = IsWordChar(cb.CharAt(pos));
- while (pos < (Length()) && isspacechar(cb.CharAt(pos)))
- pos++;
- while (pos < (Length()) && !isspacechar(cb.CharAt(pos)) && (startAtWordChar == IsWordChar(cb.CharAt(pos))))
+ charClassification ccStart = WordCharClass(cb.CharAt(pos));
+ while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
pos++;
- while (pos < (Length()) && (cb.CharAt(pos) == ' ' || cb.CharAt(pos) == '\t'))
+ while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccSpace))
pos++;
}
return pos;
}
/**
- * Check that the character before the given position
- * is not a word character.
+ * Check that the character at the given position is a word or punctuation character and that
+ * the previous character is of a different character class.
*/
bool Document::IsWordStartAt(int pos) {
if (pos > 0) {
- return !IsWordChar(CharAt(pos - 1));
+ charClassification ccPos = WordCharClass(CharAt(pos));
+ return (ccPos == ccWord || ccPos == ccPunctuation) &&
+ (ccPos != WordCharClass(CharAt(pos - 1)));
}
return true;
}
/**
- * Check that the character after the given position
- * is not a word character.
+ * Check that the character before the given position is a word or punctuation character and
+ * that the character at the given position is of a different character class.
*/
bool Document::IsWordEndAt(int pos) {
if (pos < Length() - 1) {
- return !IsWordChar(CharAt(pos));
+ charClassification ccPrev = WordCharClass(CharAt(pos-1));
+ return (ccPrev == ccWord || ccPrev == ccPunctuation) &&
+ (ccPrev != WordCharClass(CharAt(pos)));
}
return true;
}
/**
- * Check that the given range is delimited by
- * non word characters.
+ * Check that the given range has transitions between character classes at both
+ * ends and where the characters on the inside are word or punctuation characters.
*/
bool Document::IsWordAt(int start, int end) {
return IsWordStartAt(start) && IsWordEndAt(end);
@@ -1018,16 +1029,22 @@ void Document::ChangeCase(Range r, bool makeUpperCase) {
void Document::SetWordChars(unsigned char *chars) {
int ch;
for (ch = 0; ch < 256; ch++) {
- wordchars[ch] = false;
+ if (ch == '\r' || ch == '\n')
+ charClass[ch] = ccNewLine;
+ else if (ch < 0x20 || ch == ' ')
+ charClass[ch] = ccSpace;
+ else
+ charClass[ch] = ccPunctuation;
}
if (chars) {
while (*chars) {
- wordchars[*chars] = true;
+ charClass[*chars] = ccWord;
chars++;
}
} else {
for (ch = 0; ch < 256; ch++) {
- wordchars[ch] = isalnum(ch) || ch == '_';
+ if (ch > 0x80 || isalnum(ch) || ch == '_')
+ charClass[ch] = ccWord;
}
}
}
@@ -1149,7 +1166,7 @@ void Document::NotifyModified(DocModification mh) {
}
bool Document::IsWordPartSeparator(char ch) {
- return ispunct(ch) && IsWordChar(ch);
+ return ispunct(ch) && (WordCharClass(ch) == ccWord);
}
int Document::WordPartLeft(int pos) {
diff --git a/src/Document.h b/src/Document.h
index 394c8f94b..d0e63fc5b 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -81,7 +81,8 @@ public:
private:
int refCount;
CellBuffer cb;
- bool wordchars[256];
+ enum charClassification { ccSpace, ccNewLine, ccWord, ccPunctuation };
+ charClassification charClass[256];
char stylingMask;
int endStyled;
int enteredCount;
@@ -209,7 +210,7 @@ public:
private:
bool IsDBCS(int pos);
- bool IsWordChar(unsigned char ch);
+ charClassification WordCharClass(unsigned char ch);
bool IsWordStartAt(int pos);
bool IsWordEndAt(int pos);
bool IsWordAt(int start, int end);