diff options
-rw-r--r-- | src/Document.cxx | 81 | ||||
-rw-r--r-- | src/Document.h | 5 |
2 files changed, 52 insertions, 34 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index abbb87d5d..7ffb651ba 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -34,9 +34,7 @@ Document::Document() { stylingBits = 5; stylingBitsMask = 0x1F; stylingMask = 0; - for (int ch = 0; ch < 256; ch++) { - wordchars[ch] = isalnum(ch) || ch == '_'; - } + SetWordChars(0); endStyled = 0; enteredCount = 0; enteredReadOnlyCount = 0; @@ -700,72 +698,85 @@ void Document::ConvertLineEnds(int eolModeSet) { EndUndoAction(); } -bool Document::IsWordChar(unsigned char ch) { +Document::charClassification Document::WordCharClass(unsigned char ch) { if ((SC_CP_UTF8 == dbcsCodePage) && (ch > 0x80)) - return true; - return wordchars[ch]; + return ccWord; + return charClass[ch]; } +/** + * Used by commands that want to select whole words. + * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0. + */ int Document::ExtendWordSelect(int pos, int delta) { if (delta < 0) { - while (pos > 0 && IsWordChar(cb.CharAt(pos - 1))) + charClassification ccStart = WordCharClass(cb.CharAt(pos-1)); + while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) pos--; } else { - while (pos < (Length()) && IsWordChar(cb.CharAt(pos))) + charClassification ccStart = WordCharClass(cb.CharAt(pos)); + while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart)) pos++; } return pos; } +/** + * Find the start of the next word in either a forward (delta >= 0) or backwards direction + * (delta < 0). + * This is looking for a transition between character classes although there is also some + * additional movement to transit white space. + * Used by cursor movement by word commands. 
+ */ int Document::NextWordStart(int pos, int delta) { if (delta < 0) { - while (pos > 0 && (cb.CharAt(pos - 1) == ' ' || cb.CharAt(pos - 1) == '\t')) + while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccSpace)) pos--; - if (isspacechar(cb.CharAt(pos - 1))) { // Back up to previous line - while (pos > 0 && isspacechar(cb.CharAt(pos - 1))) - pos--; - } else { - bool startAtWordChar = IsWordChar(cb.CharAt(pos - 1)); - while (pos > 0 && !isspacechar(cb.CharAt(pos - 1)) && (startAtWordChar == IsWordChar(cb.CharAt(pos - 1)))) + if (pos > 0) { + charClassification ccStart = WordCharClass(cb.CharAt(pos-1)); + while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) { pos--; + } } } else { - bool startAtWordChar = IsWordChar(cb.CharAt(pos)); - while (pos < (Length()) && isspacechar(cb.CharAt(pos))) - pos++; - while (pos < (Length()) && !isspacechar(cb.CharAt(pos)) && (startAtWordChar == IsWordChar(cb.CharAt(pos)))) + charClassification ccStart = WordCharClass(cb.CharAt(pos)); + while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart)) pos++; - while (pos < (Length()) && (cb.CharAt(pos) == ' ' || cb.CharAt(pos) == '\t')) + while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccSpace)) pos++; } return pos; } /** - * Check that the character before the given position - * is not a word character. + * Check that the character at the given position is a word or punctuation character and that + * the previous character is of a different character class. */ bool Document::IsWordStartAt(int pos) { if (pos > 0) { - return !IsWordChar(CharAt(pos - 1)); + charClassification ccPos = WordCharClass(CharAt(pos)); + return (ccPos == ccWord || ccPos == ccPunctuation) && + (ccPos != WordCharClass(CharAt(pos - 1))); } return true; } /** - * Check that the character after the given position - * is not a word character. 
+ * Check that the character before the given position is a word or punctuation character and that + * the character at the given position is of a different character class. */ bool Document::IsWordEndAt(int pos) { if (pos < Length() - 1) { - return !IsWordChar(CharAt(pos)); + charClassification ccPrev = WordCharClass(CharAt(pos-1)); + return (ccPrev == ccWord || ccPrev == ccPunctuation) && + (ccPrev != WordCharClass(CharAt(pos))); } return true; } /** - * Check that the given range is delimited by - * non word characters. + * Check that the given range has transitions between character classes at both + * ends and where the characters on the inside are word or punctuation characters. */ bool Document::IsWordAt(int start, int end) { return IsWordStartAt(start) && IsWordEndAt(end); @@ -1018,16 +1029,22 @@ void Document::ChangeCase(Range r, bool makeUpperCase) { void Document::SetWordChars(unsigned char *chars) { int ch; for (ch = 0; ch < 256; ch++) { - wordchars[ch] = false; + if (ch == '\r' || ch == '\n') + charClass[ch] = ccNewLine; + else if (ch < 0x20 || ch == ' ') + charClass[ch] = ccSpace; + else + charClass[ch] = ccPunctuation; } if (chars) { while (*chars) { - wordchars[*chars] = true; + charClass[*chars] = ccWord; chars++; } } else { for (ch = 0; ch < 256; ch++) { - wordchars[ch] = isalnum(ch) || ch == '_'; + if (ch > 0x80 || isalnum(ch) || ch == '_') + charClass[ch] = ccWord; } } } @@ -1149,7 +1166,7 @@ void Document::NotifyModified(DocModification mh) { } bool Document::IsWordPartSeparator(char ch) { - return ispunct(ch) && IsWordChar(ch); + return ispunct(ch) && (WordCharClass(ch) == ccWord); } int Document::WordPartLeft(int pos) { diff --git a/src/Document.h b/src/Document.h index 394c8f94b..d0e63fc5b 100644 --- a/src/Document.h +++ b/src/Document.h @@ -81,7 +81,8 @@ public: private: int refCount; CellBuffer cb; - bool wordchars[256]; + enum charClassification { ccSpace, ccNewLine, ccWord, ccPunctuation }; + charClassification charClass[256]; char stylingMask; int 
endStyled; int enteredCount; @@ -209,7 +210,7 @@ public: private: bool IsDBCS(int pos); - bool IsWordChar(unsigned char ch); + charClassification WordCharClass(unsigned char ch); bool IsWordStartAt(int pos); bool IsWordEndAt(int pos); bool IsWordAt(int start, int end); |