2 files changed, 52 insertions, 34 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index abbb87d5d..7ffb651ba 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -34,9 +34,7 @@ Document::Document() {
 	stylingBits = 5;
 	stylingBitsMask = 0x1F;
 	stylingMask = 0;
-	for (int ch = 0; ch < 256; ch++) {
-		wordchars[ch] = isalnum(ch) || ch == '_';
-	}
+	SetWordChars(0);
 	endStyled = 0;
 	enteredCount = 0;
 	enteredReadOnlyCount = 0;
@@ -700,72 +698,85 @@ void Document::ConvertLineEnds(int eolModeSet) {
 	EndUndoAction();
 }
 
-bool Document::IsWordChar(unsigned char ch) {
+Document::charClassification Document::WordCharClass(unsigned char ch) {
 	if ((SC_CP_UTF8 == dbcsCodePage) && (ch > 0x80))
-		return true;
-	return wordchars[ch];
+		return ccWord;
+	return charClass[ch];
 }
 
+/**
+ * Used by commands that want to select whole words.
+ * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
+ */
 int Document::ExtendWordSelect(int pos, int delta) {
 	if (delta < 0) {
-		while (pos > 0 && IsWordChar(cb.CharAt(pos - 1)))
+		charClassification ccStart = WordCharClass(cb.CharAt(pos-1));
+		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
 			pos--;
 	} else {
-		while (pos < (Length()) && IsWordChar(cb.CharAt(pos)))
+		charClassification ccStart = WordCharClass(cb.CharAt(pos));
+		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
 			pos++;
 	}
 	return pos;
 }
 
+/**
+ * Find the start of the next word in either a forward (delta >= 0) or backward
+ * (delta < 0) direction.
+ * This is looking for a transition between character classes although there is also some
+ * additional movement to transit white space.
+ * Used by cursor movement by word commands.
+ */
 int Document::NextWordStart(int pos, int delta) {
 	if (delta < 0) {
-		while (pos > 0 && (cb.CharAt(pos - 1) == ' ' || cb.CharAt(pos - 1) == '\t'))
+		while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccSpace))
 			pos--;
-		if (isspacechar(cb.CharAt(pos - 1))) {	// Back up to previous line
-			while (pos > 0 && isspacechar(cb.CharAt(pos - 1)))
-				pos--;
-		} else {
-			bool startAtWordChar = IsWordChar(cb.CharAt(pos - 1));
-			while (pos > 0 && !isspacechar(cb.CharAt(pos - 1)) && (startAtWordChar == IsWordChar(cb.CharAt(pos - 1))))
+		if (pos > 0) {
+			charClassification ccStart = WordCharClass(cb.CharAt(pos-1));
+			while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
 				pos--;
+			}
 		}
 	} else {
-		bool startAtWordChar = IsWordChar(cb.CharAt(pos));
-		while (pos < (Length()) && isspacechar(cb.CharAt(pos)))
-			pos++;
-		while (pos < (Length()) && !isspacechar(cb.CharAt(pos)) && (startAtWordChar == IsWordChar(cb.CharAt(pos))))
+		charClassification ccStart = WordCharClass(cb.CharAt(pos));
+		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
 			pos++;
-		while (pos < (Length()) && (cb.CharAt(pos) == ' ' || cb.CharAt(pos) == '\t'))
+		while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccSpace))
 			pos++;
 	}
 	return pos;
 }
 
 /**
- * Check that the character before the given position
- * is not a word character.
+ * Check that the character at the given position is a word or punctuation character and that
+ * the previous character is of a different character class.
  */
 bool Document::IsWordStartAt(int pos) {
 	if (pos > 0) {
-		return !IsWordChar(CharAt(pos - 1));
+		charClassification ccPos = WordCharClass(CharAt(pos));
+		return (ccPos == ccWord || ccPos == ccPunctuation) &&
+			(ccPos != WordCharClass(CharAt(pos - 1)));
 	}
 	return true;
 }
 
 /**
- * Check that the character after the given position
- * is not a word character.
+ * Check that the character before the given position is a word or punctuation character and
+ * that the character at the given position is of a different character class.
  */
 bool Document::IsWordEndAt(int pos) {
 	if (pos < Length() - 1) {
-		return !IsWordChar(CharAt(pos));
+		charClassification ccPrev = WordCharClass(CharAt(pos-1));
+		return (ccPrev == ccWord || ccPrev == ccPunctuation) &&
+			(ccPrev != WordCharClass(CharAt(pos)));
 	}
 	return true;
 }
 
 /**
- * Check that the given range is delimited by
- * non word characters.
+ * Check that the given range has transitions between character classes at both
+ * ends and that the characters just inside each end are word or punctuation characters.
  */
 bool Document::IsWordAt(int start, int end) {
 	return IsWordStartAt(start) && IsWordEndAt(end);
@@ -1018,16 +1029,22 @@ void Document::ChangeCase(Range r, bool makeUpperCase) {
 void Document::SetWordChars(unsigned char *chars) {
 	int ch;
 	for (ch = 0; ch < 256; ch++) {
-		wordchars[ch] = false;
+		if (ch == '\r' || ch == '\n')
+			charClass[ch] = ccNewLine;
+		else if (ch < 0x20 || ch == ' ')
+			charClass[ch] = ccSpace;
+		else
+			charClass[ch] = ccPunctuation;
 	}
 	if (chars) {
 		while (*chars) {
-			wordchars[*chars] = true;
+			charClass[*chars] = ccWord;
 			chars++;
 		}
 	} else {
 		for (ch = 0; ch < 256; ch++) {
-			wordchars[ch] = isalnum(ch) || ch == '_';
+			if (ch > 0x80 || isalnum(ch) || ch == '_') 
+				charClass[ch] = ccWord;
 		}
 	}
 }
@@ -1149,7 +1166,7 @@ void Document::NotifyModified(DocModification mh) {
 }
 
 bool Document::IsWordPartSeparator(char ch) {
-	return ispunct(ch) && IsWordChar(ch);
+	return ispunct(ch) && (WordCharClass(ch) == ccWord);
 }
 
 int Document::WordPartLeft(int pos) {
diff --git a/src/Document.h b/src/Document.h
index 394c8f94b..d0e63fc5b 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -81,7 +81,8 @@ public:
 private:	
 	int refCount;
 	CellBuffer cb;
-	bool wordchars[256];
+	enum charClassification { ccSpace, ccNewLine, ccWord, ccPunctuation };
+	charClassification charClass[256];
 	char stylingMask;
 	int endStyled;
 	int enteredCount;
@@ -209,7 +210,7 @@ public:
 
 private:
 	bool IsDBCS(int pos);
-	bool IsWordChar(unsigned char ch);
+	charClassification WordCharClass(unsigned char ch);
 	bool IsWordStartAt(int pos);
 	bool IsWordEndAt(int pos);
 	bool IsWordAt(int start, int end);