aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Mook <marky@activestate.com> 2012-05-29 12:44:34 -0700
committer: Mook <marky@activestate.com> 2012-05-29 12:44:34 -0700
commit: 7e9e5b1e059efa957d2d7e9fa6b364706f621711 (patch)
tree: 24e9e113362f6a3f05f39e64ef9207f712b4e1a9
parent: 08d35504786635e06b870b2674da38c0e89521cd (diff)
download: scintilla-mirror-7e9e5b1e059efa957d2d7e9fa6b364706f621711.tar.gz
add CharClassify::GetCharsOfClass
add SCI_GETWORDCHARS(<unused>, stringresult chars) command to get word chars; add SCI_GETWHITESPACECHARS(<unused>, stringresult chars) to get whitespace chars; add SCI_GETPUNCTUATIONCHARS(<unused>, stringresult chars) to get punctuation chars; also add tests for {Set,Get}{Word,Whitespace,Punctuation}Chars and CharClassify
-rw-r--r-- include/Scintilla.h 4
-rw-r--r-- include/Scintilla.iface 14
-rw-r--r-- src/CharClassify.cxx 16
-rw-r--r-- src/CharClassify.h 1
-rw-r--r-- src/Document.cxx 4
-rw-r--r-- src/Document.h 1
-rw-r--r-- src/Editor.cxx 16
-rw-r--r-- test/simpleTests.py 129
-rw-r--r-- test/unit/makefile 2
-rw-r--r-- test/unit/testCharClassify.cxx 110
10 files changed, 295 insertions, 2 deletions
diff --git a/include/Scintilla.h b/include/Scintilla.h
index 8251726dc..3cacd5310 100644
--- a/include/Scintilla.h
+++ b/include/Scintilla.h
@@ -246,6 +246,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,
#define SCI_GETCARETPERIOD 2075
#define SCI_SETCARETPERIOD 2076
#define SCI_SETWORDCHARS 2077
+#define SCI_GETWORDCHARS 2646
#define SCI_BEGINUNDOACTION 2078
#define SCI_ENDUNDOACTION 2079
#define INDIC_PLAIN 0
@@ -684,6 +685,9 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,
#define SCI_WORDRIGHTEND 2441
#define SCI_WORDRIGHTENDEXTEND 2442
#define SCI_SETWHITESPACECHARS 2443
+#define SCI_GETWHITESPACECHARS 2647
+#define SCI_SETPUNCTUATIONCHARS 2648
+#define SCI_GETPUNCTUATIONCHARS 2649
#define SCI_SETCHARSDEFAULT 2444
#define SCI_AUTOCGETCURRENT 2445
#define SCI_AUTOCGETCURRENTTEXT 2610
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index 7abbb68c1..0dc9eb35a 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -552,6 +552,10 @@ set void SetCaretPeriod=2076(int periodMilliseconds,)
# First sets defaults like SetCharsDefault.
set void SetWordChars=2077(, string characters)
+# Get the set of characters making up words for when moving or selecting by word.
+# Returns the number of characters
+get int GetWordChars=2646(, stringresult characters)
+
# Start a sequence of actions that is undone and redone as a unit.
# May be nested.
fun void BeginUndoAction=2078(,)
@@ -1800,6 +1804,16 @@ fun void WordRightEndExtend=2442(,)
# Should be called after SetWordChars.
set void SetWhitespaceChars=2443(, string characters)
+# Get the set of characters making up whitespace for when moving or selecting by word.
+get void GetWhitespaceChars=2647(, stringresult characters)
+
+# Set the set of characters making up punctuation characters
+# Should be called after SetWordChars.
+set void SetPunctuationChars=2648(, string characters)
+
+# Get the set of characters making up punctuation characters
+get void GetPunctuationChars=2649(, stringresult characters)
+
# Reset the set of characters for whitespace and word characters to the defaults.
fun void SetCharsDefault=2444(,)
diff --git a/src/CharClassify.cxx b/src/CharClassify.cxx
index c16af4547..7e3db737e 100644
--- a/src/CharClassify.cxx
+++ b/src/CharClassify.cxx
@@ -46,3 +46,19 @@ void CharClassify::SetCharClasses(const unsigned char *chars, cc newCharClass) {
}
}
}
+
+int CharClassify::GetCharsOfClass(cc characterClass, unsigned char *buffer) {
+ // Get characters belonging to the given char class; return the number
+ // of characters (if the buffer is NULL, don't write to it).
+ int count = 0;
+ for (int ch = maxChar - 1; ch >= 0; --ch) {
+ if (charClass[ch] == characterClass) {
+ ++count;
+ if (buffer) {
+ *buffer = static_cast<unsigned char>(ch);
+ buffer++;
+ }
+ }
+ }
+ return count;
+}
diff --git a/src/CharClassify.h b/src/CharClassify.h
index e8b798ecb..5d2734c00 100644
--- a/src/CharClassify.h
+++ b/src/CharClassify.h
@@ -19,6 +19,7 @@ public:
enum cc { ccSpace, ccNewLine, ccWord, ccPunctuation };
void SetDefaultCharClasses(bool includeWordClass);
void SetCharClasses(const unsigned char *chars, cc newCharClass);
+ int GetCharsOfClass(cc charClass, unsigned char *buffer);
cc GetClass(unsigned char ch) const { return static_cast<cc>(charClass[ch]);}
bool IsWord(unsigned char ch) const { return static_cast<cc>(charClass[ch]) == ccWord;}
diff --git a/src/Document.cxx b/src/Document.cxx
index 244e96e4f..82b9e070b 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -1615,6 +1615,10 @@ void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCh
charClass.SetCharClasses(chars, newCharClass);
}
+int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
+ return charClass.GetCharsOfClass(characterClass, buffer);
+}
+
void Document::SetStylingBits(int bits) {
stylingBits = bits;
stylingBitsMask = (1 << stylingBits) - 1;
diff --git a/src/Document.h b/src/Document.h
index 7e03f3d9e..30c6aee1c 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -364,6 +364,7 @@ public:
void SetDefaultCharClasses(bool includeWordClass);
void SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass);
+ int GetCharsOfClass(CharClassify::cc charClass, unsigned char *buffer);
void SetStylingBits(int bits);
void SCI_METHOD StartStyling(int position, char mask);
bool SCI_METHOD SetStyleFor(int length, char style);
diff --git a/src/Editor.cxx b/src/Editor.cxx
index d72ff302c..2bc89ba1f 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -7510,6 +7510,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
caret.period = wParam;
break;
+ case SCI_GETWORDCHARS:
+ return pdoc->GetCharsOfClass(CharClassify::ccWord, reinterpret_cast<unsigned char *>(lParam));
+
case SCI_SETWORDCHARS: {
pdoc->SetDefaultCharClasses(false);
if (lParam == 0)
@@ -7518,6 +7521,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
}
break;
+ case SCI_GETWHITESPACECHARS:
+ return pdoc->GetCharsOfClass(CharClassify::ccSpace, reinterpret_cast<unsigned char *>(lParam));
+
case SCI_SETWHITESPACECHARS: {
if (lParam == 0)
return 0;
@@ -7525,6 +7531,16 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
}
break;
+ case SCI_GETPUNCTUATIONCHARS:
+ return pdoc->GetCharsOfClass(CharClassify::ccPunctuation, reinterpret_cast<unsigned char *>(lParam));
+
+ case SCI_SETPUNCTUATIONCHARS: {
+ if (lParam == 0)
+ return 0;
+ pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccPunctuation);
+ }
+ break;
+
case SCI_SETCHARSDEFAULT:
pdoc->SetDefaultCharClasses(true);
break;
diff --git a/test/simpleTests.py b/test/simpleTests.py
index 8e101b84e..d8abbb651 100644
--- a/test/simpleTests.py
+++ b/test/simpleTests.py
@@ -3,7 +3,7 @@
from __future__ import with_statement
from __future__ import unicode_literals
-import ctypes, os, sys, unittest
+import codecs, ctypes, os, sys, unittest
import XiteWin
@@ -1348,6 +1348,133 @@ class TestDirectAccess(unittest.TestCase):
cpBuffer = ctypes.c_char_p(rangePointer)
self.assertEquals(cpBuffer.value, text[1:])
+class TestWordChars(unittest.TestCase):
+ def setUp(self):
+ self.xite = XiteWin.xiteFrame
+ self.ed = self.xite.ed
+ self.ed.ClearAll()
+ self.ed.EmptyUndoBuffer()
+
+ def tearDown(self):
+ self.ed.SetCharsDefault()
+
+ def _setChars(self, charClass, chars):
+ """ Wrapper to call self.ed.Set*Chars with the right type
+ @param charClass {str} the character class, "word", "space", etc.
+ @param chars {iterable of int} characters to set
+ """
+ if sys.version_info.major == 2:
+ # Python 2, use latin-1 encoded str
+ unichars = (unichr(x) for x in chars if x != 0)
+ # can't use literal u"", that's a syntax error in Py3k
+ # unicode() doesn't exist in Py3k, but we never run it there
+ result = unicode("").join(unichars).encode("latin-1")
+ else:
+ # Python 3, use bytes()
+ result = bytes(x for x in chars if x != 0)
+ meth = getattr(self.ed, "Set%sChars" % (charClass.capitalize()))
+ return meth(None, result)
+
+ def assertCharSetsEqual(self, first, second, *args, **kwargs):
+ """ Assert that the two character sets are equal.
+ If either set is an iterable of numbers, convert them to chars
+ first. """
+ first_set = set()
+ for c in first:
+ first_set.add(chr(c) if isinstance(c, int) else c)
+ second_set = set()
+ for c in second:
+ second_set.add(chr(c) if isinstance(c, int) else c)
+ return self.assertEqual(first_set, second_set, *args, **kwargs)
+
+ def testDefaultWordChars(self):
+ # check that the default word chars are as expected
+ import string
+ dataLen = self.ed.GetWordChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetWordChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ expected = set(string.digits + string.ascii_letters + '_') | \
+ set(chr(x) for x in range(0x80, 0x100))
+ self.assertCharSetsEqual(data, expected)
+
+ def testDefaultWhitespaceChars(self):
+ # check that the default whitespace chars are as expected
+ import string
+ dataLen = self.ed.GetWhitespaceChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetWhitespaceChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ expected = (set(chr(x) for x in (range(0, 0x20))) | set(' ')) - \
+ set(['\r', '\n'])
+ self.assertCharSetsEqual(data, expected)
+
+ def testDefaultPunctuationChars(self):
+ # check that the default punctuation chars are as expected
+ import string
+ dataLen = self.ed.GetPunctuationChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetPunctuationChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ expected = set(chr(x) for x in range(0x20, 0x80)) - \
+ set(string.ascii_letters + string.digits + "\r\n_ ")
+ self.assertCharSetsEqual(data, expected)
+
+ def testCustomWordChars(self):
+ # check that setting things to whitespace chars makes them not words
+ self._setChars("whitespace", range(1, 0x100))
+ dataLen = self.ed.GetWordChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetWordChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ expected = set()
+ self.assertCharSetsEqual(data, expected)
+ # and now set something to make sure that works too
+ expected = set(range(1, 0x100, 2))
+ self._setChars("word", expected)
+ dataLen = self.ed.GetWordChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetWordChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ self.assertCharSetsEqual(data, expected)
+
+ def testCustomWhitespaceChars(self):
+ # check setting whitespace chars to non-default values
+ self._setChars("word", range(1, 0x100))
+ # chr(0) cannot be changed to anything other than whitespace
+ expected = set([0])
+ dataLen = self.ed.GetWhitespaceChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetWhitespaceChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ self.assertCharSetsEqual(data, expected)
+ # now try to set it to something custom
+ expected = set(range(1, 0x100, 2)) | set([0])
+ self._setChars("whitespace", expected)
+ dataLen = self.ed.GetWhitespaceChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetWhitespaceChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ self.assertCharSetsEqual(data, expected)
+
+ def testCustomPunctuationChars(self):
+ # check setting punctuation chars to non-default values
+ self._setChars("word", range(1, 0x100))
+ expected = set()
+ dataLen = self.ed.GetPunctuationChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetPunctuationChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ self.assertEquals(set(data), expected)
+ # now try to set it to something custom
+ expected = set(range(1, 0x100, 1))
+ self._setChars("punctuation", expected)
+ dataLen = self.ed.GetPunctuationChars(None, None)
+ data = b"\0" * dataLen
+ self.ed.GetPunctuationChars(None, data)
+ self.assertEquals(dataLen, len(data))
+ self.assertCharSetsEqual(data, expected)
+
#~ import os
#~ for x in os.getenv("PATH").split(";"):
#~ n = "scilexer.dll"
diff --git a/test/unit/makefile b/test/unit/makefile
index 35a6fd0ce..220952c6f 100644
--- a/test/unit/makefile
+++ b/test/unit/makefile
@@ -42,7 +42,7 @@ CXXFLAGS += -g -Wall -Wextra -Wno-unused-function
#~ CXXFLAGS += -g -Wall
CASES:=$(addsuffix .o,$(basename $(notdir $(wildcard test*.cxx))))
-TESTEDOBJS=ContractionState.o RunStyles.o
+TESTEDOBJS=ContractionState.o RunStyles.o CharClassify.o
TESTS=$(EXE)
diff --git a/test/unit/testCharClassify.cxx b/test/unit/testCharClassify.cxx
new file mode 100644
index 000000000..8ff98b302
--- /dev/null
+++ b/test/unit/testCharClassify.cxx
@@ -0,0 +1,110 @@
+// Unit Tests for Scintilla internal data structures
+
+#include <string.h>
+
+#include "Platform.h"
+
+#include "CharClassify.h"
+
+#include <gtest/gtest.h>
+
+// Test CharClassify.
+
+class CharClassifyTest : public::testing::Test {
+protected:
+ virtual void SetUp() {
+ pcc = new CharClassify();
+ for (int ch = 0; ch < 256; ch++) {
+ if (ch == '\r' || ch == '\n')
+ charClass[ch] = CharClassify::ccNewLine;
+ else if (ch < 0x20 || ch == ' ')
+ charClass[ch] = CharClassify::ccSpace;
+ else if (ch >= 0x80 || isalnum(ch) || ch == '_')
+ charClass[ch] = CharClassify::ccWord;
+ else
+ charClass[ch] = CharClassify::ccPunctuation;
+ }
+ }
+
+ virtual void TearDown() {
+ delete pcc;
+ pcc = 0;
+ }
+
+ CharClassify *pcc;
+ CharClassify::cc charClass[256];
+
+ static const char* GetClassName(CharClassify::cc charClass) {
+ switch(charClass) {
+ #define CASE(c) case CharClassify::c: return #c
+ CASE(ccSpace);
+ CASE(ccNewLine);
+ CASE(ccWord);
+ CASE(ccPunctuation);
+ #undef CASE
+ default:
+ return "<unknown>";
+ }
+ }
+};
+
+TEST_F(CharClassifyTest, Defaults) {
+ for (int i = 0; i < 256; i++) {
+ EXPECT_EQ(charClass[i], pcc->GetClass(i))
+ << "Character " << i
+ << " should be class " << GetClassName(charClass[i])
+ << ", but got " << GetClassName(pcc->GetClass(i));
+ }
+}
+
+TEST_F(CharClassifyTest, Custom) {
+ unsigned char buf[2] = {0, 0};
+ for (int i = 0; i < 256; i++) {
+ CharClassify::cc thisClass = CharClassify::cc(i % 4);
+ buf[0] = i;
+ pcc->SetCharClasses(buf, thisClass);
+ charClass[i] = thisClass;
+ }
+ for (int i = 0; i < 256; i++) {
+ EXPECT_EQ(charClass[i], pcc->GetClass(i))
+ << "Character " << i
+ << " should be class " << GetClassName(charClass[i])
+ << ", but got " << GetClassName(pcc->GetClass(i));
+ }
+}
+
+TEST_F(CharClassifyTest, CharsOfClass) {
+ unsigned char buf[2] = {0, 0};
+ for (int i = 1; i < 256; i++) {
+ CharClassify::cc thisClass = CharClassify::cc(i % 4);
+ buf[0] = i;
+ pcc->SetCharClasses(buf, thisClass);
+ charClass[i] = thisClass;
+ }
+ for (int classVal = 0; classVal < 4; ++classVal) {
+ CharClassify::cc thisClass = CharClassify::cc(classVal % 4);
+ int size = pcc->GetCharsOfClass(thisClass, NULL);
+ unsigned char* buffer = reinterpret_cast<unsigned char*>(malloc(size + 1));
+ ASSERT_TRUE(buffer);
+ buffer[size] = '\0';
+ pcc->GetCharsOfClass(thisClass, buffer);
+ for (int i = 1; i < 256; i++) {
+ if (charClass[i] == thisClass) {
+ EXPECT_TRUE(memchr(reinterpret_cast<char*>(buffer), i, size))
+ << "Character " << i
+ << " should be class " << GetClassName(thisClass)
+ << ", but was not in GetCharsOfClass;"
+ << " it is reported to be "
+ << GetClassName(pcc->GetClass(i));
+ } else {
+ EXPECT_FALSE(memchr(reinterpret_cast<char*>(buffer), i, size))
+ << "Character " << i
+ << " should not be class " << GetClassName(thisClass)
+ << ", but was in GetCharsOfClass"
+ << " it is reported to be "
+ << GetClassName(pcc->GetClass(i));
+ }
+ }
+ free(buffer);
+ }
+}