diff options
-rw-r--r-- | include/Scintilla.h | 4 | ||||
-rw-r--r-- | include/Scintilla.iface | 14 | ||||
-rw-r--r-- | src/CharClassify.cxx | 16 | ||||
-rw-r--r-- | src/CharClassify.h | 1 | ||||
-rw-r--r-- | src/Document.cxx | 4 | ||||
-rw-r--r-- | src/Document.h | 1 | ||||
-rw-r--r-- | src/Editor.cxx | 16 | ||||
-rw-r--r-- | test/simpleTests.py | 129 | ||||
-rw-r--r-- | test/unit/makefile | 2 | ||||
-rw-r--r-- | test/unit/testCharClassify.cxx | 110 |
10 files changed, 295 insertions, 2 deletions
diff --git a/include/Scintilla.h b/include/Scintilla.h index 8251726dc..3cacd5310 100644 --- a/include/Scintilla.h +++ b/include/Scintilla.h @@ -246,6 +246,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam, #define SCI_GETCARETPERIOD 2075 #define SCI_SETCARETPERIOD 2076 #define SCI_SETWORDCHARS 2077 +#define SCI_GETWORDCHARS 2646 #define SCI_BEGINUNDOACTION 2078 #define SCI_ENDUNDOACTION 2079 #define INDIC_PLAIN 0 @@ -684,6 +685,9 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam, #define SCI_WORDRIGHTEND 2441 #define SCI_WORDRIGHTENDEXTEND 2442 #define SCI_SETWHITESPACECHARS 2443 +#define SCI_GETWHITESPACECHARS 2647 +#define SCI_SETPUNCTUATIONCHARS 2648 +#define SCI_GETPUNCTUATIONCHARS 2649 #define SCI_SETCHARSDEFAULT 2444 #define SCI_AUTOCGETCURRENT 2445 #define SCI_AUTOCGETCURRENTTEXT 2610 diff --git a/include/Scintilla.iface b/include/Scintilla.iface index 7abbb68c1..0dc9eb35a 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -552,6 +552,10 @@ set void SetCaretPeriod=2076(int periodMilliseconds,) # First sets defaults like SetCharsDefault. set void SetWordChars=2077(, string characters) +# Get the set of characters making up words for when moving or selecting by word. +# Returns the number of characters +get int GetWordChars=2646(, stringresult characters) + # Start a sequence of actions that is undone and redone as a unit. # May be nested. fun void BeginUndoAction=2078(,) @@ -1800,6 +1804,16 @@ fun void WordRightEndExtend=2442(,) # Should be called after SetWordChars. set void SetWhitespaceChars=2443(, string characters) +# Get the set of characters making up whitespace for when moving or selecting by word. +get void GetWhitespaceChars=2647(, stringresult characters) + +# Set the set of characters making up punctuation characters +# Should be called after SetWordChars. 
+set void SetPunctuationChars=2648(, string characters) + +# Get the set of characters making up punctuation characters +get void GetPunctuationChars=2649(, stringresult characters) + # Reset the set of characters for whitespace and word characters to the defaults. fun void SetCharsDefault=2444(,) diff --git a/src/CharClassify.cxx b/src/CharClassify.cxx index c16af4547..7e3db737e 100644 --- a/src/CharClassify.cxx +++ b/src/CharClassify.cxx @@ -46,3 +46,19 @@ void CharClassify::SetCharClasses(const unsigned char *chars, cc newCharClass) { } } } + +int CharClassify::GetCharsOfClass(cc characterClass, unsigned char *buffer) { + // Get characters belonging to the given char class; return the number + // of characters (if the buffer is NULL, don't write to it). + int count = 0; + for (int ch = maxChar - 1; ch >= 0; --ch) { + if (charClass[ch] == characterClass) { + ++count; + if (buffer) { + *buffer = static_cast<unsigned char>(ch); + buffer++; + } + } + } + return count; +} diff --git a/src/CharClassify.h b/src/CharClassify.h index e8b798ecb..5d2734c00 100644 --- a/src/CharClassify.h +++ b/src/CharClassify.h @@ -19,6 +19,7 @@ public: enum cc { ccSpace, ccNewLine, ccWord, ccPunctuation }; void SetDefaultCharClasses(bool includeWordClass); void SetCharClasses(const unsigned char *chars, cc newCharClass); + int GetCharsOfClass(cc charClass, unsigned char *buffer); cc GetClass(unsigned char ch) const { return static_cast<cc>(charClass[ch]);} bool IsWord(unsigned char ch) const { return static_cast<cc>(charClass[ch]) == ccWord;} diff --git a/src/Document.cxx b/src/Document.cxx index 244e96e4f..82b9e070b 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -1615,6 +1615,10 @@ void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCh charClass.SetCharClasses(chars, newCharClass); } +int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) { + return charClass.GetCharsOfClass(characterClass, buffer); +} + void 
Document::SetStylingBits(int bits) { stylingBits = bits; stylingBitsMask = (1 << stylingBits) - 1; diff --git a/src/Document.h b/src/Document.h index 7e03f3d9e..30c6aee1c 100644 --- a/src/Document.h +++ b/src/Document.h @@ -364,6 +364,7 @@ public: void SetDefaultCharClasses(bool includeWordClass); void SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass); + int GetCharsOfClass(CharClassify::cc charClass, unsigned char *buffer); void SetStylingBits(int bits); void SCI_METHOD StartStyling(int position, char mask); bool SCI_METHOD SetStyleFor(int length, char style); diff --git a/src/Editor.cxx b/src/Editor.cxx index d72ff302c..2bc89ba1f 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -7510,6 +7510,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { caret.period = wParam; break; + case SCI_GETWORDCHARS: + return pdoc->GetCharsOfClass(CharClassify::ccWord, reinterpret_cast<unsigned char *>(lParam)); + case SCI_SETWORDCHARS: { pdoc->SetDefaultCharClasses(false); if (lParam == 0) @@ -7518,6 +7521,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { } break; + case SCI_GETWHITESPACECHARS: + return pdoc->GetCharsOfClass(CharClassify::ccSpace, reinterpret_cast<unsigned char *>(lParam)); + case SCI_SETWHITESPACECHARS: { if (lParam == 0) return 0; @@ -7525,6 +7531,16 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { } break; + case SCI_GETPUNCTUATIONCHARS: + return pdoc->GetCharsOfClass(CharClassify::ccPunctuation, reinterpret_cast<unsigned char *>(lParam)); + + case SCI_SETPUNCTUATIONCHARS: { + if (lParam == 0) + return 0; + pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccPunctuation); + } + break; + case SCI_SETCHARSDEFAULT: pdoc->SetDefaultCharClasses(true); break; diff --git a/test/simpleTests.py b/test/simpleTests.py index 8e101b84e..d8abbb651 100644 --- a/test/simpleTests.py +++ b/test/simpleTests.py @@ -3,7 +3,7 @@ from 
__future__ import with_statement from __future__ import unicode_literals -import ctypes, os, sys, unittest +import codecs, ctypes, os, sys, unittest import XiteWin @@ -1348,6 +1348,133 @@ class TestDirectAccess(unittest.TestCase): cpBuffer = ctypes.c_char_p(rangePointer) self.assertEquals(cpBuffer.value, text[1:]) +class TestWordChars(unittest.TestCase): + def setUp(self): + self.xite = XiteWin.xiteFrame + self.ed = self.xite.ed + self.ed.ClearAll() + self.ed.EmptyUndoBuffer() + + def tearDown(self): + self.ed.SetCharsDefault() + + def _setChars(self, charClass, chars): + """ Wrapper to call self.ed.Set*Chars with the right type + @param charClass {str} the character class, "word", "space", etc. + @param chars {iterable of int} characters to set + """ + if sys.version_info.major == 2: + # Python 2, use latin-1 encoded str + unichars = (unichr(x) for x in chars if x != 0) + # can't use literal u"", that's a syntax error in Py3k + # unicode() doesn't exist in Py3k, but we never run it there + result = unicode("").join(unichars).encode("latin-1") + else: + # Python 3, use bytes() + result = bytes(x for x in chars if x != 0) + meth = getattr(self.ed, "Set%sChars" % (charClass.capitalize())) + return meth(None, result) + + def assertCharSetsEqual(self, first, second, *args, **kwargs): + """ Assert that the two character sets are equal. + If either set is an iterable of numbers, convert them to chars + first. 
""" + first_set = set() + for c in first: + first_set.add(chr(c) if isinstance(c, int) else c) + second_set = set() + for c in second: + second_set.add(chr(c) if isinstance(c, int) else c) + return self.assertEqual(first_set, second_set, *args, **kwargs) + + def testDefaultWordChars(self): + # check that the default word chars are as expected + import string + dataLen = self.ed.GetWordChars(None, None) + data = b"\0" * dataLen + self.ed.GetWordChars(None, data) + self.assertEquals(dataLen, len(data)) + expected = set(string.digits + string.ascii_letters + '_') | \ + set(chr(x) for x in range(0x80, 0x100)) + self.assertCharSetsEqual(data, expected) + + def testDefaultWhitespaceChars(self): + # check that the default whitespace chars are as expected + import string + dataLen = self.ed.GetWhitespaceChars(None, None) + data = b"\0" * dataLen + self.ed.GetWhitespaceChars(None, data) + self.assertEquals(dataLen, len(data)) + expected = (set(chr(x) for x in (range(0, 0x20))) | set(' ')) - \ + set(['\r', '\n']) + self.assertCharSetsEqual(data, expected) + + def testDefaultPunctuationChars(self): + # check that the default punctuation chars are as expected + import string + dataLen = self.ed.GetPunctuationChars(None, None) + data = b"\0" * dataLen + self.ed.GetPunctuationChars(None, data) + self.assertEquals(dataLen, len(data)) + expected = set(chr(x) for x in range(0x20, 0x80)) - \ + set(string.ascii_letters + string.digits + "\r\n_ ") + self.assertCharSetsEqual(data, expected) + + def testCustomWordChars(self): + # check that setting things to whitespace chars makes them not words + self._setChars("whitespace", range(1, 0x100)) + dataLen = self.ed.GetWordChars(None, None) + data = b"\0" * dataLen + self.ed.GetWordChars(None, data) + self.assertEquals(dataLen, len(data)) + expected = set() + self.assertCharSetsEqual(data, expected) + # and now set something to make sure that works too + expected = set(range(1, 0x100, 2)) + self._setChars("word", expected) + dataLen = 
self.ed.GetWordChars(None, None) + data = b"\0" * dataLen + self.ed.GetWordChars(None, data) + self.assertEquals(dataLen, len(data)) + self.assertCharSetsEqual(data, expected) + + def testCustomWhitespaceChars(self): + # check setting whitespace chars to non-default values + self._setChars("word", range(1, 0x100)) + # we can't change chr(0) from being anything but whitespace + expected = set([0]) + dataLen = self.ed.GetWhitespaceChars(None, None) + data = b"\0" * dataLen + self.ed.GetWhitespaceChars(None, data) + self.assertEquals(dataLen, len(data)) + self.assertCharSetsEqual(data, expected) + # now try to set it to something custom + expected = set(range(1, 0x100, 2)) | set([0]) + self._setChars("whitespace", expected) + dataLen = self.ed.GetWhitespaceChars(None, None) + data = b"\0" * dataLen + self.ed.GetWhitespaceChars(None, data) + self.assertEquals(dataLen, len(data)) + self.assertCharSetsEqual(data, expected) + + def testCustomPunctuationChars(self): + # check setting punctuation chars to non-default values + self._setChars("word", range(1, 0x100)) + expected = set() + dataLen = self.ed.GetPunctuationChars(None, None) + data = b"\0" * dataLen + self.ed.GetPunctuationChars(None, data) + self.assertEquals(dataLen, len(data)) + self.assertEquals(set(data), expected) + # now try to set it to something custom + expected = set(range(1, 0x100, 1)) + self._setChars("punctuation", expected) + dataLen = self.ed.GetPunctuationChars(None, None) + data = b"\0" * dataLen + self.ed.GetPunctuationChars(None, data) + self.assertEquals(dataLen, len(data)) + self.assertCharSetsEqual(data, expected) + #~ import os #~ for x in os.getenv("PATH").split(";"): #~ n = "scilexer.dll" diff --git a/test/unit/makefile b/test/unit/makefile index 35a6fd0ce..220952c6f 100644 --- a/test/unit/makefile +++ b/test/unit/makefile @@ -42,7 +42,7 @@ CXXFLAGS += -g -Wall -Wextra -Wno-unused-function #~ CXXFLAGS += -g -Wall CASES:=$(addsuffix .o,$(basename $(notdir $(wildcard test*.cxx)))) 
-TESTEDOBJS=ContractionState.o RunStyles.o +TESTEDOBJS=ContractionState.o RunStyles.o CharClassify.o TESTS=$(EXE) diff --git a/test/unit/testCharClassify.cxx b/test/unit/testCharClassify.cxx new file mode 100644 index 000000000..8ff98b302 --- /dev/null +++ b/test/unit/testCharClassify.cxx @@ -0,0 +1,110 @@ +// Unit Tests for Scintilla internal data structures + +#include <string.h> + +#include "Platform.h" + +#include "CharClassify.h" + +#include <gtest/gtest.h> + +// Test CharClassify. + +class CharClassifyTest : public::testing::Test { +protected: + virtual void SetUp() { + pcc = new CharClassify(); + for (int ch = 0; ch < 256; ch++) { + if (ch == '\r' || ch == '\n') + charClass[ch] = CharClassify::ccNewLine; + else if (ch < 0x20 || ch == ' ') + charClass[ch] = CharClassify::ccSpace; + else if (ch >= 0x80 || isalnum(ch) || ch == '_') + charClass[ch] = CharClassify::ccWord; + else + charClass[ch] = CharClassify::ccPunctuation; + } + } + + virtual void TearDown() { + delete pcc; + pcc = 0; + } + + CharClassify *pcc; + CharClassify::cc charClass[256]; + + static const char* GetClassName(CharClassify::cc charClass) { + switch(charClass) { + #define CASE(c) case CharClassify::c: return #c + CASE(ccSpace); + CASE(ccNewLine); + CASE(ccWord); + CASE(ccPunctuation); + #undef CASE + default: + return "<unknown>"; + } + } +}; + +TEST_F(CharClassifyTest, Defaults) { + for (int i = 0; i < 256; i++) { + EXPECT_EQ(charClass[i], pcc->GetClass(i)) + << "Character " << i + << " should be class " << GetClassName(charClass[i]) + << ", but got " << GetClassName(pcc->GetClass(i)); + } +} + +TEST_F(CharClassifyTest, Custom) { + unsigned char buf[2] = {0, 0}; + for (int i = 0; i < 256; i++) { + CharClassify::cc thisClass = CharClassify::cc(i % 4); + buf[0] = i; + pcc->SetCharClasses(buf, thisClass); + charClass[i] = thisClass; + } + for (int i = 0; i < 256; i++) { + EXPECT_EQ(charClass[i], pcc->GetClass(i)) + << "Character " << i + << " should be class " << GetClassName(charClass[i]) + 
<< ", but got " << GetClassName(pcc->GetClass(i)); + } +} + +TEST_F(CharClassifyTest, CharsOfClass) { + unsigned char buf[2] = {0, 0}; + for (int i = 1; i < 256; i++) { + CharClassify::cc thisClass = CharClassify::cc(i % 4); + buf[0] = i; + pcc->SetCharClasses(buf, thisClass); + charClass[i] = thisClass; + } + for (int classVal = 0; classVal < 4; ++classVal) { + CharClassify::cc thisClass = CharClassify::cc(classVal % 4); + int size = pcc->GetCharsOfClass(thisClass, NULL); + unsigned char* buffer = reinterpret_cast<unsigned char*>(malloc(size + 1)); + ASSERT_TRUE(buffer); + buffer[size] = '\0'; + pcc->GetCharsOfClass(thisClass, buffer); + for (int i = 1; i < 256; i++) { + if (charClass[i] == thisClass) { + EXPECT_TRUE(memchr(reinterpret_cast<char*>(buffer), i, size)) + << "Character " << i + << " should be class " << GetClassName(thisClass) + << ", but was not in GetCharsOfClass;" + << " it is reported to be " + << GetClassName(pcc->GetClass(i)); + } else { + EXPECT_FALSE(memchr(reinterpret_cast<char*>(buffer), i, size)) + << "Character " << i + << " should not be class " << GetClassName(thisClass) + << ", but was in GetCharsOfClass" + << " it is reported to be " + << GetClassName(pcc->GetClass(i)); + } + } + free(buffer); + } +} |