diff options
| -rw-r--r-- | include/Scintilla.h | 4 | ||||
| -rw-r--r-- | include/Scintilla.iface | 14 | ||||
| -rw-r--r-- | src/CharClassify.cxx | 16 | ||||
| -rw-r--r-- | src/CharClassify.h | 1 | ||||
| -rw-r--r-- | src/Document.cxx | 4 | ||||
| -rw-r--r-- | src/Document.h | 1 | ||||
| -rw-r--r-- | src/Editor.cxx | 16 | ||||
| -rw-r--r-- | test/simpleTests.py | 129 | ||||
| -rw-r--r-- | test/unit/makefile | 2 | ||||
| -rw-r--r-- | test/unit/testCharClassify.cxx | 110 | 
10 files changed, 295 insertions, 2 deletions
| diff --git a/include/Scintilla.h b/include/Scintilla.h index 8251726dc..3cacd5310 100644 --- a/include/Scintilla.h +++ b/include/Scintilla.h @@ -246,6 +246,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,  #define SCI_GETCARETPERIOD 2075  #define SCI_SETCARETPERIOD 2076  #define SCI_SETWORDCHARS 2077 +#define SCI_GETWORDCHARS 2646  #define SCI_BEGINUNDOACTION 2078  #define SCI_ENDUNDOACTION 2079  #define INDIC_PLAIN 0 @@ -684,6 +685,9 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,  #define SCI_WORDRIGHTEND 2441  #define SCI_WORDRIGHTENDEXTEND 2442  #define SCI_SETWHITESPACECHARS 2443 +#define SCI_GETWHITESPACECHARS 2647 +#define SCI_SETPUNCTUATIONCHARS 2648 +#define SCI_GETPUNCTUATIONCHARS 2649  #define SCI_SETCHARSDEFAULT 2444  #define SCI_AUTOCGETCURRENT 2445  #define SCI_AUTOCGETCURRENTTEXT 2610 diff --git a/include/Scintilla.iface b/include/Scintilla.iface index 7abbb68c1..0dc9eb35a 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -552,6 +552,10 @@ set void SetCaretPeriod=2076(int periodMilliseconds,)  # First sets defaults like SetCharsDefault.  set void SetWordChars=2077(, string characters) +# Get the set of characters making up words for when moving or selecting by word. +# Returns the number of characters +get int GetWordChars=2646(, stringresult characters) +  # Start a sequence of actions that is undone and redone as a unit.  # May be nested.  fun void BeginUndoAction=2078(,) @@ -1800,6 +1804,16 @@ fun void WordRightEndExtend=2442(,)  # Should be called after SetWordChars.  set void SetWhitespaceChars=2443(, string characters) +# Get the set of characters making up whitespace for when moving or selecting by word. +get void GetWhitespaceChars=2647(, stringresult characters) + +# Set the set of characters making up punctuation characters +# Should be called after SetWordChars. 
+set void SetPunctuationChars=2648(, string characters) + +# Get the set of characters making up punctuation characters +get void GetPunctuationChars=2649(, stringresult characters) +  # Reset the set of characters for whitespace and word characters to the defaults.  fun void SetCharsDefault=2444(,) diff --git a/src/CharClassify.cxx b/src/CharClassify.cxx index c16af4547..7e3db737e 100644 --- a/src/CharClassify.cxx +++ b/src/CharClassify.cxx @@ -46,3 +46,19 @@ void CharClassify::SetCharClasses(const unsigned char *chars, cc newCharClass) {  		}  	}  } + +int CharClassify::GetCharsOfClass(cc characterClass, unsigned char *buffer) { +	// Get characters belonging to the given char class; return the number +	// of characters (if the buffer is NULL, don't write to it). +	int count = 0; +	for (int ch = maxChar - 1; ch >= 0; --ch) { +		if (charClass[ch] == characterClass) { +			++count; +			if (buffer) { +				*buffer = static_cast<unsigned char>(ch); +				buffer++; +			} +		} +	} +	return count; +} diff --git a/src/CharClassify.h b/src/CharClassify.h index e8b798ecb..5d2734c00 100644 --- a/src/CharClassify.h +++ b/src/CharClassify.h @@ -19,6 +19,7 @@ public:  	enum cc { ccSpace, ccNewLine, ccWord, ccPunctuation };  	void SetDefaultCharClasses(bool includeWordClass);  	void SetCharClasses(const unsigned char *chars, cc newCharClass); +	int GetCharsOfClass(cc charClass, unsigned char *buffer);  	cc GetClass(unsigned char ch) const { return static_cast<cc>(charClass[ch]);}  	bool IsWord(unsigned char ch) const { return static_cast<cc>(charClass[ch]) == ccWord;} diff --git a/src/Document.cxx b/src/Document.cxx index 244e96e4f..82b9e070b 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -1615,6 +1615,10 @@ void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCh      charClass.SetCharClasses(chars, newCharClass);  } +int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) { +    return 
charClass.GetCharsOfClass(characterClass, buffer); +} +  void Document::SetStylingBits(int bits) {  	stylingBits = bits;  	stylingBitsMask = (1 << stylingBits) - 1; diff --git a/src/Document.h b/src/Document.h index 7e03f3d9e..30c6aee1c 100644 --- a/src/Document.h +++ b/src/Document.h @@ -364,6 +364,7 @@ public:  	void SetDefaultCharClasses(bool includeWordClass);  	void SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass); +	int GetCharsOfClass(CharClassify::cc charClass, unsigned char *buffer);  	void SetStylingBits(int bits);  	void SCI_METHOD StartStyling(int position, char mask);  	bool SCI_METHOD SetStyleFor(int length, char style); diff --git a/src/Editor.cxx b/src/Editor.cxx index d72ff302c..2bc89ba1f 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -7510,6 +7510,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {  		caret.period = wParam;  		break; +	case SCI_GETWORDCHARS: +		return pdoc->GetCharsOfClass(CharClassify::ccWord, reinterpret_cast<unsigned char *>(lParam)); +  	case SCI_SETWORDCHARS: {  			pdoc->SetDefaultCharClasses(false);  			if (lParam == 0) @@ -7518,6 +7521,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {  		}  		break; +	case SCI_GETWHITESPACECHARS: +		return pdoc->GetCharsOfClass(CharClassify::ccSpace, reinterpret_cast<unsigned char *>(lParam)); +  	case SCI_SETWHITESPACECHARS: {  			if (lParam == 0)  				return 0; @@ -7525,6 +7531,16 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {  		}  		break; +	case SCI_GETPUNCTUATIONCHARS: +		return pdoc->GetCharsOfClass(CharClassify::ccPunctuation, reinterpret_cast<unsigned char *>(lParam)); + +	case SCI_SETPUNCTUATIONCHARS: { +			if (lParam == 0) +				return 0; +			pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccPunctuation); +		} +		break; +  	case SCI_SETCHARSDEFAULT:  		pdoc->SetDefaultCharClasses(true);  		break; diff --git 
a/test/simpleTests.py b/test/simpleTests.py index 8e101b84e..d8abbb651 100644 --- a/test/simpleTests.py +++ b/test/simpleTests.py @@ -3,7 +3,7 @@  from __future__ import with_statement  from __future__ import unicode_literals -import ctypes, os, sys, unittest +import codecs, ctypes, os, sys, unittest  import XiteWin @@ -1348,6 +1348,133 @@ class TestDirectAccess(unittest.TestCase):  		cpBuffer = ctypes.c_char_p(rangePointer)  		self.assertEquals(cpBuffer.value, text[1:]) +class TestWordChars(unittest.TestCase): +	def setUp(self): +		self.xite = XiteWin.xiteFrame +		self.ed = self.xite.ed +		self.ed.ClearAll() +		self.ed.EmptyUndoBuffer() + +	def tearDown(self): +		self.ed.SetCharsDefault() + +	def _setChars(self, charClass, chars): +		""" Wrapper to call self.ed.Set*Chars with the right type +		@param charClass {str} the character class, "word", "space", etc. +		@param chars {iterable of int} characters to set +		""" +		if sys.version_info.major == 2: +			# Python 2, use latin-1 encoded str +			unichars = (unichr(x) for x in chars if x != 0) +			# can't use literal u"", that's a syntax error in Py3k +			# unicode() doesn't exist in Py3k, but we never run it there +			result = unicode("").join(unichars).encode("latin-1") +		else: +			# Python 3, use bytes() +			result = bytes(x for x in chars if x != 0) +		meth = getattr(self.ed, "Set%sChars" % (charClass.capitalize())) +		return meth(None, result) + +	def assertCharSetsEqual(self, first, second, *args, **kwargs): +		""" Assert that the two character sets are equal. +		If either set are an iterable of numbers, convert them to chars +		first. 
""" +		first_set = set() +		for c in first: +			first_set.add(chr(c) if isinstance(c, int) else c) +		second_set = set() +		for c in second: +			second_set.add(chr(c) if isinstance(c, int) else c) +		return self.assertEqual(first_set, second_set, *args, **kwargs) + +	def testDefaultWordChars(self): +		# check that the default word chars are as expected +		import string +		dataLen = self.ed.GetWordChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetWordChars(None, data) +		self.assertEquals(dataLen, len(data)) +		expected = set(string.digits + string.ascii_letters + '_') | \ +			set(chr(x) for x in range(0x80, 0x100)) +		self.assertCharSetsEqual(data, expected) + +	def testDefaultWhitespaceChars(self): +		# check that the default whitespace chars are as expected +		import string +		dataLen = self.ed.GetWhitespaceChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetWhitespaceChars(None, data) +		self.assertEquals(dataLen, len(data)) +		expected = (set(chr(x) for x in (range(0, 0x20))) | set(' ')) - \ +			set(['\r', '\n']) +		self.assertCharSetsEqual(data, expected) + +	def testDefaultPunctuationChars(self): +		# check that the default punctuation chars are as expected +		import string +		dataLen = self.ed.GetPunctuationChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetPunctuationChars(None, data) +		self.assertEquals(dataLen, len(data)) +		expected = set(chr(x) for x in range(0x20, 0x80)) - \ +			set(string.ascii_letters + string.digits + "\r\n_ ") +		self.assertCharSetsEqual(data, expected) + +	def testCustomWordChars(self): +		# check that setting things to whitespace chars makes them not words +		self._setChars("whitespace", range(1, 0x100)) +		dataLen = self.ed.GetWordChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetWordChars(None, data) +		self.assertEquals(dataLen, len(data)) +		expected = set() +		self.assertCharSetsEqual(data, expected) +		# and now set something to make sure that works too +		expected = set(range(1, 0x100, 2)) +		
self._setChars("word", expected) +		dataLen = self.ed.GetWordChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetWordChars(None, data) +		self.assertEquals(dataLen, len(data)) +		self.assertCharSetsEqual(data, expected) + +	def testCustomWhitespaceChars(self): +		# check setting whitespace chars to non-default values +		self._setChars("word", range(1, 0x100)) +		# we can't change chr(0) from being anything but whitespace +		expected = set([0]) +		dataLen = self.ed.GetWhitespaceChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetWhitespaceChars(None, data) +		self.assertEquals(dataLen, len(data)) +		self.assertCharSetsEqual(data, expected) +		# now try to set it to something custom +		expected = set(range(1, 0x100, 2)) | set([0]) +		self._setChars("whitespace", expected) +		dataLen = self.ed.GetWhitespaceChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetWhitespaceChars(None, data) +		self.assertEquals(dataLen, len(data)) +		self.assertCharSetsEqual(data, expected) + +	def testCustomPunctuationChars(self): +		# check setting punctuation chars to non-default values +		self._setChars("word", range(1, 0x100)) +		expected = set() +		dataLen = self.ed.GetPunctuationChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetPunctuationChars(None, data) +		self.assertEquals(dataLen, len(data)) +		self.assertEquals(set(data), expected) +		# now try to set it to something custom +		expected = set(range(1, 0x100, 1)) +		self._setChars("punctuation", expected) +		dataLen = self.ed.GetPunctuationChars(None, None) +		data = b"\0" * dataLen +		self.ed.GetPunctuationChars(None, data) +		self.assertEquals(dataLen, len(data)) +		self.assertCharSetsEqual(data, expected) +  #~ import os  #~ for x in os.getenv("PATH").split(";"):  	#~ n = "scilexer.dll" diff --git a/test/unit/makefile b/test/unit/makefile index 35a6fd0ce..220952c6f 100644 --- a/test/unit/makefile +++ b/test/unit/makefile @@ -42,7 +42,7 @@ CXXFLAGS += -g -Wall -Wextra -Wno-unused-function  #~ CXXFLAGS 
+= -g -Wall  CASES:=$(addsuffix .o,$(basename $(notdir $(wildcard test*.cxx)))) -TESTEDOBJS=ContractionState.o RunStyles.o +TESTEDOBJS=ContractionState.o RunStyles.o CharClassify.o  TESTS=$(EXE) diff --git a/test/unit/testCharClassify.cxx b/test/unit/testCharClassify.cxx new file mode 100644 index 000000000..8ff98b302 --- /dev/null +++ b/test/unit/testCharClassify.cxx @@ -0,0 +1,110 @@ +// Unit Tests for Scintilla internal data structures + +#include <string.h> + +#include "Platform.h" + +#include "CharClassify.h" + +#include <gtest/gtest.h> + +// Test CharClassify. + +class CharClassifyTest : public::testing::Test { +protected: +	virtual void SetUp() { +		pcc = new CharClassify(); +		for (int ch = 0; ch < 256; ch++) { +			if (ch == '\r' || ch == '\n') +				charClass[ch] = CharClassify::ccNewLine; +			else if (ch < 0x20 || ch == ' ') +				charClass[ch] = CharClassify::ccSpace; +			else if (ch >= 0x80 || isalnum(ch) || ch == '_') +				charClass[ch] = CharClassify::ccWord; +			else +				charClass[ch] = CharClassify::ccPunctuation; +		} +	} + +	virtual void TearDown() { +		delete pcc; +		pcc = 0; +	} + +	CharClassify *pcc; +	CharClassify::cc charClass[256]; + +	static const char* GetClassName(CharClassify::cc charClass) { +		switch(charClass) { +			#define CASE(c) case CharClassify::c: return #c +			CASE(ccSpace); +			CASE(ccNewLine); +			CASE(ccWord); +			CASE(ccPunctuation); +			#undef CASE +			default: +				return "<unknown>"; +		} +	} +}; + +TEST_F(CharClassifyTest, Defaults) { +	for (int i = 0; i < 256; i++) { +		EXPECT_EQ(charClass[i], pcc->GetClass(i)) +			<< "Character " << i +			<< " should be class " << GetClassName(charClass[i]) +			<< ", but got " << GetClassName(pcc->GetClass(i)); +	} +} + +TEST_F(CharClassifyTest, Custom) { +	unsigned char buf[2] = {0, 0}; +	for (int i = 0; i < 256; i++) { +		CharClassify::cc thisClass = CharClassify::cc(i % 4); +		buf[0] = i; +		pcc->SetCharClasses(buf, thisClass); +		charClass[i] = thisClass; +	} +	for (int i = 0; i < 
256; i++) { +		EXPECT_EQ(charClass[i], pcc->GetClass(i)) +			<< "Character " << i +			<< " should be class " << GetClassName(charClass[i]) +			<< ", but got " << GetClassName(pcc->GetClass(i)); +	} +} + +TEST_F(CharClassifyTest, CharsOfClass) { +	unsigned char buf[2] = {0, 0}; +	for (int i = 1; i < 256; i++) { +		CharClassify::cc thisClass = CharClassify::cc(i % 4); +		buf[0] = i; +		pcc->SetCharClasses(buf, thisClass); +		charClass[i] = thisClass; +	} +	for (int classVal = 0; classVal < 4; ++classVal) { +		CharClassify::cc thisClass = CharClassify::cc(classVal % 4); +		int size = pcc->GetCharsOfClass(thisClass, NULL); +		unsigned char* buffer = reinterpret_cast<unsigned char*>(malloc(size + 1)); +		ASSERT_TRUE(buffer); +		buffer[size] = '\0'; +		pcc->GetCharsOfClass(thisClass, buffer); +		for (int i = 1; i < 256; i++) { +			if (charClass[i] == thisClass) { +				EXPECT_TRUE(memchr(reinterpret_cast<char*>(buffer), i, size)) +					<< "Character " << i +					<< " should be class " << GetClassName(thisClass) +					<< ", but was not in GetCharsOfClass;" +					<< " it is reported to be " +					<< GetClassName(pcc->GetClass(i)); +			} else { +				EXPECT_FALSE(memchr(reinterpret_cast<char*>(buffer), i, size)) +					<< "Character " << i +					<< " should not be class " << GetClassName(thisClass) +					<< ", but was in GetCharsOfClass" +					<< " it is reported to be " +					<< GetClassName(pcc->GetClass(i)); +			} +		} +		free(buffer); +	} +} | 
