From 86266d4700632860705fc2d4e88d4be4f5228be1 Mon Sep 17 00:00:00 2001 From: mitchell Date: Tue, 16 Apr 2019 22:50:17 -0400 Subject: Backport: Feature [feature-requests:#1259]. Add SCI_SETCHARACTERCATEGORYOPTIMIZATION API to optimize speed of character category features. Backport of changeset 7392:2832adedd0f4, but with added includes for Sci::clamp(). --- curses/ScintillaCurses.cxx | 1 + doc/ScintillaDoc.html | 11 +++++++++++ doc/ScintillaHistory.html | 6 ++++++ include/Scintilla.h | 2 ++ include/Scintilla.iface | 6 ++++++ lexlib/CharacterCategory.cxx | 33 +++++++++++++++++++++++++++++++++ lexlib/CharacterCategory.h | 17 +++++++++++++++++ scripts/GenerateCharacterCategory.py | 20 +++++++++++++++----- src/Document.cxx | 10 +++++++++- src/Document.h | 3 +++ src/Editor.cxx | 7 +++++++ test/simpleTests.py | 5 +++++ 12 files changed, 115 insertions(+), 6 deletions(-) diff --git a/curses/ScintillaCurses.cxx b/curses/ScintillaCurses.cxx index 1c69801d1..a39ccbec3 100644 --- a/curses/ScintillaCurses.cxx +++ b/curses/ScintillaCurses.cxx @@ -22,6 +22,7 @@ #include "ILoader.h" #include "ILexer.h" #include "Scintilla.h" +#include "CharacterCategory.h" #include "Position.h" #include "UniqueString.h" #include "SplitVector.h" diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html index 76acc4c71..0bd7c2243 100644 --- a/doc/ScintillaDoc.html +++ b/doc/ScintillaDoc.html @@ -2467,6 +2467,8 @@ struct Sci_TextToFind { SCI_SETPUNCTUATIONCHARS(<unused>, const char *characters)
SCI_GETPUNCTUATIONCHARS(<unused>, char *characters) → int
SCI_SETCHARSDEFAULT
+ SCI_SETCHARACTERCATEGORYOPTIMIZATION(int countCharacters)
+ SCI_GETCHARACTERCATEGORYOPTIMIZATION → int

SCI_WORDENDPOSITION(int pos, bool onlyWordCharacters) → int
SCI_WORDSTARTPOSITION(int pos, bool onlyWordCharacters) → int
@@ -2594,6 +2596,15 @@ struct Sci_TextToFind { characters with codes less than 0x20, with word characters set to alphanumeric and '_'.

+

SCI_SETCHARACTERCATEGORYOPTIMIZATION(int countCharacters)
+ SCI_GETCHARACTERCATEGORYOPTIMIZATION → int
+ Optimize speed of character category features like determining whether a character is a space or number at the expense of memory. + Mostly used for Unicode documents. + The countCharacters parameter determines how many characters starting from 0 are added to a look-up table with one byte used for each character. + It is reasonable to cover the set of characters likely to be used in a document so 0x100 for simple Roman text, + 0x1000 to cover most simple alphabets, 0x10000 to cover most East Asian languages, and 0x110000 to cover all possible characters. +

+

Word keyboard commands are: