1 files changed, 13 insertions, 2 deletions
diff --git a/lexlib/CharacterCategory.cxx b/lexlib/CharacterCategory.cxx
index a75551ccc..a83776028 100644
--- a/lexlib/CharacterCategory.cxx
+++ b/lexlib/CharacterCategory.cxx
@@ -18,7 +18,7 @@ namespace Scintilla {
 namespace {
 	// Use an unnamed namespace to protect the declarations from name conflicts
 
-static int catRanges[] = {
+const int catRanges[] = {
 //++Autogenerated -- start of section automatically generated
 // Created with Python 3.3.0,  Unicode 6.1.0
 25,
@@ -3275,14 +3275,25 @@ static int catRanges[] = {
 
 const int maxUnicode = 0x10ffff;
 const int maskCategory = 0x1F;
+const int nRanges = sizeof(catRanges) / sizeof(catRanges[0]);
 
 }
 
+// Each element in catRanges is the start of a range of Unicode characters in
+// one general category.
+// The value consists of a 21-bit character value shifted left 5 bits and a 5-bit
+// category matching the CharacterCategory enumeration.
+// Initial version has 3249 entries and adds about 13K to the executable.
+// The array is in ascending order so can be searched using binary search.
+// Therefore the average call takes about log2(3249), i.e. roughly 12, comparisons.
+// For speed, it may be useful to make a linear table for the common values,
+// possibly for 0..0xff for most Western European text or 0..0xfff for most
+// alphabetic languages.
+
 CharacterCategory CategoriseCharacter(int character) {
 	if (character < 0 || character > maxUnicode)
 		return ccCn;
 	const int baseValue = character * (maskCategory+1) + maskCategory;
-	const int nRanges = sizeof(catRanges) / sizeof(catRanges[0]);
 	const int *placeAfter = std::lower_bound(catRanges, catRanges+nRanges, baseValue);
 	return static_cast<CharacterCategory>(*(placeAfter-1) & maskCategory);
 }