From 13b6f88d9b4710e1c51b36a97c3b45a6441a7820 Mon Sep 17 00:00:00 2001 From: Neil Date: Tue, 27 Apr 2021 10:04:57 +1000 Subject: Rename CharacterSet and CharacterCategory modules in Scintilla to CharacterType and CharacterCategoryMap to avoid clashes with Lexilla when building an executable that includes both. --- src/AutoComplete.cxx | 2 +- src/CharClassify.cxx | 2 +- src/CharacterCategory.cxx | 4103 ----------------------------------------- src/CharacterCategory.h | 50 - src/CharacterCategoryMap.cxx | 4105 ++++++++++++++++++++++++++++++++++++++++++ src/CharacterCategoryMap.h | 52 + src/CharacterSet.cxx | 52 - src/CharacterSet.h | 208 --- src/CharacterType.cxx | 51 + src/CharacterType.h | 112 ++ src/Document.cxx | 4 +- src/EditModel.cxx | 2 +- src/EditView.cxx | 4 +- src/Editor.cxx | 4 +- src/MarginView.cxx | 2 +- src/PositionCache.cxx | 2 +- src/ScintillaBase.cxx | 2 +- 17 files changed, 4332 insertions(+), 4425 deletions(-) delete mode 100644 src/CharacterCategory.cxx delete mode 100644 src/CharacterCategory.h create mode 100644 src/CharacterCategoryMap.cxx create mode 100644 src/CharacterCategoryMap.h delete mode 100644 src/CharacterSet.cxx delete mode 100644 src/CharacterSet.h create mode 100644 src/CharacterType.cxx create mode 100644 src/CharacterType.h (limited to 'src') diff --git a/src/AutoComplete.cxx b/src/AutoComplete.cxx index 779d34965..3de456371 100644 --- a/src/AutoComplete.cxx +++ b/src/AutoComplete.cxx @@ -24,7 +24,7 @@ #include "Platform.h" #include "Scintilla.h" -#include "CharacterSet.h" +#include "CharacterType.h" #include "Position.h" #include "AutoComplete.h" diff --git a/src/CharClassify.cxx b/src/CharClassify.cxx index f82fe75a6..64d720740 100644 --- a/src/CharClassify.cxx +++ b/src/CharClassify.cxx @@ -10,7 +10,7 @@ #include -#include "CharacterSet.h" +#include "CharacterType.h" #include "CharClassify.h" using namespace Scintilla; diff --git a/src/CharacterCategory.cxx b/src/CharacterCategory.cxx deleted file mode 100644 index 
fdd6c15e7..000000000 --- a/src/CharacterCategory.cxx +++ /dev/null @@ -1,4103 +0,0 @@ -// Scintilla source code edit control -/** @file CharacterCategory.cxx - ** Returns the Unicode general category of a character. - ** Table automatically regenerated by scripts/GenerateCharacterCategory.py - ** Should only be rarely regenerated for new versions of Unicode. - **/ -// Copyright 2013 by Neil Hodgson -// The License.txt file describes the conditions under which this software may be distributed. - -#include -#include -#include - -#include "CharacterCategory.h" - -namespace Scintilla { - -namespace { - // Use an unnamed namespace to protect the declarations from name conflicts - -const int catRanges[] = { -//++Autogenerated -- start of section automatically generated -// Created with Python 3.9.4, Unicode 13.0.0 -25, -1046, -1073, -1171, -1201, -1293, -1326, -1361, -1394, -1425, -1452, -1489, -1544, -1873, -1938, -2033, -2080, -2925, -2961, -2990, -3028, -3051, -3092, -3105, -3949, -3986, -4014, -4050, -4089, -5142, -5169, -5203, -5333, -5361, -5396, -5429, -5444, -5487, -5522, -5562, -5589, -5620, -5653, -5682, -5706, -5780, -5793, -5841, -5908, -5930, -5956, -6000, -6026, -6129, -6144, -6898, -6912, -7137, -7922, -7937, -8192, -8225, -8256, -8289, -8320, -8353, -8384, -8417, -8448, -8481, -8512, -8545, -8576, -8609, -8640, -8673, -8704, -8737, -8768, -8801, -8832, -8865, -8896, -8929, -8960, -8993, -9024, -9057, -9088, -9121, -9152, -9185, -9216, -9249, -9280, -9313, -9344, -9377, -9408, -9441, -9472, -9505, -9536, -9569, -9600, -9633, -9664, -9697, -9728, -9761, -9792, -9825, -9856, -9889, -9920, -9953, -10016, -10049, -10080, -10113, -10144, -10177, -10208, -10241, -10272, -10305, -10336, -10369, -10400, -10433, -10464, -10497, -10560, -10593, -10624, -10657, -10688, -10721, -10752, -10785, -10816, -10849, -10880, -10913, -10944, -10977, -11008, -11041, -11072, -11105, -11136, -11169, -11200, -11233, -11264, -11297, -11328, -11361, -11392, -11425, -11456, -11489, 
-11520, -11553, -11584, -11617, -11648, -11681, -11712, -11745, -11776, -11809, -11840, -11873, -11904, -11937, -11968, -12001, -12032, -12097, -12128, -12161, -12192, -12225, -12320, -12385, -12416, -12449, -12480, -12545, -12576, -12673, -12736, -12865, -12896, -12961, -12992, -13089, -13184, -13249, -13280, -13345, -13376, -13409, -13440, -13473, -13504, -13569, -13600, -13633, -13696, -13729, -13760, -13825, -13856, -13953, -13984, -14017, -14048, -14113, -14180, -14208, -14241, -14340, -14464, -14498, -14529, -14560, -14594, -14625, -14656, -14690, -14721, -14752, -14785, -14816, -14849, -14880, -14913, -14944, -14977, -15008, -15041, -15072, -15105, -15136, -15169, -15200, -15233, -15296, -15329, -15360, -15393, -15424, -15457, -15488, -15521, -15552, -15585, -15616, -15649, -15680, -15713, -15744, -15777, -15808, -15841, -15904, -15938, -15969, -16000, -16033, -16064, -16161, -16192, -16225, -16256, -16289, -16320, -16353, -16384, -16417, -16448, -16481, -16512, -16545, -16576, -16609, -16640, -16673, -16704, -16737, -16768, -16801, -16832, -16865, -16896, -16929, -16960, -16993, -17024, -17057, -17088, -17121, -17152, -17185, -17216, -17249, -17280, -17313, -17344, -17377, -17408, -17441, -17472, -17505, -17536, -17569, -17600, -17633, -17664, -17697, -17728, -17761, -17792, -17825, -17856, -17889, -17920, -17953, -17984, -18017, -18240, -18305, -18336, -18401, -18464, -18497, -18528, -18657, -18688, -18721, -18752, -18785, -18816, -18849, -18880, -18913, -21124, -21153, -22019, -22612, -22723, -23124, -23555, -23732, -23939, -23988, -24003, -24052, -24581, -28160, -28193, -28224, -28257, -28291, -28340, -28352, -28385, -28445, -28483, -28513, -28625, -28640, -28701, -28820, -28864, -28913, -28928, -29053, -29056, -29117, -29120, -29185, -29216, -29789, -29792, -30081, -31200, -31233, -31296, -31393, -31488, -31521, -31552, -31585, -31616, -31649, -31680, -31713, -31744, -31777, -31808, -31841, -31872, -31905, -31936, -31969, -32000, -32033, -32064, -32097, 
-32128, -32161, -32192, -32225, -32384, -32417, -32466, -32480, -32513, -32544, -32609, -32672, -34305, -35840, -35873, -35904, -35937, -35968, -36001, -36032, -36065, -36096, -36129, -36160, -36193, -36224, -36257, -36288, -36321, -36352, -36385, -36416, -36449, -36480, -36513, -36544, -36577, -36608, -36641, -36672, -36705, -36736, -36769, -36800, -36833, -36864, -36897, -36949, -36965, -37127, -37184, -37217, -37248, -37281, -37312, -37345, -37376, -37409, -37440, -37473, -37504, -37537, -37568, -37601, -37632, -37665, -37696, -37729, -37760, -37793, -37824, -37857, -37888, -37921, -37952, -37985, -38016, -38049, -38080, -38113, -38144, -38177, -38208, -38241, -38272, -38305, -38336, -38369, -38400, -38433, -38464, -38497, -38528, -38561, -38592, -38625, -38656, -38689, -38720, -38753, -38784, -38817, -38848, -38881, -38912, -38977, -39008, -39041, -39072, -39105, -39136, -39169, -39200, -39233, -39264, -39297, -39328, -39361, -39424, -39457, -39488, -39521, -39552, -39585, -39616, -39649, -39680, -39713, -39744, -39777, -39808, -39841, -39872, -39905, -39936, -39969, -40000, -40033, -40064, -40097, -40128, -40161, -40192, -40225, -40256, -40289, -40320, -40353, -40384, -40417, -40448, -40481, -40512, -40545, -40576, -40609, -40640, -40673, -40704, -40737, -40768, -40801, -40832, -40865, -40896, -40929, -40960, -40993, -41024, -41057, -41088, -41121, -41152, -41185, -41216, -41249, -41280, -41313, -41344, -41377, -41408, -41441, -41472, -41505, -41536, -41569, -41600, -41633, -41664, -41697, -41728, -41761, -41792, -41825, -41856, -41889, -41920, -41953, -41984, -42017, -42048, -42081, -42112, -42145, -42176, -42209, -42240, -42273, -42304, -42337, -42368, -42401, -42432, -42465, -42525, -42528, -43773, -43811, -43857, -44033, -45361, -45388, -45437, -45493, -45555, -45597, -45605, -47052, -47077, -47121, -47141, -47217, -47237, -47313, -47333, -47389, -47620, -48509, -48612, -48753, -48829, -49178, -49362, -49457, -49523, -49553, -49621, -49669, -50033, -50074, 
-50109, -50129, -50180, -51203, -51236, -51557, -52232, -52561, -52676, -52741, -52772, -55953, -55972, -56005, -56250, -56277, -56293, -56483, -56549, -56629, -56645, -56772, -56840, -57156, -57269, -57316, -57361, -57821, -57850, -57860, -57893, -57924, -58885, -59773, -59812, -62661, -63012, -63069, -63496, -63812, -64869, -65155, -65237, -65265, -65347, -65405, -65445, -65491, -65540, -66245, -66371, -66405, -66691, -66725, -66819, -66853, -67037, -67089, -67581, -67588, -68389, -68509, -68561, -68605, -68612, -68989, -70660, -71357, -71364, -71965, -72293, -72794, -72805, -73830, -73860, -75589, -75622, -75653, -75684, -75718, -75813, -76070, -76197, -76230, -76292, -76325, -76548, -76869, -76945, -77000, -77329, -77347, -77380, -77861, -77894, -77981, -77988, -78269, -78308, -78397, -78436, -79165, -79172, -79421, -79428, -79485, -79556, -79709, -79749, -79780, -79814, -79909, -80061, -80102, -80189, -80230, -80293, -80324, -80381, -80614, -80669, -80772, -80861, -80868, -80965, -81053, -81096, -81412, -81491, -81546, -81749, -81779, -81796, -81841, -81861, -81917, -81957, -82022, -82077, -82084, -82301, -82404, -82493, -82532, -83261, -83268, -83517, -83524, -83613, -83620, -83709, -83716, -83805, -83845, -83901, -83910, -84005, -84093, -84197, -84285, -84325, -84445, -84517, -84573, -84772, -84925, -84932, -84989, -85192, -85509, -85572, -85669, -85713, -85757, -86053, -86118, -86173, -86180, -86493, -86500, -86621, -86628, -87357, -87364, -87613, -87620, -87709, -87716, -87901, -87941, -87972, -88006, -88101, -88285, -88293, -88358, -88413, -88422, -88485, -88541, -88580, -88637, -89092, -89157, -89245, -89288, -89617, -89651, -89693, -89892, -89925, -90141, -90149, -90182, -90269, -90276, -90557, -90596, -90685, -90724, -91453, -91460, -91709, -91716, -91805, -91812, -91997, -92037, -92068, -92102, -92133, -92166, -92197, -92349, -92390, -92477, -92518, -92581, -92637, -92837, -92902, -92957, -93060, -93149, -93156, -93253, -93341, -93384, -93717, -93732, 
-93770, -93981, -94277, -94308, -94365, -94372, -94589, -94660, -94781, -94788, -94941, -95012, -95101, -95108, -95165, -95172, -95261, -95332, -95421, -95492, -95613, -95684, -96093, -96198, -96261, -96294, -96381, -96454, -96573, -96582, -96677, -96733, -96772, -96829, -96998, -97053, -97480, -97802, -97909, -98099, -98133, -98173, -98309, -98342, -98437, -98468, -98749, -98756, -98877, -98884, -99645, -99652, -100189, -100260, -100293, -100390, -100541, -100549, -100669, -100677, -100829, -101029, -101117, -101124, -101245, -101380, -101445, -101533, -101576, -101917, -102129, -102154, -102389, -102404, -102437, -102470, -102545, -102564, -102845, -102852, -102973, -102980, -103741, -103748, -104093, -104100, -104285, -104325, -104356, -104390, -104421, -104454, -104637, -104645, -104678, -104765, -104774, -104837, -104925, -105126, -105213, -105412, -105469, -105476, -105541, -105629, -105672, -106013, -106020, -106109, -106501, -106566, -106628, -106941, -106948, -107069, -107076, -108389, -108452, -108486, -108581, -108733, -108742, -108861, -108870, -108965, -108996, -109045, -109085, -109188, -109286, -109322, -109540, -109637, -109725, -109768, -110090, -110389, -110404, -110621, -110629, -110662, -110749, -110756, -111357, -111428, -112221, -112228, -112541, -112548, -112605, -112644, -112893, -112965, -113021, -113126, -113221, -113341, -113349, -113405, -113414, -113693, -113864, -114205, -114246, -114321, -114365, -114724, -116261, -116292, -116357, -116605, -116723, -116740, -116931, -116965, -117233, -117256, -117585, -117661, -118820, -118909, -118916, -118973, -118980, -119165, -119172, -119965, -119972, -120029, -120036, -120357, -120388, -120453, -120740, -120797, -120836, -121021, -121027, -121085, -121093, -121309, -121352, -121693, -121732, -121885, -122884, -122933, -123025, -123509, -123537, -123573, -123653, -123733, -123912, -124234, -124565, -124581, -124629, -124645, -124693, -124709, -124749, -124782, -124813, -124846, -124870, -124932, 
-125213, -125220, -126397, -126501, -126950, -126981, -127153, -127173, -127236, -127397, -127773, -127781, -128957, -128981, -129221, -129269, -129469, -129493, -129553, -129717, -129841, -129917, -131076, -132454, -132517, -132646, -132677, -132870, -132901, -132966, -133029, -133092, -133128, -133457, -133636, -133830, -133893, -133956, -134085, -134180, -134214, -134308, -134374, -134596, -134693, -134820, -135237, -135270, -135333, -135398, -135589, -135620, -135654, -135688, -136006, -136101, -136149, -136192, -137437, -137440, -137501, -137632, -137693, -137729, -139121, -139139, -139169, -139268, -149821, -149828, -149981, -150020, -150269, -150276, -150333, -150340, -150493, -150532, -151869, -151876, -152029, -152068, -153149, -153156, -153309, -153348, -153597, -153604, -153661, -153668, -153821, -153860, -154365, -154372, -156221, -156228, -156381, -156420, -158589, -158629, -158737, -159018, -159677, -159748, -160277, -160605, -160768, -163549, -163585, -163805, -163852, -163876, -183733, -183761, -183780, -184342, -184356, -185197, -185230, -185277, -185348, -187761, -187849, -187940, -188221, -188420, -188861, -188868, -188997, -189117, -189444, -190021, -190129, -190205, -190468, -191045, -191133, -191492, -191933, -191940, -192061, -192069, -192157, -192516, -194181, -194246, -194277, -194502, -194757, -194790, -194853, -195217, -195299, -195345, -195443, -195460, -195493, -195549, -195592, -195933, -196106, -196445, -196625, -196812, -196849, -196965, -197082, -197117, -197128, -197469, -197636, -198755, -198788, -200509, -200708, -200869, -200932, -202021, -202052, -202109, -202244, -204509, -204804, -205821, -205829, -205926, -206053, -206118, -206237, -206342, -206405, -206438, -206629, -206749, -206869, -206909, -206993, -207048, -207364, -208349, -208388, -208573, -208900, -210333, -210436, -211293, -211464, -211786, -211837, -211925, -212996, -213733, -213798, -213861, -213917, -213969, -214020, -215718, -215749, -215782, -215813, -216061, 
-216069, -216102, -216133, -216166, -216229, -216486, -216677, -217021, -217061, -217096, -217437, -217608, -217949, -218129, -218339, -218385, -218589, -218629, -219079, -219109, -219197, -221189, -221318, -221348, -222853, -222886, -222917, -223078, -223109, -223142, -223301, -223334, -223396, -223645, -223752, -224081, -224309, -224613, -224917, -225213, -225285, -225350, -225380, -226342, -226373, -226502, -226565, -226630, -226661, -226756, -226824, -227140, -228549, -228582, -228613, -228678, -228773, -228806, -228837, -228934, -229021, -229265, -229380, -230534, -230789, -231046, -231109, -231197, -231281, -231432, -231773, -231844, -231944, -232260, -233219, -233425, -233473, -233789, -233984, -235389, -235424, -235537, -235805, -236037, -236145, -236165, -236582, -236613, -236836, -236965, -236996, -237189, -237220, -237286, -237317, -237380, -237437, -237569, -238979, -240993, -241411, -241441, -242531, -243717, -245597, -245605, -245760, -245793, -245824, -245857, -245888, -245921, -245952, -245985, -246016, -246049, -246080, -246113, -246144, -246177, -246208, -246241, -246272, -246305, -246336, -246369, -246400, -246433, -246464, -246497, -246528, -246561, -246592, -246625, -246656, -246689, -246720, -246753, -246784, -246817, -246848, -246881, -246912, -246945, -246976, -247009, -247040, -247073, -247104, -247137, -247168, -247201, -247232, -247265, -247296, -247329, -247360, -247393, -247424, -247457, -247488, -247521, -247552, -247585, -247616, -247649, -247680, -247713, -247744, -247777, -247808, -247841, -247872, -247905, -247936, -247969, -248000, -248033, -248064, -248097, -248128, -248161, -248192, -248225, -248256, -248289, -248320, -248353, -248384, -248417, -248448, -248481, -248512, -248545, -248576, -248609, -248640, -248673, -248704, -248737, -248768, -248801, -248832, -248865, -248896, -248929, -248960, -248993, -249024, -249057, -249088, -249121, -249152, -249185, -249216, -249249, -249280, -249313, -249344, -249377, -249408, -249441, 
-249472, -249505, -249536, -249569, -249600, -249633, -249664, -249697, -249728, -249761, -249792, -249825, -249856, -249889, -249920, -249953, -249984, -250017, -250048, -250081, -250112, -250145, -250176, -250209, -250240, -250273, -250304, -250337, -250368, -250401, -250432, -250465, -250496, -250529, -250816, -250849, -250880, -250913, -250944, -250977, -251008, -251041, -251072, -251105, -251136, -251169, -251200, -251233, -251264, -251297, -251328, -251361, -251392, -251425, -251456, -251489, -251520, -251553, -251584, -251617, -251648, -251681, -251712, -251745, -251776, -251809, -251840, -251873, -251904, -251937, -251968, -252001, -252032, -252065, -252096, -252129, -252160, -252193, -252224, -252257, -252288, -252321, -252352, -252385, -252416, -252449, -252480, -252513, -252544, -252577, -252608, -252641, -252672, -252705, -252736, -252769, -252800, -252833, -252864, -252897, -252928, -252961, -252992, -253025, -253056, -253089, -253120, -253153, -253184, -253217, -253248, -253281, -253312, -253345, -253376, -253409, -253440, -253473, -253504, -253537, -253568, -253601, -253632, -253665, -253696, -253729, -253760, -253793, -253824, -253857, -253888, -253921, -254208, -254465, -254685, -254720, -254941, -254977, -255232, -255489, -255744, -256001, -256221, -256256, -256477, -256513, -256797, -256800, -256861, -256864, -256925, -256928, -256989, -256992, -257025, -257280, -257537, -258013, -258049, -258306, -258561, -258818, -259073, -259330, -259585, -259773, -259777, -259840, -259970, -260020, -260033, -260084, -260161, -260285, -260289, -260352, -260482, -260532, -260609, -260765, -260801, -260864, -261021, -261044, -261121, -261376, -261556, -261661, -261697, -261821, -261825, -261888, -262018, -262068, -262141, -262166, -262522, -262668, -262865, -262927, -262960, -262989, -263023, -263088, -263117, -263151, -263185, -263447, -263480, -263514, -263670, -263697, -263983, -264016, -264049, -264171, -264241, -264338, -264365, -264398, -264433, -264786, 
-264817, -264843, -264881, -265206, -265242, -265405, -265434, -265738, -265763, -265821, -265866, -266066, -266157, -266190, -266211, -266250, -266578, -266669, -266702, -266749, -266755, -267197, -267283, -268317, -268805, -269223, -269349, -269383, -269477, -269885, -270357, -270400, -270453, -270560, -270613, -270657, -270688, -270785, -270848, -270945, -270997, -271008, -271061, -271122, -271136, -271317, -271488, -271541, -271552, -271605, -271616, -271669, -271680, -271829, -271841, -271872, -272001, -272036, -272161, -272213, -272257, -272320, -272402, -272544, -272577, -272725, -272754, -272789, -272833, -272885, -272906, -273417, -274528, -274561, -274601, -274730, -274773, -274845, -274962, -275125, -275282, -275349, -275474, -275509, -275570, -275605, -275666, -275701, -275922, -275957, -276946, -277013, -277074, -277109, -277138, -277173, -278162, -286741, -286989, -287022, -287053, -287086, -287125, -287762, -287829, -288045, -288078, -288117, -290706, -290741, -291698, -292501, -293778, -293973, -296189, -296981, -297341, -297994, -299925, -302410, -303125, -308978, -309013, -309298, -309333, -311058, -311317, -314866, -314901, -322829, -322862, -322893, -322926, -322957, -322990, -323021, -323054, -323085, -323118, -323149, -323182, -323213, -323246, -323274, -324245, -325650, -325805, -325838, -325874, -326861, -326894, -326925, -326958, -326989, -327022, -327053, -327086, -327117, -327150, -327186, -327701, -335890, -340077, -340110, -340141, -340174, -340205, -340238, -340269, -340302, -340333, -340366, -340397, -340430, -340461, -340494, -340525, -340558, -340589, -340622, -340653, -340686, -340717, -340750, -340786, -342797, -342830, -342861, -342894, -342930, -343949, -343982, -344018, -352277, -353810, -354485, -354546, -354741, -355997, -356053, -357085, -357109, -360448, -361981, -361985, -363517, -363520, -363553, -363584, -363681, -363744, -363777, -363808, -363841, -363872, -363905, -363936, -364065, -364096, -364129, -364192, -364225, 
-364419, -364480, -364577, -364608, -364641, -364672, -364705, -364736, -364769, -364800, -364833, -364864, -364897, -364928, -364961, -364992, -365025, -365056, -365089, -365120, -365153, -365184, -365217, -365248, -365281, -365312, -365345, -365376, -365409, -365440, -365473, -365504, -365537, -365568, -365601, -365632, -365665, -365696, -365729, -365760, -365793, -365824, -365857, -365888, -365921, -365952, -365985, -366016, -366049, -366080, -366113, -366144, -366177, -366208, -366241, -366272, -366305, -366336, -366369, -366400, -366433, -366464, -366497, -366528, -366561, -366592, -366625, -366656, -366689, -366720, -366753, -366784, -366817, -366848, -366881, -366912, -366945, -366976, -367009, -367040, -367073, -367104, -367137, -367168, -367201, -367232, -367265, -367296, -367329, -367360, -367393, -367424, -367457, -367488, -367521, -367552, -367585, -367616, -367649, -367680, -367713, -367797, -367968, -368001, -368032, -368065, -368101, -368192, -368225, -368285, -368433, -368554, -368593, -368641, -369885, -369889, -369949, -370081, -370141, -370180, -371997, -372195, -372241, -372285, -372709, -372740, -373501, -373764, -374013, -374020, -374269, -374276, -374525, -374532, -374781, -374788, -375037, -375044, -375293, -375300, -375549, -375556, -375805, -375813, -376849, -376911, -376944, -376975, -377008, -377041, -377135, -377168, -377201, -377231, -377264, -377297, -377580, -377617, -377676, -377713, -377743, -377776, -377809, -377871, -377904, -377933, -377966, -377997, -378030, -378061, -378094, -378125, -378158, -378193, -378339, -378385, -378700, -378769, -378892, -378929, -378957, -378993, -379413, -379473, -379517, -380949, -381789, -381813, -384669, -385045, -391901, -392725, -393117, -393238, -393265, -393365, -393379, -393412, -393449, -393485, -393518, -393549, -393582, -393613, -393646, -393677, -393710, -393741, -393774, -393813, -393869, -393902, -393933, -393966, -393997, -394030, -394061, -394094, -394124, -394157, -394190, -394261, 
-394281, -394565, -394694, -394764, -394787, -394965, -395017, -395107, -395140, -395185, -395221, -395293, -395300, -398077, -398117, -398196, -398243, -398308, -398348, -398372, -401265, -401283, -401380, -401437, -401572, -402973, -402980, -406013, -406037, -406090, -406229, -406532, -407573, -408733, -409092, -409621, -410621, -410634, -410965, -411914, -412181, -412202, -412693, -413706, -414037, -415274, -415765, -425988, -636949, -638980, -1310653, -1310724, -1311395, -1311428, -1348029, -1348117, -1349885, -1350148, -1351427, -1351633, -1351684, -1360259, -1360305, -1360388, -1360904, -1361220, -1361309, -1361920, -1361953, -1361984, -1362017, -1362048, -1362081, -1362112, -1362145, -1362176, -1362209, -1362240, -1362273, -1362304, -1362337, -1362368, -1362401, -1362432, -1362465, -1362496, -1362529, -1362560, -1362593, -1362624, -1362657, -1362688, -1362721, -1362752, -1362785, -1362816, -1362849, -1362880, -1362913, -1362944, -1362977, -1363008, -1363041, -1363072, -1363105, -1363136, -1363169, -1363200, -1363233, -1363264, -1363297, -1363328, -1363361, -1363396, -1363429, -1363463, -1363569, -1363589, -1363921, -1363939, -1363968, -1364001, -1364032, -1364065, -1364096, -1364129, -1364160, -1364193, -1364224, -1364257, -1364288, -1364321, -1364352, -1364385, -1364416, -1364449, -1364480, -1364513, -1364544, -1364577, -1364608, -1364641, -1364672, -1364705, -1364736, -1364769, -1364800, -1364833, -1364867, -1364933, -1364996, -1367241, -1367557, -1367633, -1367837, -1368084, -1368803, -1369108, -1369152, -1369185, -1369216, -1369249, -1369280, -1369313, -1369344, -1369377, -1369408, -1369441, -1369472, -1369505, -1369536, -1369569, -1369664, -1369697, -1369728, -1369761, -1369792, -1369825, -1369856, -1369889, -1369920, -1369953, -1369984, -1370017, -1370048, -1370081, -1370112, -1370145, -1370176, -1370209, -1370240, -1370273, -1370304, -1370337, -1370368, -1370401, -1370432, -1370465, -1370496, -1370529, -1370560, -1370593, -1370624, -1370657, -1370688, 
-1370721, -1370752, -1370785, -1370816, -1370849, -1370880, -1370913, -1370944, -1370977, -1371008, -1371041, -1371072, -1371105, -1371136, -1371169, -1371200, -1371233, -1371264, -1371297, -1371328, -1371361, -1371392, -1371425, -1371456, -1371489, -1371520, -1371553, -1371584, -1371617, -1371651, -1371681, -1371936, -1371969, -1372000, -1372033, -1372064, -1372129, -1372160, -1372193, -1372224, -1372257, -1372288, -1372321, -1372352, -1372385, -1372419, -1372468, -1372512, -1372545, -1372576, -1372609, -1372644, -1372672, -1372705, -1372736, -1372769, -1372864, -1372897, -1372928, -1372961, -1372992, -1373025, -1373056, -1373089, -1373120, -1373153, -1373184, -1373217, -1373248, -1373281, -1373312, -1373345, -1373376, -1373409, -1373440, -1373473, -1373504, -1373665, -1373696, -1373857, -1373888, -1373921, -1373952, -1373985, -1374016, -1374049, -1374080, -1374113, -1374144, -1374177, -1374237, -1374272, -1374305, -1374336, -1374465, -1374496, -1374529, -1374589, -1375904, -1375937, -1375972, -1376003, -1376065, -1376100, -1376325, -1376356, -1376453, -1376484, -1376613, -1376644, -1377382, -1377445, -1377510, -1377557, -1377669, -1377725, -1377802, -1378005, -1378067, -1378101, -1378141, -1378308, -1379985, -1380125, -1380358, -1380420, -1382022, -1382533, -1382621, -1382865, -1382920, -1383261, -1383429, -1384004, -1384209, -1384292, -1384337, -1384356, -1384421, -1384456, -1384772, -1385669, -1385937, -1385988, -1386725, -1387078, -1387165, -1387505, -1387524, -1388477, -1388549, -1388646, -1388676, -1390181, -1390214, -1390277, -1390406, -1390469, -1390534, -1390641, -1391069, -1391075, -1391112, -1391453, -1391569, -1391620, -1391781, -1391811, -1391844, -1392136, -1392452, -1392637, -1392644, -1393957, -1394150, -1394213, -1394278, -1394341, -1394429, -1394692, -1394789, -1394820, -1395077, -1395110, -1395165, -1395208, -1395549, -1395601, -1395716, -1396227, -1396260, -1396469, -1396548, -1396582, -1396613, -1396646, -1396676, -1398277, -1398308, -1398341, 
-1398436, -1398501, -1398564, -1398725, -1398788, -1398821, -1398852, -1398909, -1399652, -1399715, -1399761, -1399812, -1400166, -1400197, -1400262, -1400337, -1400388, -1400419, -1400486, -1400517, -1400573, -1400868, -1401085, -1401124, -1401341, -1401380, -1401597, -1401860, -1402109, -1402116, -1402365, -1402369, -1403764, -1403779, -1403905, -1404195, -1404244, -1404317, -1404417, -1406980, -1408102, -1408165, -1408198, -1408261, -1408294, -1408369, -1408390, -1408421, -1408477, -1408520, -1408861, -1409028, -1766557, -1766916, -1767677, -1767780, -1769373, -1769499, -1835036, -2039812, -2051549, -2051588, -2055005, -2056193, -2056445, -2056801, -2056989, -2057124, -2057157, -2057188, -2057522, -2057540, -2057981, -2057988, -2058173, -2058180, -2058237, -2058244, -2058333, -2058340, -2058429, -2058436, -2061908, -2062429, -2062948, -2074574, -2074605, -2074653, -2075140, -2077213, -2077252, -2079005, -2080260, -2080659, -2080693, -2080733, -2080773, -2081297, -2081517, -2081550, -2081585, -2081629, -2081797, -2082321, -2082348, -2082411, -2082477, -2082510, -2082541, -2082574, -2082605, -2082638, -2082669, -2082702, -2082733, -2082766, -2082797, -2082830, -2082861, -2082894, -2082925, -2082958, -2082993, -2083053, -2083086, -2083121, -2083243, -2083345, -2083453, -2083473, -2083596, -2083629, -2083662, -2083693, -2083726, -2083757, -2083790, -2083825, -2083922, -2083948, -2083986, -2084093, -2084113, -2084147, -2084177, -2084253, -2084356, -2084541, -2084548, -2088893, -2088954, -2088989, -2089009, -2089107, -2089137, -2089229, -2089262, -2089297, -2089330, -2089361, -2089388, -2089425, -2089480, -2089809, -2089874, -2089969, -2090016, -2090861, -2090897, -2090926, -2090964, -2090987, -2091028, -2091041, -2091885, -2091922, -2091950, -2091986, -2092013, -2092046, -2092081, -2092109, -2092142, -2092177, -2092228, -2092547, -2092580, -2094019, -2094084, -2095101, -2095172, -2095389, -2095428, -2095645, -2095684, -2095901, -2095940, -2096061, -2096147, -2096210, 
-2096244, -2096277, -2096307, -2096381, -2096405, -2096434, -2096565, -2096637, -2096954, -2097045, -2097117, -2097156, -2097565, -2097572, -2098429, -2098436, -2099069, -2099076, -2099165, -2099172, -2099677, -2099716, -2100189, -2101252, -2105213, -2105361, -2105469, -2105578, -2107037, -2107125, -2107401, -2109098, -2109237, -2109770, -2109845, -2109949, -2109973, -2110397, -2110485, -2110525, -2112021, -2113445, -2113501, -2117636, -2118589, -2118660, -2120253, -2120709, -2120746, -2121629, -2121732, -2122762, -2122909, -2123172, -2123817, -2123844, -2124105, -2124157, -2124292, -2125509, -2125693, -2125828, -2126813, -2126833, -2126852, -2128029, -2128132, -2128401, -2128425, -2128605, -2129920, -2131201, -2132484, -2135005, -2135048, -2135389, -2135552, -2136733, -2136833, -2138013, -2138116, -2139421, -2139652, -2141341, -2141681, -2141725, -2146308, -2156285, -2156548, -2157277, -2157572, -2157853, -2162692, -2162909, -2162948, -2163005, -2163012, -2164445, -2164452, -2164541, -2164612, -2164669, -2164708, -2165469, -2165489, -2165514, -2165764, -2166517, -2166570, -2166788, -2167805, -2168042, -2168349, -2169860, -2170493, -2170500, -2170589, -2170730, -2170884, -2171594, -2171805, -2171889, -2171908, -2172765, -2172913, -2172957, -2174980, -2176797, -2176906, -2176964, -2177034, -2177565, -2177610, -2179076, -2179109, -2179229, -2179237, -2179325, -2179461, -2179588, -2179741, -2179748, -2179869, -2179876, -2180829, -2180869, -2180989, -2181093, -2181130, -2181437, -2181649, -2181949, -2182148, -2183082, -2183153, -2183172, -2184106, -2184221, -2185220, -2185493, -2185508, -2186405, -2186493, -2186602, -2186769, -2187005, -2187268, -2189021, -2189105, -2189316, -2190045, -2190090, -2190340, -2190973, -2191114, -2191364, -2191965, -2192177, -2192317, -2192682, -2192925, -2195460, -2197821, -2199552, -2201213, -2201601, -2203261, -2203466, -2203652, -2204805, -2204957, -2205192, -2205533, -2214922, -2215933, -2215940, -2217309, -2217317, -2217388, -2217437, 
-2217476, -2217565, -2220036, -2220970, -2221284, -2221341, -2221572, -2222277, -2222634, -2222769, -2222941, -2225668, -2226346, -2226589, -2227204, -2227965, -2228230, -2228261, -2228294, -2228324, -2230021, -2230513, -2230749, -2230858, -2231496, -2231837, -2232293, -2232390, -2232420, -2233862, -2233957, -2234086, -2234149, -2234225, -2234298, -2234321, -2234461, -2234810, -2234845, -2234884, -2235709, -2235912, -2236253, -2236421, -2236516, -2237669, -2237830, -2237861, -2238141, -2238152, -2238481, -2238596, -2238630, -2238692, -2238749, -2238980, -2240101, -2240145, -2240196, -2240253, -2240517, -2240582, -2240612, -2242150, -2242245, -2242534, -2242596, -2242737, -2242853, -2242993, -2243014, -2243045, -2243080, -2243396, -2243441, -2243460, -2243505, -2243613, -2243626, -2244285, -2244612, -2245213, -2245220, -2246022, -2246117, -2246214, -2246277, -2246310, -2246341, -2246417, -2246597, -2246653, -2248708, -2248957, -2248964, -2249021, -2249028, -2249181, -2249188, -2249693, -2249700, -2250033, -2250077, -2250244, -2251749, -2251782, -2251877, -2252157, -2252296, -2252637, -2252805, -2252870, -2252957, -2252964, -2253245, -2253284, -2253373, -2253412, -2254141, -2254148, -2254397, -2254404, -2254493, -2254500, -2254685, -2254693, -2254756, -2254790, -2254853, -2254886, -2255037, -2255078, -2255165, -2255206, -2255325, -2255364, -2255421, -2255590, -2255645, -2255780, -2255942, -2256029, -2256069, -2256317, -2256389, -2256573, -2260996, -2262694, -2262789, -2263046, -2263109, -2263206, -2263237, -2263268, -2263409, -2263560, -2263889, -2263965, -2263985, -2264005, -2264036, -2264157, -2265092, -2266630, -2266725, -2266918, -2266949, -2266982, -2267109, -2267174, -2267205, -2267268, -2267345, -2267364, -2267421, -2267656, -2267997, -2273284, -2274790, -2274885, -2275037, -2275078, -2275205, -2275270, -2275301, -2275377, -2276100, -2276229, -2276317, -2277380, -2278918, -2279013, -2279270, -2279333, -2279366, -2279397, -2279473, -2279556, -2279613, -2279944, 
-2280285, -2280465, -2280893, -2281476, -2282853, -2282886, -2282917, -2282950, -2283013, -2283206, -2283237, -2283268, -2283325, -2283528, -2283869, -2285572, -2286461, -2286501, -2286598, -2286661, -2286790, -2286821, -2287005, -2287112, -2287434, -2287505, -2287605, -2287645, -2293764, -2295174, -2295269, -2295558, -2295589, -2295665, -2295709, -2298880, -2299905, -2300936, -2301258, -2301565, -2301924, -2302205, -2302244, -2302301, -2302340, -2302621, -2302628, -2302717, -2302724, -2303494, -2303709, -2303718, -2303805, -2303845, -2303910, -2303941, -2303972, -2304006, -2304036, -2304070, -2304101, -2304145, -2304253, -2304520, -2304861, -2307076, -2307357, -2307396, -2308646, -2308741, -2308893, -2308933, -2308998, -2309125, -2309156, -2309201, -2309220, -2309254, -2309309, -2310148, -2310181, -2310500, -2311781, -2311974, -2312004, -2312037, -2312177, -2312421, -2312477, -2312708, -2312741, -2312934, -2312997, -2313092, -2314565, -2314982, -2315013, -2315089, -2315172, -2315217, -2315389, -2316292, -2318141, -2326532, -2326845, -2326852, -2328038, -2328069, -2328317, -2328325, -2328518, -2328549, -2328580, -2328625, -2328797, -2329096, -2329418, -2330045, -2330129, -2330180, -2331165, -2331205, -2331933, -2331942, -2331973, -2332198, -2332229, -2332294, -2332325, -2332413, -2334724, -2334973, -2334980, -2335069, -2335076, -2336293, -2336509, -2336581, -2336637, -2336645, -2336733, -2336741, -2336964, -2336997, -2337053, -2337288, -2337629, -2337796, -2338013, -2338020, -2338109, -2338116, -2339142, -2339325, -2339333, -2339421, -2339430, -2339493, -2339526, -2339557, -2339588, -2339645, -2339848, -2340189, -2350084, -2350693, -2350758, -2350833, -2350909, -2356740, -2356797, -2357258, -2357941, -2358195, -2358325, -2358877, -2359281, -2359300, -2388829, -2392073, -2395645, -2395665, -2395837, -2396164, -2402461, -2490372, -2524669, -2524698, -2524989, -2654212, -2672893, -2949124, -2967357, -2967556, -2968573, -2968584, -2968925, -2969041, -2969117, -2972164, 
-2973149, -2973189, -2973361, -2973405, -2973700, -2975237, -2975473, -2975637, -2975747, -2975889, -2975925, -2975965, -2976264, -2976605, -2976618, -2976861, -2976868, -2977565, -2977700, -2978333, -3000320, -3001345, -3002378, -3003121, -3003261, -3006468, -3008893, -3008997, -3009028, -3009062, -3010845, -3011045, -3011171, -3011613, -3013635, -3013713, -3013731, -3013765, -3013821, -3014150, -3014237, -3014660, -3211037, -3211268, -3250909, -3252228, -3252541, -3538948, -3548157, -3549700, -3549821, -3550340, -3550493, -3550724, -3563421, -3637252, -3640701, -3640836, -3641277, -3641348, -3641661, -3641860, -3642205, -3642261, -3642277, -3642353, -3642394, -3642525, -3801109, -3808989, -3809301, -3810557, -3810613, -3812518, -3812581, -3812693, -3812774, -3812986, -3813221, -3813493, -3813541, -3813781, -3814725, -3814869, -3816765, -3817493, -3819589, -3819701, -3819741, -3824650, -3825309, -3825685, -3828477, -3828746, -3829565, -3833856, -3834689, -3835520, -3836353, -3836605, -3836609, -3837184, -3838017, -3838848, -3838909, -3838912, -3839005, -3839040, -3839101, -3839136, -3839229, -3839264, -3839421, -3839424, -3839681, -3839837, -3839841, -3839901, -3839905, -3840157, -3840161, -3840512, -3841345, -3842176, -3842269, -3842272, -3842429, -3842464, -3842749, -3842752, -3843005, -3843009, -3843840, -3843933, -3843936, -3844093, -3844096, -3844285, -3844288, -3844349, -3844416, -3844669, -3844673, -3845504, -3846337, -3847168, -3848001, -3848832, -3849665, -3850496, -3851329, -3852160, -3852993, -3853824, -3854657, -3855581, -3855616, -3856434, -3856449, -3857266, -3857281, -3857472, -3858290, -3858305, -3859122, -3859137, -3859328, -3860146, -3860161, -3860978, -3860993, -3861184, -3862002, -3862017, -3862834, -3862849, -3863040, -3863858, -3863873, -3864690, -3864705, -3864896, -3864929, -3864989, -3865032, -3866645, -3883013, -3884789, -3884901, -3886517, -3886757, -3886805, -3887237, -3887285, -3887345, -3887517, -3887973, -3888157, -3888165, -3888669, 
-3932165, -3932413, -3932421, -3932989, -3933029, -3933277, -3933285, -3933373, -3933381, -3933565, -3940356, -3941821, -3941893, -3942115, -3942365, -3942408, -3942749, -3942852, -3942901, -3942941, -3954692, -3956101, -3956232, -3956573, -3956723, -3956765, -3997700, -4004029, -4004074, -4004357, -4004605, -4005888, -4006977, -4008069, -4008291, -4008349, -4008456, -4008797, -4008913, -4008989, -4034090, -4035989, -4036010, -4036115, -4036138, -4036285, -4038698, -4040149, -4040170, -4040669, -4046852, -4047005, -4047012, -4047901, -4047908, -4047997, -4048004, -4048061, -4048100, -4048157, -4048164, -4048509, -4048516, -4048669, -4048676, -4048733, -4048740, -4048797, -4048964, -4049021, -4049124, -4049181, -4049188, -4049245, -4049252, -4049309, -4049316, -4049437, -4049444, -4049533, -4049540, -4049597, -4049636, -4049693, -4049700, -4049757, -4049764, -4049821, -4049828, -4049885, -4049892, -4049949, -4049956, -4050045, -4050052, -4050109, -4050148, -4050301, -4050308, -4050557, -4050564, -4050717, -4050724, -4050877, -4050884, -4050941, -4050948, -4051293, -4051300, -4051869, -4052004, -4052125, -4052132, -4052317, -4052324, -4052893, -4054546, -4054621, -4063253, -4064669, -4064789, -4067997, -4068373, -4068861, -4068917, -4069405, -4069429, -4069917, -4069941, -4071133, -4071434, -4071861, -4077021, -4078805, -4079741, -4080149, -4081565, -4081685, -4081981, -4082197, -4082269, -4082709, -4082909, -4087829, -4095860, -4096021, -4119325, -4119573, -4119997, -4120085, -4120509, -4120597, -4124317, -4124693, -4127549, -4127765, -4128157, -4128789, -4129181, -4129301, -4131101, -4131349, -4131677, -4131861, -4133149, -4133397, -4134365, -4134421, -4134493, -4136981, -4140861, -4140885, -4143517, -4143541, -4147869, -4148245, -4148701, -4148757, -4148925, -4149013, -4149117, -4149269, -4149501, -4149781, -4150589, -4150805, -4151037, -4151317, -4151421, -4151829, -4152061, -4153365, -4158077, -4158101, -4159869, -4161032, -4161373, -4194308, -5561309, -5562372, 
-5695165, -5695492, -5702621, -5702660, -5887069, -5887492, -6126653, -6225924, -6243293, -6291460, -6449533, -29360186, -29360221, -29361178, -29364253, -29368325, -29376029, -31457308, -33554397, -33554460, -35651549, -35651613, -//--Autogenerated -- end of section automatically generated -}; - -constexpr int maxUnicode = 0x10ffff; -constexpr int maskCategory = 0x1F; - -} - -// Each element in catRanges is the start of a range of Unicode characters in -// one general category. -// The value is comprised of a 21-bit character value shifted 5 bits and a 5 bit -// category matching the CharacterCategory enumeration. -// Initial version has 3249 entries and adds about 13K to the executable. -// The array is in ascending order so can be searched using binary search. -// Therefore the average call takes log2(3249) = 12 comparisons. -// For speed, it may be useful to make a linear table for the common values, -// possibly for 0..0xff for most Western European text or 0..0xfff for most -// alphabetic languages. - -CharacterCategory CategoriseCharacter(int character) { - if (character < 0 || character > maxUnicode) - return ccCn; - const int baseValue = character * (maskCategory+1) + maskCategory; - const int *placeAfter = std::lower_bound(catRanges, std::end(catRanges), baseValue); - return static_cast(*(placeAfter-1) & maskCategory); -} - -// Implementation of character sets recommended for identifiers in Unicode Standard Annex #31. -// http://unicode.org/reports/tr31/ - -namespace { - -enum class OtherID { oidNone, oidStart, oidContinue }; - -// Some characters are treated as valid for identifiers even -// though most characters from their category are not. 
-// Values copied from http://www.unicode.org/Public/9.0.0/ucd/PropList.txt -OtherID OtherIDOfCharacter(int character) noexcept { - if ( - (character == 0x1885) || // MONGOLIAN LETTER ALI GALI BALUDA - (character == 0x1886) || // MONGOLIAN LETTER ALI GALI THREE BALUDA - (character == 0x2118) || // SCRIPT CAPITAL P - (character == 0x212E) || // ESTIMATED SYMBOL - (character == 0x309B) || // KATAKANA-HIRAGANA VOICED SOUND MARK - (character == 0x309C)) { // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK - return OtherID::oidStart; - } else if ( - (character == 0x00B7) || // MIDDLE DOT - (character == 0x0387) || // GREEK ANO TELEIA - ((character >= 0x1369) && (character <= 0x1371)) || // ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE - (character == 0x19DA)) { // NEW TAI LUE THAM DIGIT ONE - return OtherID::oidContinue; - } else { - return OtherID::oidNone; - } -} - -// Determine if a character is in Ll|Lu|Lt|Lm|Lo|Nl|Mn|Mc|Nd|Pc and has -// Pattern_Syntax|Pattern_White_Space. -// As of Unicode 9, only VERTICAL TILDE which is in Lm and has Pattern_Syntax matches. -// Should really generate from PropList.txt a list of Pattern_Syntax and Pattern_White_Space. 
-constexpr bool IsIdPattern(int character) noexcept { - return character == 0x2E2F; -} - -bool OmitXidStart(int character) noexcept { - switch (character) { - case 0x037A: // GREEK YPOGEGRAMMENI - case 0x0E33: // THAI CHARACTER SARA AM - case 0x0EB3: // LAO VOWEL SIGN AM - case 0x309B: // KATAKANA-HIRAGANA VOICED SOUND MARK - case 0x309C: // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK - case 0xFC5E: // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM - case 0xFC5F: // ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM - case 0xFC60: // ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM - case 0xFC61: // ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM - case 0xFC62: // ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM - case 0xFC63: // ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM - case 0xFDFA: // ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM - case 0xFDFB: // ARABIC LIGATURE JALLAJALALOUHOU - case 0xFE70: // ARABIC FATHATAN ISOLATED FORM - case 0xFE72: // ARABIC DAMMATAN ISOLATED FORM - case 0xFE74: // ARABIC KASRATAN ISOLATED FORM - case 0xFE76: // ARABIC FATHA ISOLATED FORM - case 0xFE78: // ARABIC DAMMA ISOLATED FORM - case 0xFE7A: // ARABIC KASRA ISOLATED FORM - case 0xFE7C: // ARABIC SHADDA ISOLATED FORM - case 0xFE7E: // ARABIC SUKUN ISOLATED FORM - case 0xFF9E: // HALFWIDTH KATAKANA VOICED SOUND MARK - case 0xFF9F: // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK - return true; - default: - return false; - } -} - -bool OmitXidContinue(int character) noexcept { - switch (character) { - case 0x037A: // GREEK YPOGEGRAMMENI - case 0x309B: // KATAKANA-HIRAGANA VOICED SOUND MARK - case 0x309C: // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK - case 0xFC5E: // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM - case 0xFC5F: // ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM - case 0xFC60: // ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM - case 0xFC61: // ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM - case 0xFC62: // ARABIC LIGATURE SHADDA WITH KASRA ISOLATED 
FORM - case 0xFC63: // ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM - case 0xFDFA: // ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM - case 0xFDFB: // ARABIC LIGATURE JALLAJALALOUHOU - case 0xFE70: // ARABIC FATHATAN ISOLATED FORM - case 0xFE72: // ARABIC DAMMATAN ISOLATED FORM - case 0xFE74: // ARABIC KASRATAN ISOLATED FORM - case 0xFE76: // ARABIC FATHA ISOLATED FORM - case 0xFE78: // ARABIC DAMMA ISOLATED FORM - case 0xFE7A: // ARABIC KASRA ISOLATED FORM - case 0xFE7C: // ARABIC SHADDA ISOLATED FORM - case 0xFE7E: // ARABIC SUKUN ISOLATED FORM - return true; - default: - return false; - } -} - -} - -// UAX #31 defines ID_Start as -// [[:L:][:Nl:][:Other_ID_Start:]--[:Pattern_Syntax:]--[:Pattern_White_Space:]] -bool IsIdStart(int character) { - if (IsIdPattern(character)) { - return false; - } - const OtherID oid = OtherIDOfCharacter(character); - if (oid == OtherID::oidStart) { - return true; - } - const CharacterCategory c = CategoriseCharacter(character); - return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo - || c == ccNl); -} - -// UAX #31 defines ID_Continue as -// [[:ID_Start:][:Mn:][:Mc:][:Nd:][:Pc:][:Other_ID_Continue:]--[:Pattern_Syntax:]--[:Pattern_White_Space:]] -bool IsIdContinue(int character) { - if (IsIdPattern(character)) { - return false; - } - const OtherID oid = OtherIDOfCharacter(character); - if (oid != OtherID::oidNone) { - return true; - } - const CharacterCategory c = CategoriseCharacter(character); - return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo - || c == ccNl || c == ccMn || c == ccMc || c == ccNd || c == ccPc); -} - -// XID_Start is ID_Start modified for Normalization Form KC in UAX #31 -bool IsXidStart(int character) { - if (OmitXidStart(character)) { - return false; - } else { - return IsIdStart(character); - } -} - -// XID_Continue is ID_Continue modified for Normalization Form KC in UAX #31 -bool IsXidContinue(int character) { - if (OmitXidContinue(character)) { - return false; - 
} else { - return IsIdContinue(character); - } -} - -CharacterCategoryMap::CharacterCategoryMap() { - Optimize(256); -} - -int CharacterCategoryMap::Size() const noexcept { - return static_cast(dense.size()); -} - -void CharacterCategoryMap::Optimize(int countCharacters) { - const int characters = std::clamp(countCharacters, 256, maxUnicode + 1); - dense.resize(characters); - - int end = 0; - int index = 0; - int current = catRanges[index]; - ++index; - do { - const int next = catRanges[index]; - const unsigned char category = current & maskCategory; - current >>= 5; - end = std::min(characters, next >> 5); - while (current < end) { - dense[current++] = category; - } - current = next; - ++index; - } while (characters > end); -} - -} diff --git a/src/CharacterCategory.h b/src/CharacterCategory.h deleted file mode 100644 index cd3320dd9..000000000 --- a/src/CharacterCategory.h +++ /dev/null @@ -1,50 +0,0 @@ -// Scintilla source code edit control -/** @file CharacterCategory.h - ** Returns the Unicode general category of a character. - **/ -// Copyright 2013 by Neil Hodgson -// The License.txt file describes the conditions under which this software may be distributed. - -#ifndef CHARACTERCATEGORY_H -#define CHARACTERCATEGORY_H - -namespace Scintilla { - -enum CharacterCategory { - ccLu, ccLl, ccLt, ccLm, ccLo, - ccMn, ccMc, ccMe, - ccNd, ccNl, ccNo, - ccPc, ccPd, ccPs, ccPe, ccPi, ccPf, ccPo, - ccSm, ccSc, ccSk, ccSo, - ccZs, ccZl, ccZp, - ccCc, ccCf, ccCs, ccCo, ccCn -}; - -CharacterCategory CategoriseCharacter(int character); - -// Common definitions of allowable characters in identifiers from UAX #31. 
-bool IsIdStart(int character); -bool IsIdContinue(int character); -bool IsXidStart(int character); -bool IsXidContinue(int character); - -class CharacterCategoryMap { -private: - std::vector dense; -public: - CharacterCategoryMap(); - CharacterCategory CategoryFor(int character) const { - if (static_cast(character) < dense.size()) { - return static_cast(dense[character]); - } else { - // binary search through ranges - return CategoriseCharacter(character); - } - } - int Size() const noexcept; - void Optimize(int countCharacters); -}; - -} - -#endif diff --git a/src/CharacterCategoryMap.cxx b/src/CharacterCategoryMap.cxx new file mode 100644 index 000000000..e9bfecb6a --- /dev/null +++ b/src/CharacterCategoryMap.cxx @@ -0,0 +1,4105 @@ +// Scintilla source code edit control +/** @file CharacterCategoryMap.cxx + ** Returns the Unicode general category of a character. + ** Table automatically regenerated by scripts/GenerateCharacterCategory.py + ** Should only be rarely regenerated for new versions of Unicode. + ** Similar code to Lexilla's lexilla/lexlib/CharacterCategory.cxx but renamed + ** to avoid problems with builds that statically include both Scintilla and Lexilla. + **/ +// Copyright 2013 by Neil Hodgson +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#include +#include +#include + +#include "CharacterCategoryMap.h" + +namespace Scintilla { + +namespace { + // Use an unnamed namespace to protect the declarations from name conflicts + +const int catRanges[] = { +//++Autogenerated -- start of section automatically generated +// Created with Python 3.9.4, Unicode 13.0.0 +25, +1046, +1073, +1171, +1201, +1293, +1326, +1361, +1394, +1425, +1452, +1489, +1544, +1873, +1938, +2033, +2080, +2925, +2961, +2990, +3028, +3051, +3092, +3105, +3949, +3986, +4014, +4050, +4089, +5142, +5169, +5203, +5333, +5361, +5396, +5429, +5444, +5487, +5522, +5562, +5589, +5620, +5653, +5682, +5706, +5780, +5793, +5841, +5908, +5930, +5956, +6000, +6026, +6129, +6144, +6898, +6912, +7137, +7922, +7937, +8192, +8225, +8256, +8289, +8320, +8353, +8384, +8417, +8448, +8481, +8512, +8545, +8576, +8609, +8640, +8673, +8704, +8737, +8768, +8801, +8832, +8865, +8896, +8929, +8960, +8993, +9024, +9057, +9088, +9121, +9152, +9185, +9216, +9249, +9280, +9313, +9344, +9377, +9408, +9441, +9472, +9505, +9536, +9569, +9600, +9633, +9664, +9697, +9728, +9761, +9792, +9825, +9856, +9889, +9920, +9953, +10016, +10049, +10080, +10113, +10144, +10177, +10208, +10241, +10272, +10305, +10336, +10369, +10400, +10433, +10464, +10497, +10560, +10593, +10624, +10657, +10688, +10721, +10752, +10785, +10816, +10849, +10880, +10913, +10944, +10977, +11008, +11041, +11072, +11105, +11136, +11169, +11200, +11233, +11264, +11297, +11328, +11361, +11392, +11425, +11456, +11489, +11520, +11553, +11584, +11617, +11648, +11681, +11712, +11745, +11776, +11809, +11840, +11873, +11904, +11937, +11968, +12001, +12032, +12097, +12128, +12161, +12192, +12225, +12320, +12385, +12416, +12449, +12480, +12545, +12576, +12673, +12736, +12865, +12896, +12961, +12992, +13089, +13184, +13249, +13280, +13345, +13376, +13409, +13440, +13473, +13504, +13569, +13600, +13633, +13696, +13729, +13760, +13825, +13856, +13953, +13984, +14017, +14048, +14113, +14180, +14208, +14241, +14340, 
+14464, +14498, +14529, +14560, +14594, +14625, +14656, +14690, +14721, +14752, +14785, +14816, +14849, +14880, +14913, +14944, +14977, +15008, +15041, +15072, +15105, +15136, +15169, +15200, +15233, +15296, +15329, +15360, +15393, +15424, +15457, +15488, +15521, +15552, +15585, +15616, +15649, +15680, +15713, +15744, +15777, +15808, +15841, +15904, +15938, +15969, +16000, +16033, +16064, +16161, +16192, +16225, +16256, +16289, +16320, +16353, +16384, +16417, +16448, +16481, +16512, +16545, +16576, +16609, +16640, +16673, +16704, +16737, +16768, +16801, +16832, +16865, +16896, +16929, +16960, +16993, +17024, +17057, +17088, +17121, +17152, +17185, +17216, +17249, +17280, +17313, +17344, +17377, +17408, +17441, +17472, +17505, +17536, +17569, +17600, +17633, +17664, +17697, +17728, +17761, +17792, +17825, +17856, +17889, +17920, +17953, +17984, +18017, +18240, +18305, +18336, +18401, +18464, +18497, +18528, +18657, +18688, +18721, +18752, +18785, +18816, +18849, +18880, +18913, +21124, +21153, +22019, +22612, +22723, +23124, +23555, +23732, +23939, +23988, +24003, +24052, +24581, +28160, +28193, +28224, +28257, +28291, +28340, +28352, +28385, +28445, +28483, +28513, +28625, +28640, +28701, +28820, +28864, +28913, +28928, +29053, +29056, +29117, +29120, +29185, +29216, +29789, +29792, +30081, +31200, +31233, +31296, +31393, +31488, +31521, +31552, +31585, +31616, +31649, +31680, +31713, +31744, +31777, +31808, +31841, +31872, +31905, +31936, +31969, +32000, +32033, +32064, +32097, +32128, +32161, +32192, +32225, +32384, +32417, +32466, +32480, +32513, +32544, +32609, +32672, +34305, +35840, +35873, +35904, +35937, +35968, +36001, +36032, +36065, +36096, +36129, +36160, +36193, +36224, +36257, +36288, +36321, +36352, +36385, +36416, +36449, +36480, +36513, +36544, +36577, +36608, +36641, +36672, +36705, +36736, +36769, +36800, +36833, +36864, +36897, +36949, +36965, +37127, +37184, +37217, +37248, +37281, +37312, +37345, +37376, +37409, +37440, +37473, +37504, +37537, 
+37568, +37601, +37632, +37665, +37696, +37729, +37760, +37793, +37824, +37857, +37888, +37921, +37952, +37985, +38016, +38049, +38080, +38113, +38144, +38177, +38208, +38241, +38272, +38305, +38336, +38369, +38400, +38433, +38464, +38497, +38528, +38561, +38592, +38625, +38656, +38689, +38720, +38753, +38784, +38817, +38848, +38881, +38912, +38977, +39008, +39041, +39072, +39105, +39136, +39169, +39200, +39233, +39264, +39297, +39328, +39361, +39424, +39457, +39488, +39521, +39552, +39585, +39616, +39649, +39680, +39713, +39744, +39777, +39808, +39841, +39872, +39905, +39936, +39969, +40000, +40033, +40064, +40097, +40128, +40161, +40192, +40225, +40256, +40289, +40320, +40353, +40384, +40417, +40448, +40481, +40512, +40545, +40576, +40609, +40640, +40673, +40704, +40737, +40768, +40801, +40832, +40865, +40896, +40929, +40960, +40993, +41024, +41057, +41088, +41121, +41152, +41185, +41216, +41249, +41280, +41313, +41344, +41377, +41408, +41441, +41472, +41505, +41536, +41569, +41600, +41633, +41664, +41697, +41728, +41761, +41792, +41825, +41856, +41889, +41920, +41953, +41984, +42017, +42048, +42081, +42112, +42145, +42176, +42209, +42240, +42273, +42304, +42337, +42368, +42401, +42432, +42465, +42525, +42528, +43773, +43811, +43857, +44033, +45361, +45388, +45437, +45493, +45555, +45597, +45605, +47052, +47077, +47121, +47141, +47217, +47237, +47313, +47333, +47389, +47620, +48509, +48612, +48753, +48829, +49178, +49362, +49457, +49523, +49553, +49621, +49669, +50033, +50074, +50109, +50129, +50180, +51203, +51236, +51557, +52232, +52561, +52676, +52741, +52772, +55953, +55972, +56005, +56250, +56277, +56293, +56483, +56549, +56629, +56645, +56772, +56840, +57156, +57269, +57316, +57361, +57821, +57850, +57860, +57893, +57924, +58885, +59773, +59812, +62661, +63012, +63069, +63496, +63812, +64869, +65155, +65237, +65265, +65347, +65405, +65445, +65491, +65540, +66245, +66371, +66405, +66691, +66725, +66819, +66853, +67037, +67089, +67581, +67588, +68389, +68509, 
+68561, +68605, +68612, +68989, +70660, +71357, +71364, +71965, +72293, +72794, +72805, +73830, +73860, +75589, +75622, +75653, +75684, +75718, +75813, +76070, +76197, +76230, +76292, +76325, +76548, +76869, +76945, +77000, +77329, +77347, +77380, +77861, +77894, +77981, +77988, +78269, +78308, +78397, +78436, +79165, +79172, +79421, +79428, +79485, +79556, +79709, +79749, +79780, +79814, +79909, +80061, +80102, +80189, +80230, +80293, +80324, +80381, +80614, +80669, +80772, +80861, +80868, +80965, +81053, +81096, +81412, +81491, +81546, +81749, +81779, +81796, +81841, +81861, +81917, +81957, +82022, +82077, +82084, +82301, +82404, +82493, +82532, +83261, +83268, +83517, +83524, +83613, +83620, +83709, +83716, +83805, +83845, +83901, +83910, +84005, +84093, +84197, +84285, +84325, +84445, +84517, +84573, +84772, +84925, +84932, +84989, +85192, +85509, +85572, +85669, +85713, +85757, +86053, +86118, +86173, +86180, +86493, +86500, +86621, +86628, +87357, +87364, +87613, +87620, +87709, +87716, +87901, +87941, +87972, +88006, +88101, +88285, +88293, +88358, +88413, +88422, +88485, +88541, +88580, +88637, +89092, +89157, +89245, +89288, +89617, +89651, +89693, +89892, +89925, +90141, +90149, +90182, +90269, +90276, +90557, +90596, +90685, +90724, +91453, +91460, +91709, +91716, +91805, +91812, +91997, +92037, +92068, +92102, +92133, +92166, +92197, +92349, +92390, +92477, +92518, +92581, +92637, +92837, +92902, +92957, +93060, +93149, +93156, +93253, +93341, +93384, +93717, +93732, +93770, +93981, +94277, +94308, +94365, +94372, +94589, +94660, +94781, +94788, +94941, +95012, +95101, +95108, +95165, +95172, +95261, +95332, +95421, +95492, +95613, +95684, +96093, +96198, +96261, +96294, +96381, +96454, +96573, +96582, +96677, +96733, +96772, +96829, +96998, +97053, +97480, +97802, +97909, +98099, +98133, +98173, +98309, +98342, +98437, +98468, +98749, +98756, +98877, +98884, +99645, +99652, +100189, +100260, +100293, +100390, +100541, +100549, +100669, +100677, 
+100829, +101029, +101117, +101124, +101245, +101380, +101445, +101533, +101576, +101917, +102129, +102154, +102389, +102404, +102437, +102470, +102545, +102564, +102845, +102852, +102973, +102980, +103741, +103748, +104093, +104100, +104285, +104325, +104356, +104390, +104421, +104454, +104637, +104645, +104678, +104765, +104774, +104837, +104925, +105126, +105213, +105412, +105469, +105476, +105541, +105629, +105672, +106013, +106020, +106109, +106501, +106566, +106628, +106941, +106948, +107069, +107076, +108389, +108452, +108486, +108581, +108733, +108742, +108861, +108870, +108965, +108996, +109045, +109085, +109188, +109286, +109322, +109540, +109637, +109725, +109768, +110090, +110389, +110404, +110621, +110629, +110662, +110749, +110756, +111357, +111428, +112221, +112228, +112541, +112548, +112605, +112644, +112893, +112965, +113021, +113126, +113221, +113341, +113349, +113405, +113414, +113693, +113864, +114205, +114246, +114321, +114365, +114724, +116261, +116292, +116357, +116605, +116723, +116740, +116931, +116965, +117233, +117256, +117585, +117661, +118820, +118909, +118916, +118973, +118980, +119165, +119172, +119965, +119972, +120029, +120036, +120357, +120388, +120453, +120740, +120797, +120836, +121021, +121027, +121085, +121093, +121309, +121352, +121693, +121732, +121885, +122884, +122933, +123025, +123509, +123537, +123573, +123653, +123733, +123912, +124234, +124565, +124581, +124629, +124645, +124693, +124709, +124749, +124782, +124813, +124846, +124870, +124932, +125213, +125220, +126397, +126501, +126950, +126981, +127153, +127173, +127236, +127397, +127773, +127781, +128957, +128981, +129221, +129269, +129469, +129493, +129553, +129717, +129841, +129917, +131076, +132454, +132517, +132646, +132677, +132870, +132901, +132966, +133029, +133092, +133128, +133457, +133636, +133830, +133893, +133956, +134085, +134180, +134214, +134308, +134374, +134596, +134693, +134820, +135237, +135270, +135333, +135398, +135589, +135620, +135654, +135688, 
+136006, +136101, +136149, +136192, +137437, +137440, +137501, +137632, +137693, +137729, +139121, +139139, +139169, +139268, +149821, +149828, +149981, +150020, +150269, +150276, +150333, +150340, +150493, +150532, +151869, +151876, +152029, +152068, +153149, +153156, +153309, +153348, +153597, +153604, +153661, +153668, +153821, +153860, +154365, +154372, +156221, +156228, +156381, +156420, +158589, +158629, +158737, +159018, +159677, +159748, +160277, +160605, +160768, +163549, +163585, +163805, +163852, +163876, +183733, +183761, +183780, +184342, +184356, +185197, +185230, +185277, +185348, +187761, +187849, +187940, +188221, +188420, +188861, +188868, +188997, +189117, +189444, +190021, +190129, +190205, +190468, +191045, +191133, +191492, +191933, +191940, +192061, +192069, +192157, +192516, +194181, +194246, +194277, +194502, +194757, +194790, +194853, +195217, +195299, +195345, +195443, +195460, +195493, +195549, +195592, +195933, +196106, +196445, +196625, +196812, +196849, +196965, +197082, +197117, +197128, +197469, +197636, +198755, +198788, +200509, +200708, +200869, +200932, +202021, +202052, +202109, +202244, +204509, +204804, +205821, +205829, +205926, +206053, +206118, +206237, +206342, +206405, +206438, +206629, +206749, +206869, +206909, +206993, +207048, +207364, +208349, +208388, +208573, +208900, +210333, +210436, +211293, +211464, +211786, +211837, +211925, +212996, +213733, +213798, +213861, +213917, +213969, +214020, +215718, +215749, +215782, +215813, +216061, +216069, +216102, +216133, +216166, +216229, +216486, +216677, +217021, +217061, +217096, +217437, +217608, +217949, +218129, +218339, +218385, +218589, +218629, +219079, +219109, +219197, +221189, +221318, +221348, +222853, +222886, +222917, +223078, +223109, +223142, +223301, +223334, +223396, +223645, +223752, +224081, +224309, +224613, +224917, +225213, +225285, +225350, +225380, +226342, +226373, +226502, +226565, +226630, +226661, +226756, +226824, +227140, +228549, +228582, 
+228613, +228678, +228773, +228806, +228837, +228934, +229021, +229265, +229380, +230534, +230789, +231046, +231109, +231197, +231281, +231432, +231773, +231844, +231944, +232260, +233219, +233425, +233473, +233789, +233984, +235389, +235424, +235537, +235805, +236037, +236145, +236165, +236582, +236613, +236836, +236965, +236996, +237189, +237220, +237286, +237317, +237380, +237437, +237569, +238979, +240993, +241411, +241441, +242531, +243717, +245597, +245605, +245760, +245793, +245824, +245857, +245888, +245921, +245952, +245985, +246016, +246049, +246080, +246113, +246144, +246177, +246208, +246241, +246272, +246305, +246336, +246369, +246400, +246433, +246464, +246497, +246528, +246561, +246592, +246625, +246656, +246689, +246720, +246753, +246784, +246817, +246848, +246881, +246912, +246945, +246976, +247009, +247040, +247073, +247104, +247137, +247168, +247201, +247232, +247265, +247296, +247329, +247360, +247393, +247424, +247457, +247488, +247521, +247552, +247585, +247616, +247649, +247680, +247713, +247744, +247777, +247808, +247841, +247872, +247905, +247936, +247969, +248000, +248033, +248064, +248097, +248128, +248161, +248192, +248225, +248256, +248289, +248320, +248353, +248384, +248417, +248448, +248481, +248512, +248545, +248576, +248609, +248640, +248673, +248704, +248737, +248768, +248801, +248832, +248865, +248896, +248929, +248960, +248993, +249024, +249057, +249088, +249121, +249152, +249185, +249216, +249249, +249280, +249313, +249344, +249377, +249408, +249441, +249472, +249505, +249536, +249569, +249600, +249633, +249664, +249697, +249728, +249761, +249792, +249825, +249856, +249889, +249920, +249953, +249984, +250017, +250048, +250081, +250112, +250145, +250176, +250209, +250240, +250273, +250304, +250337, +250368, +250401, +250432, +250465, +250496, +250529, +250816, +250849, +250880, +250913, +250944, +250977, +251008, +251041, +251072, +251105, +251136, +251169, +251200, +251233, +251264, +251297, +251328, +251361, +251392, +251425, 
+251456, +251489, +251520, +251553, +251584, +251617, +251648, +251681, +251712, +251745, +251776, +251809, +251840, +251873, +251904, +251937, +251968, +252001, +252032, +252065, +252096, +252129, +252160, +252193, +252224, +252257, +252288, +252321, +252352, +252385, +252416, +252449, +252480, +252513, +252544, +252577, +252608, +252641, +252672, +252705, +252736, +252769, +252800, +252833, +252864, +252897, +252928, +252961, +252992, +253025, +253056, +253089, +253120, +253153, +253184, +253217, +253248, +253281, +253312, +253345, +253376, +253409, +253440, +253473, +253504, +253537, +253568, +253601, +253632, +253665, +253696, +253729, +253760, +253793, +253824, +253857, +253888, +253921, +254208, +254465, +254685, +254720, +254941, +254977, +255232, +255489, +255744, +256001, +256221, +256256, +256477, +256513, +256797, +256800, +256861, +256864, +256925, +256928, +256989, +256992, +257025, +257280, +257537, +258013, +258049, +258306, +258561, +258818, +259073, +259330, +259585, +259773, +259777, +259840, +259970, +260020, +260033, +260084, +260161, +260285, +260289, +260352, +260482, +260532, +260609, +260765, +260801, +260864, +261021, +261044, +261121, +261376, +261556, +261661, +261697, +261821, +261825, +261888, +262018, +262068, +262141, +262166, +262522, +262668, +262865, +262927, +262960, +262989, +263023, +263088, +263117, +263151, +263185, +263447, +263480, +263514, +263670, +263697, +263983, +264016, +264049, +264171, +264241, +264338, +264365, +264398, +264433, +264786, +264817, +264843, +264881, +265206, +265242, +265405, +265434, +265738, +265763, +265821, +265866, +266066, +266157, +266190, +266211, +266250, +266578, +266669, +266702, +266749, +266755, +267197, +267283, +268317, +268805, +269223, +269349, +269383, +269477, +269885, +270357, +270400, +270453, +270560, +270613, +270657, +270688, +270785, +270848, +270945, +270997, +271008, +271061, +271122, +271136, +271317, +271488, +271541, +271552, +271605, +271616, +271669, +271680, +271829, 
+271841, +271872, +272001, +272036, +272161, +272213, +272257, +272320, +272402, +272544, +272577, +272725, +272754, +272789, +272833, +272885, +272906, +273417, +274528, +274561, +274601, +274730, +274773, +274845, +274962, +275125, +275282, +275349, +275474, +275509, +275570, +275605, +275666, +275701, +275922, +275957, +276946, +277013, +277074, +277109, +277138, +277173, +278162, +286741, +286989, +287022, +287053, +287086, +287125, +287762, +287829, +288045, +288078, +288117, +290706, +290741, +291698, +292501, +293778, +293973, +296189, +296981, +297341, +297994, +299925, +302410, +303125, +308978, +309013, +309298, +309333, +311058, +311317, +314866, +314901, +322829, +322862, +322893, +322926, +322957, +322990, +323021, +323054, +323085, +323118, +323149, +323182, +323213, +323246, +323274, +324245, +325650, +325805, +325838, +325874, +326861, +326894, +326925, +326958, +326989, +327022, +327053, +327086, +327117, +327150, +327186, +327701, +335890, +340077, +340110, +340141, +340174, +340205, +340238, +340269, +340302, +340333, +340366, +340397, +340430, +340461, +340494, +340525, +340558, +340589, +340622, +340653, +340686, +340717, +340750, +340786, +342797, +342830, +342861, +342894, +342930, +343949, +343982, +344018, +352277, +353810, +354485, +354546, +354741, +355997, +356053, +357085, +357109, +360448, +361981, +361985, +363517, +363520, +363553, +363584, +363681, +363744, +363777, +363808, +363841, +363872, +363905, +363936, +364065, +364096, +364129, +364192, +364225, +364419, +364480, +364577, +364608, +364641, +364672, +364705, +364736, +364769, +364800, +364833, +364864, +364897, +364928, +364961, +364992, +365025, +365056, +365089, +365120, +365153, +365184, +365217, +365248, +365281, +365312, +365345, +365376, +365409, +365440, +365473, +365504, +365537, +365568, +365601, +365632, +365665, +365696, +365729, +365760, +365793, +365824, +365857, +365888, +365921, +365952, +365985, +366016, +366049, +366080, +366113, +366144, +366177, +366208, 
+366241, +366272, +366305, +366336, +366369, +366400, +366433, +366464, +366497, +366528, +366561, +366592, +366625, +366656, +366689, +366720, +366753, +366784, +366817, +366848, +366881, +366912, +366945, +366976, +367009, +367040, +367073, +367104, +367137, +367168, +367201, +367232, +367265, +367296, +367329, +367360, +367393, +367424, +367457, +367488, +367521, +367552, +367585, +367616, +367649, +367680, +367713, +367797, +367968, +368001, +368032, +368065, +368101, +368192, +368225, +368285, +368433, +368554, +368593, +368641, +369885, +369889, +369949, +370081, +370141, +370180, +371997, +372195, +372241, +372285, +372709, +372740, +373501, +373764, +374013, +374020, +374269, +374276, +374525, +374532, +374781, +374788, +375037, +375044, +375293, +375300, +375549, +375556, +375805, +375813, +376849, +376911, +376944, +376975, +377008, +377041, +377135, +377168, +377201, +377231, +377264, +377297, +377580, +377617, +377676, +377713, +377743, +377776, +377809, +377871, +377904, +377933, +377966, +377997, +378030, +378061, +378094, +378125, +378158, +378193, +378339, +378385, +378700, +378769, +378892, +378929, +378957, +378993, +379413, +379473, +379517, +380949, +381789, +381813, +384669, +385045, +391901, +392725, +393117, +393238, +393265, +393365, +393379, +393412, +393449, +393485, +393518, +393549, +393582, +393613, +393646, +393677, +393710, +393741, +393774, +393813, +393869, +393902, +393933, +393966, +393997, +394030, +394061, +394094, +394124, +394157, +394190, +394261, +394281, +394565, +394694, +394764, +394787, +394965, +395017, +395107, +395140, +395185, +395221, +395293, +395300, +398077, +398117, +398196, +398243, +398308, +398348, +398372, +401265, +401283, +401380, +401437, +401572, +402973, +402980, +406013, +406037, +406090, +406229, +406532, +407573, +408733, +409092, +409621, +410621, +410634, +410965, +411914, +412181, +412202, +412693, +413706, +414037, +415274, +415765, +425988, +636949, +638980, +1310653, +1310724, +1311395, 
+1311428, +1348029, +1348117, +1349885, +1350148, +1351427, +1351633, +1351684, +1360259, +1360305, +1360388, +1360904, +1361220, +1361309, +1361920, +1361953, +1361984, +1362017, +1362048, +1362081, +1362112, +1362145, +1362176, +1362209, +1362240, +1362273, +1362304, +1362337, +1362368, +1362401, +1362432, +1362465, +1362496, +1362529, +1362560, +1362593, +1362624, +1362657, +1362688, +1362721, +1362752, +1362785, +1362816, +1362849, +1362880, +1362913, +1362944, +1362977, +1363008, +1363041, +1363072, +1363105, +1363136, +1363169, +1363200, +1363233, +1363264, +1363297, +1363328, +1363361, +1363396, +1363429, +1363463, +1363569, +1363589, +1363921, +1363939, +1363968, +1364001, +1364032, +1364065, +1364096, +1364129, +1364160, +1364193, +1364224, +1364257, +1364288, +1364321, +1364352, +1364385, +1364416, +1364449, +1364480, +1364513, +1364544, +1364577, +1364608, +1364641, +1364672, +1364705, +1364736, +1364769, +1364800, +1364833, +1364867, +1364933, +1364996, +1367241, +1367557, +1367633, +1367837, +1368084, +1368803, +1369108, +1369152, +1369185, +1369216, +1369249, +1369280, +1369313, +1369344, +1369377, +1369408, +1369441, +1369472, +1369505, +1369536, +1369569, +1369664, +1369697, +1369728, +1369761, +1369792, +1369825, +1369856, +1369889, +1369920, +1369953, +1369984, +1370017, +1370048, +1370081, +1370112, +1370145, +1370176, +1370209, +1370240, +1370273, +1370304, +1370337, +1370368, +1370401, +1370432, +1370465, +1370496, +1370529, +1370560, +1370593, +1370624, +1370657, +1370688, +1370721, +1370752, +1370785, +1370816, +1370849, +1370880, +1370913, +1370944, +1370977, +1371008, +1371041, +1371072, +1371105, +1371136, +1371169, +1371200, +1371233, +1371264, +1371297, +1371328, +1371361, +1371392, +1371425, +1371456, +1371489, +1371520, +1371553, +1371584, +1371617, +1371651, +1371681, +1371936, +1371969, +1372000, +1372033, +1372064, +1372129, +1372160, +1372193, +1372224, +1372257, +1372288, +1372321, +1372352, +1372385, +1372419, +1372468, +1372512, 
+1372545, +1372576, +1372609, +1372644, +1372672, +1372705, +1372736, +1372769, +1372864, +1372897, +1372928, +1372961, +1372992, +1373025, +1373056, +1373089, +1373120, +1373153, +1373184, +1373217, +1373248, +1373281, +1373312, +1373345, +1373376, +1373409, +1373440, +1373473, +1373504, +1373665, +1373696, +1373857, +1373888, +1373921, +1373952, +1373985, +1374016, +1374049, +1374080, +1374113, +1374144, +1374177, +1374237, +1374272, +1374305, +1374336, +1374465, +1374496, +1374529, +1374589, +1375904, +1375937, +1375972, +1376003, +1376065, +1376100, +1376325, +1376356, +1376453, +1376484, +1376613, +1376644, +1377382, +1377445, +1377510, +1377557, +1377669, +1377725, +1377802, +1378005, +1378067, +1378101, +1378141, +1378308, +1379985, +1380125, +1380358, +1380420, +1382022, +1382533, +1382621, +1382865, +1382920, +1383261, +1383429, +1384004, +1384209, +1384292, +1384337, +1384356, +1384421, +1384456, +1384772, +1385669, +1385937, +1385988, +1386725, +1387078, +1387165, +1387505, +1387524, +1388477, +1388549, +1388646, +1388676, +1390181, +1390214, +1390277, +1390406, +1390469, +1390534, +1390641, +1391069, +1391075, +1391112, +1391453, +1391569, +1391620, +1391781, +1391811, +1391844, +1392136, +1392452, +1392637, +1392644, +1393957, +1394150, +1394213, +1394278, +1394341, +1394429, +1394692, +1394789, +1394820, +1395077, +1395110, +1395165, +1395208, +1395549, +1395601, +1395716, +1396227, +1396260, +1396469, +1396548, +1396582, +1396613, +1396646, +1396676, +1398277, +1398308, +1398341, +1398436, +1398501, +1398564, +1398725, +1398788, +1398821, +1398852, +1398909, +1399652, +1399715, +1399761, +1399812, +1400166, +1400197, +1400262, +1400337, +1400388, +1400419, +1400486, +1400517, +1400573, +1400868, +1401085, +1401124, +1401341, +1401380, +1401597, +1401860, +1402109, +1402116, +1402365, +1402369, +1403764, +1403779, +1403905, +1404195, +1404244, +1404317, +1404417, +1406980, +1408102, +1408165, +1408198, +1408261, +1408294, +1408369, +1408390, +1408421, 
+1408477, +1408520, +1408861, +1409028, +1766557, +1766916, +1767677, +1767780, +1769373, +1769499, +1835036, +2039812, +2051549, +2051588, +2055005, +2056193, +2056445, +2056801, +2056989, +2057124, +2057157, +2057188, +2057522, +2057540, +2057981, +2057988, +2058173, +2058180, +2058237, +2058244, +2058333, +2058340, +2058429, +2058436, +2061908, +2062429, +2062948, +2074574, +2074605, +2074653, +2075140, +2077213, +2077252, +2079005, +2080260, +2080659, +2080693, +2080733, +2080773, +2081297, +2081517, +2081550, +2081585, +2081629, +2081797, +2082321, +2082348, +2082411, +2082477, +2082510, +2082541, +2082574, +2082605, +2082638, +2082669, +2082702, +2082733, +2082766, +2082797, +2082830, +2082861, +2082894, +2082925, +2082958, +2082993, +2083053, +2083086, +2083121, +2083243, +2083345, +2083453, +2083473, +2083596, +2083629, +2083662, +2083693, +2083726, +2083757, +2083790, +2083825, +2083922, +2083948, +2083986, +2084093, +2084113, +2084147, +2084177, +2084253, +2084356, +2084541, +2084548, +2088893, +2088954, +2088989, +2089009, +2089107, +2089137, +2089229, +2089262, +2089297, +2089330, +2089361, +2089388, +2089425, +2089480, +2089809, +2089874, +2089969, +2090016, +2090861, +2090897, +2090926, +2090964, +2090987, +2091028, +2091041, +2091885, +2091922, +2091950, +2091986, +2092013, +2092046, +2092081, +2092109, +2092142, +2092177, +2092228, +2092547, +2092580, +2094019, +2094084, +2095101, +2095172, +2095389, +2095428, +2095645, +2095684, +2095901, +2095940, +2096061, +2096147, +2096210, +2096244, +2096277, +2096307, +2096381, +2096405, +2096434, +2096565, +2096637, +2096954, +2097045, +2097117, +2097156, +2097565, +2097572, +2098429, +2098436, +2099069, +2099076, +2099165, +2099172, +2099677, +2099716, +2100189, +2101252, +2105213, +2105361, +2105469, +2105578, +2107037, +2107125, +2107401, +2109098, +2109237, +2109770, +2109845, +2109949, +2109973, +2110397, +2110485, +2110525, +2112021, +2113445, +2113501, +2117636, +2118589, +2118660, +2120253, +2120709, 
+2120746, +2121629, +2121732, +2122762, +2122909, +2123172, +2123817, +2123844, +2124105, +2124157, +2124292, +2125509, +2125693, +2125828, +2126813, +2126833, +2126852, +2128029, +2128132, +2128401, +2128425, +2128605, +2129920, +2131201, +2132484, +2135005, +2135048, +2135389, +2135552, +2136733, +2136833, +2138013, +2138116, +2139421, +2139652, +2141341, +2141681, +2141725, +2146308, +2156285, +2156548, +2157277, +2157572, +2157853, +2162692, +2162909, +2162948, +2163005, +2163012, +2164445, +2164452, +2164541, +2164612, +2164669, +2164708, +2165469, +2165489, +2165514, +2165764, +2166517, +2166570, +2166788, +2167805, +2168042, +2168349, +2169860, +2170493, +2170500, +2170589, +2170730, +2170884, +2171594, +2171805, +2171889, +2171908, +2172765, +2172913, +2172957, +2174980, +2176797, +2176906, +2176964, +2177034, +2177565, +2177610, +2179076, +2179109, +2179229, +2179237, +2179325, +2179461, +2179588, +2179741, +2179748, +2179869, +2179876, +2180829, +2180869, +2180989, +2181093, +2181130, +2181437, +2181649, +2181949, +2182148, +2183082, +2183153, +2183172, +2184106, +2184221, +2185220, +2185493, +2185508, +2186405, +2186493, +2186602, +2186769, +2187005, +2187268, +2189021, +2189105, +2189316, +2190045, +2190090, +2190340, +2190973, +2191114, +2191364, +2191965, +2192177, +2192317, +2192682, +2192925, +2195460, +2197821, +2199552, +2201213, +2201601, +2203261, +2203466, +2203652, +2204805, +2204957, +2205192, +2205533, +2214922, +2215933, +2215940, +2217309, +2217317, +2217388, +2217437, +2217476, +2217565, +2220036, +2220970, +2221284, +2221341, +2221572, +2222277, +2222634, +2222769, +2222941, +2225668, +2226346, +2226589, +2227204, +2227965, +2228230, +2228261, +2228294, +2228324, +2230021, +2230513, +2230749, +2230858, +2231496, +2231837, +2232293, +2232390, +2232420, +2233862, +2233957, +2234086, +2234149, +2234225, +2234298, +2234321, +2234461, +2234810, +2234845, +2234884, +2235709, +2235912, +2236253, +2236421, +2236516, +2237669, +2237830, +2237861, 
+2238141, +2238152, +2238481, +2238596, +2238630, +2238692, +2238749, +2238980, +2240101, +2240145, +2240196, +2240253, +2240517, +2240582, +2240612, +2242150, +2242245, +2242534, +2242596, +2242737, +2242853, +2242993, +2243014, +2243045, +2243080, +2243396, +2243441, +2243460, +2243505, +2243613, +2243626, +2244285, +2244612, +2245213, +2245220, +2246022, +2246117, +2246214, +2246277, +2246310, +2246341, +2246417, +2246597, +2246653, +2248708, +2248957, +2248964, +2249021, +2249028, +2249181, +2249188, +2249693, +2249700, +2250033, +2250077, +2250244, +2251749, +2251782, +2251877, +2252157, +2252296, +2252637, +2252805, +2252870, +2252957, +2252964, +2253245, +2253284, +2253373, +2253412, +2254141, +2254148, +2254397, +2254404, +2254493, +2254500, +2254685, +2254693, +2254756, +2254790, +2254853, +2254886, +2255037, +2255078, +2255165, +2255206, +2255325, +2255364, +2255421, +2255590, +2255645, +2255780, +2255942, +2256029, +2256069, +2256317, +2256389, +2256573, +2260996, +2262694, +2262789, +2263046, +2263109, +2263206, +2263237, +2263268, +2263409, +2263560, +2263889, +2263965, +2263985, +2264005, +2264036, +2264157, +2265092, +2266630, +2266725, +2266918, +2266949, +2266982, +2267109, +2267174, +2267205, +2267268, +2267345, +2267364, +2267421, +2267656, +2267997, +2273284, +2274790, +2274885, +2275037, +2275078, +2275205, +2275270, +2275301, +2275377, +2276100, +2276229, +2276317, +2277380, +2278918, +2279013, +2279270, +2279333, +2279366, +2279397, +2279473, +2279556, +2279613, +2279944, +2280285, +2280465, +2280893, +2281476, +2282853, +2282886, +2282917, +2282950, +2283013, +2283206, +2283237, +2283268, +2283325, +2283528, +2283869, +2285572, +2286461, +2286501, +2286598, +2286661, +2286790, +2286821, +2287005, +2287112, +2287434, +2287505, +2287605, +2287645, +2293764, +2295174, +2295269, +2295558, +2295589, +2295665, +2295709, +2298880, +2299905, +2300936, +2301258, +2301565, +2301924, +2302205, +2302244, +2302301, +2302340, +2302621, +2302628, +2302717, 
+2302724, +2303494, +2303709, +2303718, +2303805, +2303845, +2303910, +2303941, +2303972, +2304006, +2304036, +2304070, +2304101, +2304145, +2304253, +2304520, +2304861, +2307076, +2307357, +2307396, +2308646, +2308741, +2308893, +2308933, +2308998, +2309125, +2309156, +2309201, +2309220, +2309254, +2309309, +2310148, +2310181, +2310500, +2311781, +2311974, +2312004, +2312037, +2312177, +2312421, +2312477, +2312708, +2312741, +2312934, +2312997, +2313092, +2314565, +2314982, +2315013, +2315089, +2315172, +2315217, +2315389, +2316292, +2318141, +2326532, +2326845, +2326852, +2328038, +2328069, +2328317, +2328325, +2328518, +2328549, +2328580, +2328625, +2328797, +2329096, +2329418, +2330045, +2330129, +2330180, +2331165, +2331205, +2331933, +2331942, +2331973, +2332198, +2332229, +2332294, +2332325, +2332413, +2334724, +2334973, +2334980, +2335069, +2335076, +2336293, +2336509, +2336581, +2336637, +2336645, +2336733, +2336741, +2336964, +2336997, +2337053, +2337288, +2337629, +2337796, +2338013, +2338020, +2338109, +2338116, +2339142, +2339325, +2339333, +2339421, +2339430, +2339493, +2339526, +2339557, +2339588, +2339645, +2339848, +2340189, +2350084, +2350693, +2350758, +2350833, +2350909, +2356740, +2356797, +2357258, +2357941, +2358195, +2358325, +2358877, +2359281, +2359300, +2388829, +2392073, +2395645, +2395665, +2395837, +2396164, +2402461, +2490372, +2524669, +2524698, +2524989, +2654212, +2672893, +2949124, +2967357, +2967556, +2968573, +2968584, +2968925, +2969041, +2969117, +2972164, +2973149, +2973189, +2973361, +2973405, +2973700, +2975237, +2975473, +2975637, +2975747, +2975889, +2975925, +2975965, +2976264, +2976605, +2976618, +2976861, +2976868, +2977565, +2977700, +2978333, +3000320, +3001345, +3002378, +3003121, +3003261, +3006468, +3008893, +3008997, +3009028, +3009062, +3010845, +3011045, +3011171, +3011613, +3013635, +3013713, +3013731, +3013765, +3013821, +3014150, +3014237, +3014660, +3211037, +3211268, +3250909, +3252228, +3252541, +3538948, 
+3548157, +3549700, +3549821, +3550340, +3550493, +3550724, +3563421, +3637252, +3640701, +3640836, +3641277, +3641348, +3641661, +3641860, +3642205, +3642261, +3642277, +3642353, +3642394, +3642525, +3801109, +3808989, +3809301, +3810557, +3810613, +3812518, +3812581, +3812693, +3812774, +3812986, +3813221, +3813493, +3813541, +3813781, +3814725, +3814869, +3816765, +3817493, +3819589, +3819701, +3819741, +3824650, +3825309, +3825685, +3828477, +3828746, +3829565, +3833856, +3834689, +3835520, +3836353, +3836605, +3836609, +3837184, +3838017, +3838848, +3838909, +3838912, +3839005, +3839040, +3839101, +3839136, +3839229, +3839264, +3839421, +3839424, +3839681, +3839837, +3839841, +3839901, +3839905, +3840157, +3840161, +3840512, +3841345, +3842176, +3842269, +3842272, +3842429, +3842464, +3842749, +3842752, +3843005, +3843009, +3843840, +3843933, +3843936, +3844093, +3844096, +3844285, +3844288, +3844349, +3844416, +3844669, +3844673, +3845504, +3846337, +3847168, +3848001, +3848832, +3849665, +3850496, +3851329, +3852160, +3852993, +3853824, +3854657, +3855581, +3855616, +3856434, +3856449, +3857266, +3857281, +3857472, +3858290, +3858305, +3859122, +3859137, +3859328, +3860146, +3860161, +3860978, +3860993, +3861184, +3862002, +3862017, +3862834, +3862849, +3863040, +3863858, +3863873, +3864690, +3864705, +3864896, +3864929, +3864989, +3865032, +3866645, +3883013, +3884789, +3884901, +3886517, +3886757, +3886805, +3887237, +3887285, +3887345, +3887517, +3887973, +3888157, +3888165, +3888669, +3932165, +3932413, +3932421, +3932989, +3933029, +3933277, +3933285, +3933373, +3933381, +3933565, +3940356, +3941821, +3941893, +3942115, +3942365, +3942408, +3942749, +3942852, +3942901, +3942941, +3954692, +3956101, +3956232, +3956573, +3956723, +3956765, +3997700, +4004029, +4004074, +4004357, +4004605, +4005888, +4006977, +4008069, +4008291, +4008349, +4008456, +4008797, +4008913, +4008989, +4034090, +4035989, +4036010, +4036115, +4036138, +4036285, +4038698, +4040149, 
+4040170, +4040669, +4046852, +4047005, +4047012, +4047901, +4047908, +4047997, +4048004, +4048061, +4048100, +4048157, +4048164, +4048509, +4048516, +4048669, +4048676, +4048733, +4048740, +4048797, +4048964, +4049021, +4049124, +4049181, +4049188, +4049245, +4049252, +4049309, +4049316, +4049437, +4049444, +4049533, +4049540, +4049597, +4049636, +4049693, +4049700, +4049757, +4049764, +4049821, +4049828, +4049885, +4049892, +4049949, +4049956, +4050045, +4050052, +4050109, +4050148, +4050301, +4050308, +4050557, +4050564, +4050717, +4050724, +4050877, +4050884, +4050941, +4050948, +4051293, +4051300, +4051869, +4052004, +4052125, +4052132, +4052317, +4052324, +4052893, +4054546, +4054621, +4063253, +4064669, +4064789, +4067997, +4068373, +4068861, +4068917, +4069405, +4069429, +4069917, +4069941, +4071133, +4071434, +4071861, +4077021, +4078805, +4079741, +4080149, +4081565, +4081685, +4081981, +4082197, +4082269, +4082709, +4082909, +4087829, +4095860, +4096021, +4119325, +4119573, +4119997, +4120085, +4120509, +4120597, +4124317, +4124693, +4127549, +4127765, +4128157, +4128789, +4129181, +4129301, +4131101, +4131349, +4131677, +4131861, +4133149, +4133397, +4134365, +4134421, +4134493, +4136981, +4140861, +4140885, +4143517, +4143541, +4147869, +4148245, +4148701, +4148757, +4148925, +4149013, +4149117, +4149269, +4149501, +4149781, +4150589, +4150805, +4151037, +4151317, +4151421, +4151829, +4152061, +4153365, +4158077, +4158101, +4159869, +4161032, +4161373, +4194308, +5561309, +5562372, +5695165, +5695492, +5702621, +5702660, +5887069, +5887492, +6126653, +6225924, +6243293, +6291460, +6449533, +29360186, +29360221, +29361178, +29364253, +29368325, +29376029, +31457308, +33554397, +33554460, +35651549, +35651613, +//--Autogenerated -- end of section automatically generated +}; + +constexpr int maxUnicode = 0x10ffff; +constexpr int maskCategory = 0x1F; + +} + +// Each element in catRanges is the start of a range of Unicode characters in +// one general 
category. +// The value is comprised of a 21-bit character value shifted 5 bits and a 5 bit +// category matching the CharacterCategory enumeration. +// Initial version has 3249 entries and adds about 13K to the executable. +// The array is in ascending order so can be searched using binary search. +// Therefore the average call takes log2(3249) = 12 comparisons. +// For speed, it may be useful to make a linear table for the common values, +// possibly for 0..0xff for most Western European text or 0..0xfff for most +// alphabetic languages. + +CharacterCategory CategoriseCharacter(int character) { + if (character < 0 || character > maxUnicode) + return ccCn; + const int baseValue = character * (maskCategory+1) + maskCategory; + const int *placeAfter = std::lower_bound(catRanges, std::end(catRanges), baseValue); + return static_cast(*(placeAfter-1) & maskCategory); +} + +// Implementation of character sets recommended for identifiers in Unicode Standard Annex #31. +// http://unicode.org/reports/tr31/ + +namespace { + +enum class OtherID { oidNone, oidStart, oidContinue }; + +// Some characters are treated as valid for identifiers even +// though most characters from their category are not. 
+// Values copied from http://www.unicode.org/Public/9.0.0/ucd/PropList.txt +OtherID OtherIDOfCharacter(int character) noexcept { + if ( + (character == 0x1885) || // MONGOLIAN LETTER ALI GALI BALUDA + (character == 0x1886) || // MONGOLIAN LETTER ALI GALI THREE BALUDA + (character == 0x2118) || // SCRIPT CAPITAL P + (character == 0x212E) || // ESTIMATED SYMBOL + (character == 0x309B) || // KATAKANA-HIRAGANA VOICED SOUND MARK + (character == 0x309C)) { // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + return OtherID::oidStart; + } else if ( + (character == 0x00B7) || // MIDDLE DOT + (character == 0x0387) || // GREEK ANO TELEIA + ((character >= 0x1369) && (character <= 0x1371)) || // ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE + (character == 0x19DA)) { // NEW TAI LUE THAM DIGIT ONE + return OtherID::oidContinue; + } else { + return OtherID::oidNone; + } +} + +// Determine if a character is in Ll|Lu|Lt|Lm|Lo|Nl|Mn|Mc|Nd|Pc and has +// Pattern_Syntax|Pattern_White_Space. +// As of Unicode 9, only VERTICAL TILDE which is in Lm and has Pattern_Syntax matches. +// Should really generate from PropList.txt a list of Pattern_Syntax and Pattern_White_Space. 
+constexpr bool IsIdPattern(int character) noexcept { + return character == 0x2E2F; +} + +bool OmitXidStart(int character) noexcept { + switch (character) { + case 0x037A: // GREEK YPOGEGRAMMENI + case 0x0E33: // THAI CHARACTER SARA AM + case 0x0EB3: // LAO VOWEL SIGN AM + case 0x309B: // KATAKANA-HIRAGANA VOICED SOUND MARK + case 0x309C: // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + case 0xFC5E: // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM + case 0xFC5F: // ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM + case 0xFC60: // ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM + case 0xFC61: // ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM + case 0xFC62: // ARABIC LIGATURE SHADDA WITH KASRA ISOLATED FORM + case 0xFC63: // ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM + case 0xFDFA: // ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM + case 0xFDFB: // ARABIC LIGATURE JALLAJALALOUHOU + case 0xFE70: // ARABIC FATHATAN ISOLATED FORM + case 0xFE72: // ARABIC DAMMATAN ISOLATED FORM + case 0xFE74: // ARABIC KASRATAN ISOLATED FORM + case 0xFE76: // ARABIC FATHA ISOLATED FORM + case 0xFE78: // ARABIC DAMMA ISOLATED FORM + case 0xFE7A: // ARABIC KASRA ISOLATED FORM + case 0xFE7C: // ARABIC SHADDA ISOLATED FORM + case 0xFE7E: // ARABIC SUKUN ISOLATED FORM + case 0xFF9E: // HALFWIDTH KATAKANA VOICED SOUND MARK + case 0xFF9F: // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + return true; + default: + return false; + } +} + +bool OmitXidContinue(int character) noexcept { + switch (character) { + case 0x037A: // GREEK YPOGEGRAMMENI + case 0x309B: // KATAKANA-HIRAGANA VOICED SOUND MARK + case 0x309C: // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + case 0xFC5E: // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM + case 0xFC5F: // ARABIC LIGATURE SHADDA WITH KASRATAN ISOLATED FORM + case 0xFC60: // ARABIC LIGATURE SHADDA WITH FATHA ISOLATED FORM + case 0xFC61: // ARABIC LIGATURE SHADDA WITH DAMMA ISOLATED FORM + case 0xFC62: // ARABIC LIGATURE SHADDA WITH KASRA ISOLATED 
FORM + case 0xFC63: // ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM + case 0xFDFA: // ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM + case 0xFDFB: // ARABIC LIGATURE JALLAJALALOUHOU + case 0xFE70: // ARABIC FATHATAN ISOLATED FORM + case 0xFE72: // ARABIC DAMMATAN ISOLATED FORM + case 0xFE74: // ARABIC KASRATAN ISOLATED FORM + case 0xFE76: // ARABIC FATHA ISOLATED FORM + case 0xFE78: // ARABIC DAMMA ISOLATED FORM + case 0xFE7A: // ARABIC KASRA ISOLATED FORM + case 0xFE7C: // ARABIC SHADDA ISOLATED FORM + case 0xFE7E: // ARABIC SUKUN ISOLATED FORM + return true; + default: + return false; + } +} + +} + +// UAX #31 defines ID_Start as +// [[:L:][:Nl:][:Other_ID_Start:]--[:Pattern_Syntax:]--[:Pattern_White_Space:]] +bool IsIdStart(int character) { + if (IsIdPattern(character)) { + return false; + } + const OtherID oid = OtherIDOfCharacter(character); + if (oid == OtherID::oidStart) { + return true; + } + const CharacterCategory c = CategoriseCharacter(character); + return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo + || c == ccNl); +} + +// UAX #31 defines ID_Continue as +// [[:ID_Start:][:Mn:][:Mc:][:Nd:][:Pc:][:Other_ID_Continue:]--[:Pattern_Syntax:]--[:Pattern_White_Space:]] +bool IsIdContinue(int character) { + if (IsIdPattern(character)) { + return false; + } + const OtherID oid = OtherIDOfCharacter(character); + if (oid != OtherID::oidNone) { + return true; + } + const CharacterCategory c = CategoriseCharacter(character); + return (c == ccLl || c == ccLu || c == ccLt || c == ccLm || c == ccLo + || c == ccNl || c == ccMn || c == ccMc || c == ccNd || c == ccPc); +} + +// XID_Start is ID_Start modified for Normalization Form KC in UAX #31 +bool IsXidStart(int character) { + if (OmitXidStart(character)) { + return false; + } else { + return IsIdStart(character); + } +} + +// XID_Continue is ID_Continue modified for Normalization Form KC in UAX #31 +bool IsXidContinue(int character) { + if (OmitXidContinue(character)) { + return false; + 
} else { + return IsIdContinue(character); + } +} + +CharacterCategoryMap::CharacterCategoryMap() { + Optimize(256); +} + +int CharacterCategoryMap::Size() const noexcept { + return static_cast(dense.size()); +} + +void CharacterCategoryMap::Optimize(int countCharacters) { + const int characters = std::clamp(countCharacters, 256, maxUnicode + 1); + dense.resize(characters); + + int end = 0; + int index = 0; + int current = catRanges[index]; + ++index; + do { + const int next = catRanges[index]; + const unsigned char category = current & maskCategory; + current >>= 5; + end = std::min(characters, next >> 5); + while (current < end) { + dense[current++] = category; + } + current = next; + ++index; + } while (characters > end); +} + +} diff --git a/src/CharacterCategoryMap.h b/src/CharacterCategoryMap.h new file mode 100644 index 000000000..35706eda7 --- /dev/null +++ b/src/CharacterCategoryMap.h @@ -0,0 +1,52 @@ +// Scintilla source code edit control +/** @file CharacterCategoryMap.h + ** Returns the Unicode general category of a character. + ** Similar code to Lexilla's lexilla/lexlib/CharacterCategory.h but renamed + ** to avoid problems with builds that statically include both Scintilla and Lexilla. + **/ +// Copyright 2013 by Neil Hodgson +// The License.txt file describes the conditions under which this software may be distributed. + +#ifndef CHARACTERCATEGORYMAP_H +#define CHARACTERCATEGORYMAP_H + +namespace Scintilla { + +enum CharacterCategory { + ccLu, ccLl, ccLt, ccLm, ccLo, + ccMn, ccMc, ccMe, + ccNd, ccNl, ccNo, + ccPc, ccPd, ccPs, ccPe, ccPi, ccPf, ccPo, + ccSm, ccSc, ccSk, ccSo, + ccZs, ccZl, ccZp, + ccCc, ccCf, ccCs, ccCo, ccCn +}; + +CharacterCategory CategoriseCharacter(int character); + +// Common definitions of allowable characters in identifiers from UAX #31. 
+bool IsIdStart(int character); +bool IsIdContinue(int character); +bool IsXidStart(int character); +bool IsXidContinue(int character); + +class CharacterCategoryMap { +private: + std::vector dense; +public: + CharacterCategoryMap(); + CharacterCategory CategoryFor(int character) const { + if (static_cast(character) < dense.size()) { + return static_cast(dense[character]); + } else { + // binary search through ranges + return CategoriseCharacter(character); + } + } + int Size() const noexcept; + void Optimize(int countCharacters); +}; + +} + +#endif diff --git a/src/CharacterSet.cxx b/src/CharacterSet.cxx deleted file mode 100644 index b934c2dd4..000000000 --- a/src/CharacterSet.cxx +++ /dev/null @@ -1,52 +0,0 @@ -// Scintilla source code edit control -/** @file CharacterSet.cxx - ** Simple case functions for ASCII. - ** Lexer infrastructure. - **/ -// Copyright 1998-2010 by Neil Hodgson -// The License.txt file describes the conditions under which this software may be distributed. - -#include -#include - -#include "CharacterSet.h" - -using namespace Scintilla; - -namespace Scintilla { - -int CompareCaseInsensitive(const char *a, const char *b) noexcept { - while (*a && *b) { - if (*a != *b) { - const char upperA = MakeUpperCase(*a); - const char upperB = MakeUpperCase(*b); - if (upperA != upperB) - return upperA - upperB; - } - a++; - b++; - } - // Either *a or *b is nul - return *a - *b; -} - -int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept { - while (*a && *b && len) { - if (*a != *b) { - const char upperA = MakeUpperCase(*a); - const char upperB = MakeUpperCase(*b); - if (upperA != upperB) - return upperA - upperB; - } - a++; - b++; - len--; - } - if (len == 0) - return 0; - else - // Either *a or *b is nul - return *a - *b; -} - -} diff --git a/src/CharacterSet.h b/src/CharacterSet.h deleted file mode 100644 index a518c27fc..000000000 --- a/src/CharacterSet.h +++ /dev/null @@ -1,208 +0,0 @@ -// Scintilla source code edit control 
-/** @file CharacterSet.h - ** Encapsulates a set of characters. Used to test if a character is within a set. - **/ -// Copyright 2007 by Neil Hodgson -// The License.txt file describes the conditions under which this software may be distributed. - -#ifndef CHARACTERSET_H -#define CHARACTERSET_H - -namespace Scintilla { - -class CharacterSet { - int size; - bool valueAfter; - bool *bset; -public: - enum setBase { - setNone=0, - setLower=1, - setUpper=2, - setDigits=4, - setAlpha=setLower|setUpper, - setAlphaNum=setAlpha|setDigits - }; - CharacterSet(setBase base=setNone, const char *initialSet="", int size_=0x80, bool valueAfter_=false) { - size = size_; - valueAfter = valueAfter_; - bset = new bool[size]; - for (int i=0; i < size; i++) { - bset[i] = false; - } - AddString(initialSet); - if (base & setLower) - AddString("abcdefghijklmnopqrstuvwxyz"); - if (base & setUpper) - AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); - if (base & setDigits) - AddString("0123456789"); - } - CharacterSet(const CharacterSet &other) { - size = other.size; - valueAfter = other.valueAfter; - bset = new bool[size]; - for (int i=0; i < size; i++) { - bset[i] = other.bset[i]; - } - } - CharacterSet(CharacterSet &&other) noexcept { - size = other.size; - valueAfter = other.valueAfter; - bset = other.bset; - other.size = 0; - other.bset = nullptr; - } - CharacterSet &operator=(const CharacterSet &other) { - if (this != &other) { - bool *bsetNew = new bool[other.size]; - for (int i = 0; i < other.size; i++) { - bsetNew[i] = other.bset[i]; - } - delete[]bset; - size = other.size; - valueAfter = other.valueAfter; - bset = bsetNew; - } - return *this; - } - CharacterSet &operator=(CharacterSet &&other) noexcept { - if (this != &other) { - delete []bset; - size = other.size; - valueAfter = other.valueAfter; - bset = other.bset; - other.size = 0; - other.bset = nullptr; - } - return *this; - } - ~CharacterSet() { - delete []bset; - bset = nullptr; - size = 0; - } - void Add(int val) { - assert(val >= 
0); - assert(val < size); - bset[val] = true; - } - void AddString(const char *setToAdd) { - for (const char *cp=setToAdd; *cp; cp++) { - const unsigned char uch = *cp; - assert(uch < size); - bset[uch] = true; - } - } - bool Contains(int val) const noexcept { - assert(val >= 0); - if (val < 0) return false; - return (val < size) ? bset[val] : valueAfter; - } - bool Contains(char ch) const noexcept { - // Overload char as char may be signed - const unsigned char uch = ch; - return Contains(uch); - } -}; - -// Functions for classifying characters - -constexpr bool IsASpace(int ch) noexcept { - return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); -} - -constexpr bool IsASpaceOrTab(int ch) noexcept { - return (ch == ' ') || (ch == '\t'); -} - -constexpr bool IsADigit(int ch) noexcept { - return (ch >= '0') && (ch <= '9'); -} - -constexpr bool IsADigit(int ch, int base) noexcept { - if (base <= 10) { - return (ch >= '0') && (ch < '0' + base); - } else { - return ((ch >= '0') && (ch <= '9')) || - ((ch >= 'A') && (ch < 'A' + base - 10)) || - ((ch >= 'a') && (ch < 'a' + base - 10)); - } -} - -constexpr bool IsASCII(int ch) noexcept { - return (ch >= 0) && (ch < 0x80); -} - -constexpr bool IsLowerCase(int ch) noexcept { - return (ch >= 'a') && (ch <= 'z'); -} - -constexpr bool IsUpperCase(int ch) noexcept { - return (ch >= 'A') && (ch <= 'Z'); -} - -constexpr bool IsUpperOrLowerCase(int ch) noexcept { - return IsUpperCase(ch) || IsLowerCase(ch); -} - -constexpr bool IsAlphaNumeric(int ch) noexcept { - return - ((ch >= '0') && (ch <= '9')) || - ((ch >= 'a') && (ch <= 'z')) || - ((ch >= 'A') && (ch <= 'Z')); -} - -/** - * Check if a character is a space. - * This is ASCII specific but is safe with chars >= 0x80. - */ -constexpr bool isspacechar(int ch) noexcept { - return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); -} - -constexpr bool iswordchar(int ch) noexcept { - return IsAlphaNumeric(ch) || ch == '.' 
|| ch == '_'; -} - -constexpr bool iswordstart(int ch) noexcept { - return IsAlphaNumeric(ch) || ch == '_'; -} - -constexpr bool isoperator(int ch) noexcept { - if (IsAlphaNumeric(ch)) - return false; - if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || - ch == '(' || ch == ')' || ch == '-' || ch == '+' || - ch == '=' || ch == '|' || ch == '{' || ch == '}' || - ch == '[' || ch == ']' || ch == ':' || ch == ';' || - ch == '<' || ch == '>' || ch == ',' || ch == '/' || - ch == '?' || ch == '!' || ch == '.' || ch == '~') - return true; - return false; -} - -// Simple case functions for ASCII supersets. - -template -constexpr T MakeUpperCase(T ch) noexcept { - if (ch < 'a' || ch > 'z') - return ch; - else - return ch - 'a' + 'A'; -} - -template -constexpr T MakeLowerCase(T ch) noexcept { - if (ch < 'A' || ch > 'Z') - return ch; - else - return ch - 'A' + 'a'; -} - -int CompareCaseInsensitive(const char *a, const char *b) noexcept; -int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept; - -} - -#endif diff --git a/src/CharacterType.cxx b/src/CharacterType.cxx new file mode 100644 index 000000000..04d6a2abe --- /dev/null +++ b/src/CharacterType.cxx @@ -0,0 +1,51 @@ +// Scintilla source code edit control +/** @file CharacterType.cxx + ** Tests for character type and case-insensitive comparisons. + **/ +// Copyright 1998-2010 by Neil Hodgson +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#include +#include + +#include "CharacterType.h" + +using namespace Scintilla; + +namespace Scintilla { + +int CompareCaseInsensitive(const char *a, const char *b) noexcept { + while (*a && *b) { + if (*a != *b) { + const char upperA = MakeUpperCase(*a); + const char upperB = MakeUpperCase(*b); + if (upperA != upperB) + return upperA - upperB; + } + a++; + b++; + } + // Either *a or *b is nul + return *a - *b; +} + +int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept { + while (*a && *b && len) { + if (*a != *b) { + const char upperA = MakeUpperCase(*a); + const char upperB = MakeUpperCase(*b); + if (upperA != upperB) + return upperA - upperB; + } + a++; + b++; + len--; + } + if (len == 0) + return 0; + else + // Either *a or *b is nul + return *a - *b; +} + +} diff --git a/src/CharacterType.h b/src/CharacterType.h new file mode 100644 index 000000000..1a478cbe4 --- /dev/null +++ b/src/CharacterType.h @@ -0,0 +1,112 @@ +// Scintilla source code edit control +/** @file CharacterType.h + ** Tests for character type and case-insensitive comparisons. + **/ +// Copyright 2007 by Neil Hodgson +// The License.txt file describes the conditions under which this software may be distributed. 
+ +#ifndef CHARACTERTYPE_H +#define CHARACTERTYPE_H + +namespace Scintilla { + +// Functions for classifying characters + +constexpr bool IsASpace(int ch) noexcept { + return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); +} + +constexpr bool IsASpaceOrTab(int ch) noexcept { + return (ch == ' ') || (ch == '\t'); +} + +constexpr bool IsADigit(int ch) noexcept { + return (ch >= '0') && (ch <= '9'); +} + +constexpr bool IsADigit(int ch, int base) noexcept { + if (base <= 10) { + return (ch >= '0') && (ch < '0' + base); + } else { + return ((ch >= '0') && (ch <= '9')) || + ((ch >= 'A') && (ch < 'A' + base - 10)) || + ((ch >= 'a') && (ch < 'a' + base - 10)); + } +} + +constexpr bool IsASCII(int ch) noexcept { + return (ch >= 0) && (ch < 0x80); +} + +constexpr bool IsLowerCase(int ch) noexcept { + return (ch >= 'a') && (ch <= 'z'); +} + +constexpr bool IsUpperCase(int ch) noexcept { + return (ch >= 'A') && (ch <= 'Z'); +} + +constexpr bool IsUpperOrLowerCase(int ch) noexcept { + return IsUpperCase(ch) || IsLowerCase(ch); +} + +constexpr bool IsAlphaNumeric(int ch) noexcept { + return + ((ch >= '0') && (ch <= '9')) || + ((ch >= 'a') && (ch <= 'z')) || + ((ch >= 'A') && (ch <= 'Z')); +} + +/** + * Check if a character is a space. + * This is ASCII specific but is safe with chars >= 0x80. + */ +constexpr bool isspacechar(int ch) noexcept { + return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d)); +} + +constexpr bool iswordchar(int ch) noexcept { + return IsAlphaNumeric(ch) || ch == '.' || ch == '_'; +} + +constexpr bool iswordstart(int ch) noexcept { + return IsAlphaNumeric(ch) || ch == '_'; +} + +constexpr bool isoperator(int ch) noexcept { + if (IsAlphaNumeric(ch)) + return false; + if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || + ch == '(' || ch == ')' || ch == '-' || ch == '+' || + ch == '=' || ch == '|' || ch == '{' || ch == '}' || + ch == '[' || ch == ']' || ch == ':' || ch == ';' || + ch == '<' || ch == '>' || ch == ',' || ch == '/' || + ch == '?' 
|| ch == '!' || ch == '.' || ch == '~') + return true; + return false; +} + +// Simple case functions for ASCII supersets. + +template <typename T> +constexpr T MakeUpperCase(T ch) noexcept { + if (ch < 'a' || ch > 'z') + return ch; + else + return ch - 'a' + 'A'; +} + +template <typename T> +constexpr T MakeLowerCase(T ch) noexcept { + if (ch < 'A' || ch > 'Z') + return ch; + else + return ch - 'A' + 'a'; +} + +int CompareCaseInsensitive(const char *a, const char *b) noexcept; +int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept; + +} + +#endif diff --git a/src/Document.cxx b/src/Document.cxx index 0a4c8ca0b..ffbf8f557 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -32,8 +32,8 @@ #include "ILexer.h" #include "Scintilla.h" -#include "CharacterSet.h" -#include "CharacterCategory.h" +#include "CharacterType.h" +#include "CharacterCategoryMap.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" diff --git a/src/EditModel.cxx b/src/EditModel.cxx index 013f848ba..c7fe1c710 100644 --- a/src/EditModel.cxx +++ b/src/EditModel.cxx @@ -29,7 +29,7 @@ #include "ILexer.h" #include "Scintilla.h" -#include "CharacterCategory.h" +#include "CharacterCategoryMap.h" #include "Position.h" #include "UniqueString.h" diff --git a/src/EditView.cxx b/src/EditView.cxx index 08bdfb55c..1061d28fc 100644 --- a/src/EditView.cxx +++ b/src/EditView.cxx @@ -33,8 +33,8 @@ #include "ILexer.h" #include "Scintilla.h" -#include "CharacterSet.h" -#include "CharacterCategory.h" +#include "CharacterType.h" +#include "CharacterCategoryMap.h" #include "Position.h" #include "UniqueString.h" #include "SplitVector.h" diff --git a/src/Editor.cxx b/src/Editor.cxx index 66b8fe47d..adc9f3492 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -33,8 +33,8 @@ #include "ILexer.h" #include "Scintilla.h" -#include "CharacterSet.h" -#include "CharacterCategory.h" +#include "CharacterType.h" +#include "CharacterCategoryMap.h" #include "Position.h" #include "UniqueString.h" #include 
"SplitVector.h" diff --git a/src/MarginView.cxx b/src/MarginView.cxx index 308923ad8..c2dcbf32d 100644 --- a/src/MarginView.cxx +++ b/src/MarginView.cxx @@ -30,7 +30,7 @@ #include "ILexer.h" #include "Scintilla.h" -#include "CharacterCategory.h" +#include "CharacterCategoryMap.h" #include "Position.h" #include "UniqueString.h" #include "SplitVector.h" diff --git a/src/PositionCache.cxx b/src/PositionCache.cxx index 7516c82ec..0a9ef259a 100644 --- a/src/PositionCache.cxx +++ b/src/PositionCache.cxx @@ -29,7 +29,7 @@ #include "ILexer.h" #include "Scintilla.h" -#include "CharacterCategory.h" +#include "CharacterCategoryMap.h" #include "Position.h" #include "UniqueString.h" #include "SplitVector.h" diff --git a/src/ScintillaBase.cxx b/src/ScintillaBase.cxx index 0b9e8e41c..62c3fa658 100644 --- a/src/ScintillaBase.cxx +++ b/src/ScintillaBase.cxx @@ -28,7 +28,7 @@ #include "ILexer.h" #include "Scintilla.h" -#include "CharacterCategory.h" +#include "CharacterCategoryMap.h" #include "Position.h" #include "UniqueString.h" -- cgit v1.2.3