diff options
Diffstat (limited to 'regc_locale.c')
| -rw-r--r-- | regc_locale.c | 91 |
1 files changed, 52 insertions, 39 deletions
diff --git a/regc_locale.c b/regc_locale.c index a6bc3af..97aa702 100644 --- a/regc_locale.c +++ b/regc_locale.c @@ -120,12 +120,16 @@ static const struct cname { * Unicode character-class tables. */ +// FIXME: Perhaps define a new type here, similar to the +// original chr, so we don't waste space on the tables +// in ASCII (non-UTF-8) builds. +// Or perhaps pchr should just be like chr in the original implementation. typedef struct crange { - chr start; - chr end; + pchr start; + pchr end; } crange; -#if defined(REGEX_STANDALONE) && ! defined(REGEX_WCHAR) +#if defined(REGEX_STANDALONE) && ! defined(REGEX_UTF8) static const crange alphaRangeTable[] = { {0x41, 0x5a}, {0x61, 0x7a} @@ -133,10 +137,10 @@ static const crange alphaRangeTable[] = { #define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange)) -static const chr alphaCharTable[] = { +static const pchr alphaCharTable[] = { }; -#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) +#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(pchr)) static const crange digitRangeTable[] = { {0x30, 0x39} @@ -150,11 +154,11 @@ static const crange punctRangeTable[] = { #define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange)) -static const chr punctCharTable[] = { +static const pchr punctCharTable[] = { 0x3a, 0x3b, 0x3f, 0x40, 0x5f, 0x7b, 0x7d }; -#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) +#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(pchr)) static const crange spaceRangeTable[] = { {0x09, 0x0d} @@ -162,11 +166,11 @@ static const crange spaceRangeTable[] = { #define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange)) -static const chr spaceCharTable[] = { +static const pchr spaceCharTable[] = { 0x20 }; -#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr)) +#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(pchr)) static const crange lowerRangeTable[] = { {0x61, 0x7a} @@ -174,10 +178,10 @@ static const crange lowerRangeTable[] = { #define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange)) -static const chr lowerCharTable[] = { +static const pchr lowerCharTable[] = { }; -#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) +#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(pchr)) static const crange upperRangeTable[] = { {0x41, 0x5a} @@ -185,10 +189,10 @@ static const crange upperRangeTable[] = { #define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange)) -static const chr upperCharTable[] = { +static const pchr upperCharTable[] = { }; -#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) +#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(pchr)) static const crange graphRangeTable[] = { {0x21, 0x7e} @@ -196,10 +200,10 @@ static const crange graphRangeTable[] = { #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) -static const chr graphCharTable[] = { +static const pchr graphCharTable[] = { }; -#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) +#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(pchr)) static const crange printRangeTable[] = { {0x20, 0x7E} @@ -207,10 +211,10 @@ static const crange printRangeTable[] = { #define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange)) -static const chr printCharTable[] = { +static const pchr printCharTable[] = { }; -#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr)) +#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(pchr)) #else /* @@ -269,7 +273,7 @@ static const crange alphaRangeTable[] = { #define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange)) -static const chr alphaCharTable[] = { +static const pchr alphaCharTable[] = { 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c, 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5, 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd, @@ -285,7 +289,7 @@ static const chr alphaCharTable[] = { 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe }; -#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr)) +#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(pchr)) /* * Unicode: decimal digit characters @@ -321,7 +325,7 @@ static const crange punctRangeTable[] = { #define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange)) -static const chr punctCharTable[] = { +static const pchr punctCharTable[] = { 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab, 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be, 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964, @@ -331,7 +335,7 @@ static const chr punctCharTable[] = { 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d }; -#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr)) +#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(pchr)) /* * Unicode: white space characters. @@ -343,11 +347,11 @@ static const crange spaceRangeTable[] = { #define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange)) -static const chr spaceCharTable[] = { +static const pchr spaceCharTable[] = { 0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000 }; -#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr)) +#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(pchr)) /* * Unicode: lowercase characters @@ -366,7 +370,7 @@ static const crange lowerRangeTable[] = { #define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange)) -static const chr lowerCharTable[] = { +static const pchr lowerCharTable[] = { 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b, 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d, 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f, @@ -409,7 +413,7 @@ static const chr lowerCharTable[] = { 0x210f, 0x2113, 0x212f, 0x2134, 0x2139 }; -#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr)) +#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(pchr)) /* * Unicode: uppercase characters. @@ -428,7 +432,7 @@ static const crange upperRangeTable[] = { #define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange)) -static const chr upperCharTable[] = { +static const pchr upperCharTable[] = { 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110, 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134, @@ -471,7 +475,7 @@ static const chr upperCharTable[] = { 0x2131, 0x2133 }; -#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr)) +#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(pchr)) /* * Unicode: unicode print characters excluding space. @@ -599,7 +603,7 @@ static const crange graphRangeTable[] = { #define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange)) -static const chr graphCharTable[] = { +static const pchr graphCharTable[] = { 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f, 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd, @@ -616,7 +620,7 @@ static const chr graphCharTable[] = { 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74 }; -#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr)) +#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(pchr)) /* * Unicode: unicode print characters including space, i.e. all Letters (class @@ -681,7 +685,7 @@ static const crange printRangeTable[] = { #define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange)) -static const chr printCharTable[] = { +static const pchr printCharTable[] = { 0x037A, 0x037E, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0589, 0x05BE, 0x05C0, 0x05C3, 0x060C, 0x061B, 0x061F, 0x06E9, 0x093D, 0x0950, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, 0x0CDE, 0x0E01, @@ -690,7 +694,7 @@ static const chr printCharTable[] = { 0x2070, 0x2300, 0x274D, 0x2756, 0x303F, 0xFB3E, 0xFE74 }; -#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr)) +#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(pchr)) #endif /* @@ -720,9 +724,17 @@ element( assert(startp < endp); len = endp - startp; +#ifdef REGEX_UTF8 + wchar_t c; + if (mbtowc(&c, (const char *)startp, len) == len) { + // single character + return c; + } +#else if (len == 1) { return *startp; } +#endif NOTE(REG_ULOCALE); @@ -790,9 +802,9 @@ range( for (c=a; c<=b; c++) { addchr(cv, c); - lc = Tcl_UniCharToLower((chr)c); - uc = Tcl_UniCharToUpper((chr)c); - tc = Tcl_UniCharToTitle((chr)c); + lc = Tcl_UniCharToLower(c); + uc = Tcl_UniCharToUpper(c); + tc = Tcl_UniCharToTitle(c); if (c != lc) { addchr(cv, lc); } @@ -859,7 +871,7 @@ eclass( } cv = getcvec(v, 1, 0); assert(cv != NULL); - addchr(cv, (chr)c); + addchr(cv, c); return cv; } @@ -1097,12 +1109,12 @@ allcases( pchr pc) /* character to get case equivs of */ { struct cvec *cv; - chr c = (chr)pc; + pchr c = pc; chr lc, uc, tc; - lc = Tcl_UniCharToLower((chr)c); - uc = Tcl_UniCharToUpper((chr)c); - tc = Tcl_UniCharToTitle((chr)c); + lc = Tcl_UniCharToLower(c); + uc = Tcl_UniCharToUpper(c); + tc = Tcl_UniCharToTitle(c); if (tc != uc) { cv = getcvec(v, 3, 0); @@ -1147,6 +1159,7 @@ casecmp( size_t len) /* exact length of comparison */ { for (; len > 0; len--, x++, y++) { + // FIXME: Will fail if REGEX_UTF8. if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) { return 1; } |
