aboutsummaryrefslogtreecommitdiff
path: root/regc_locale.c
diff options
context:
space:
mode:
Diffstat (limited to 'regc_locale.c')
-rw-r--r--regc_locale.c91
1 files changed, 52 insertions, 39 deletions
diff --git a/regc_locale.c b/regc_locale.c
index a6bc3af..97aa702 100644
--- a/regc_locale.c
+++ b/regc_locale.c
@@ -120,12 +120,16 @@ static const struct cname {
* Unicode character-class tables.
*/
+// FIXME: Consider defining a dedicated, narrower type for these tables
+// (similar to the original chr) so that they do not waste space in
+// ASCII (non-UTF-8) builds.
+// Alternatively, pchr could simply be defined as chr was in the original implementation.
typedef struct crange {
- chr start;
- chr end;
+ pchr start;
+ pchr end;
} crange;
-#if defined(REGEX_STANDALONE) && ! defined(REGEX_WCHAR)
+#if defined(REGEX_STANDALONE) && ! defined(REGEX_UTF8)
static const crange alphaRangeTable[] = {
{0x41, 0x5a}, {0x61, 0x7a}
@@ -133,10 +137,10 @@ static const crange alphaRangeTable[] = {
#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
-static const chr alphaCharTable[] = {
+static const pchr alphaCharTable[] = {
};
-#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
+#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(pchr))
static const crange digitRangeTable[] = {
{0x30, 0x39}
@@ -150,11 +154,11 @@ static const crange punctRangeTable[] = {
#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
-static const chr punctCharTable[] = {
+static const pchr punctCharTable[] = {
0x3a, 0x3b, 0x3f, 0x40, 0x5f, 0x7b, 0x7d
};
-#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
+#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(pchr))
static const crange spaceRangeTable[] = {
{0x09, 0x0d}
@@ -162,11 +166,11 @@ static const crange spaceRangeTable[] = {
#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
-static const chr spaceCharTable[] = {
+static const pchr spaceCharTable[] = {
0x20
};
-#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
+#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(pchr))
static const crange lowerRangeTable[] = {
{0x61, 0x7a}
@@ -174,10 +178,10 @@ static const crange lowerRangeTable[] = {
#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
-static const chr lowerCharTable[] = {
+static const pchr lowerCharTable[] = {
};
-#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
+#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(pchr))
static const crange upperRangeTable[] = {
{0x41, 0x5a}
@@ -185,10 +189,10 @@ static const crange upperRangeTable[] = {
#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
-static const chr upperCharTable[] = {
+static const pchr upperCharTable[] = {
};
-#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
+#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(pchr))
static const crange graphRangeTable[] = {
{0x21, 0x7e}
@@ -196,10 +200,10 @@ static const crange graphRangeTable[] = {
#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
-static const chr graphCharTable[] = {
+static const pchr graphCharTable[] = {
};
-#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
+#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(pchr))
static const crange printRangeTable[] = {
{0x20, 0x7E}
@@ -207,10 +211,10 @@ static const crange printRangeTable[] = {
#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange))
-static const chr printCharTable[] = {
+static const pchr printCharTable[] = {
};
-#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr))
+#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(pchr))
#else
/*
@@ -269,7 +273,7 @@ static const crange alphaRangeTable[] = {
#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
-static const chr alphaCharTable[] = {
+static const pchr alphaCharTable[] = {
0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c,
0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5,
0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd,
@@ -285,7 +289,7 @@ static const chr alphaCharTable[] = {
0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe
};
-#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
+#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(pchr))
/*
* Unicode: decimal digit characters
@@ -321,7 +325,7 @@ static const crange punctRangeTable[] = {
#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
-static const chr punctCharTable[] = {
+static const pchr punctCharTable[] = {
0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab,
0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be,
0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964,
@@ -331,7 +335,7 @@ static const chr punctCharTable[] = {
0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d
};
-#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
+#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(pchr))
/*
* Unicode: white space characters.
@@ -343,11 +347,11 @@ static const crange spaceRangeTable[] = {
#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
-static const chr spaceCharTable[] = {
+static const pchr spaceCharTable[] = {
0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000
};
-#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
+#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(pchr))
/*
* Unicode: lowercase characters
@@ -366,7 +370,7 @@ static const crange lowerRangeTable[] = {
#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
-static const chr lowerCharTable[] = {
+static const pchr lowerCharTable[] = {
0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b,
0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d,
0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f,
@@ -409,7 +413,7 @@ static const chr lowerCharTable[] = {
0x210f, 0x2113, 0x212f, 0x2134, 0x2139
};
-#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
+#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(pchr))
/*
* Unicode: uppercase characters.
@@ -428,7 +432,7 @@ static const crange upperRangeTable[] = {
#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
-static const chr upperCharTable[] = {
+static const pchr upperCharTable[] = {
0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110,
0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122,
0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134,
@@ -471,7 +475,7 @@ static const chr upperCharTable[] = {
0x2131, 0x2133
};
-#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
+#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(pchr))
/*
* Unicode: unicode print characters excluding space.
@@ -599,7 +603,7 @@ static const crange graphRangeTable[] = {
#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
-static const chr graphCharTable[] = {
+static const pchr graphCharTable[] = {
0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8,
0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f,
0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd,
@@ -616,7 +620,7 @@ static const chr graphCharTable[] = {
0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74
};
-#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
+#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(pchr))
/*
* Unicode: unicode print characters including space, i.e. all Letters (class
@@ -681,7 +685,7 @@ static const crange printRangeTable[] = {
#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange))
-static const chr printCharTable[] = {
+static const pchr printCharTable[] = {
0x037A, 0x037E, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0589, 0x05BE,
0x05C0, 0x05C3, 0x060C, 0x061B, 0x061F, 0x06E9, 0x093D, 0x0950, 0x09B2,
0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, 0x0CDE, 0x0E01,
@@ -690,7 +694,7 @@ static const chr printCharTable[] = {
0x2070, 0x2300, 0x274D, 0x2756, 0x303F, 0xFB3E, 0xFE74
};
-#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr))
+#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(pchr))
#endif
/*
@@ -720,9 +724,17 @@ element(
assert(startp < endp);
len = endp - startp;
+#ifdef REGEX_UTF8
+ wchar_t c;
+ if (mbtowc(&c, (const char *)startp, len) == len) {
+ // mbtowc consumed exactly len bytes: the element is a single character
+ return c;
+ }
+#else
if (len == 1) {
return *startp;
}
+#endif
NOTE(REG_ULOCALE);
@@ -790,9 +802,9 @@ range(
for (c=a; c<=b; c++) {
addchr(cv, c);
- lc = Tcl_UniCharToLower((chr)c);
- uc = Tcl_UniCharToUpper((chr)c);
- tc = Tcl_UniCharToTitle((chr)c);
+ lc = Tcl_UniCharToLower(c);
+ uc = Tcl_UniCharToUpper(c);
+ tc = Tcl_UniCharToTitle(c);
if (c != lc) {
addchr(cv, lc);
}
@@ -859,7 +871,7 @@ eclass(
}
cv = getcvec(v, 1, 0);
assert(cv != NULL);
- addchr(cv, (chr)c);
+ addchr(cv, c);
return cv;
}
@@ -1097,12 +1109,12 @@ allcases(
pchr pc) /* character to get case equivs of */
{
struct cvec *cv;
- chr c = (chr)pc;
+ pchr c = pc;
chr lc, uc, tc;
- lc = Tcl_UniCharToLower((chr)c);
- uc = Tcl_UniCharToUpper((chr)c);
- tc = Tcl_UniCharToTitle((chr)c);
+ lc = Tcl_UniCharToLower(c);
+ uc = Tcl_UniCharToUpper(c);
+ tc = Tcl_UniCharToTitle(c);
if (tc != uc) {
cv = getcvec(v, 3, 0);
@@ -1147,6 +1159,7 @@ casecmp(
size_t len) /* exact length of comparison */
{
for (; len > 0; len--, x++, y++) {
+ // FIXME: Will fail if REGEX_UTF8 is defined: this loop compares and lowercases one chr element at a time.
if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
return 1;
}