about | summary | refs | log | tree | commit | diff
path: root/regc_lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'regc_lex.c')
-rw-r--r-- regc_lex.c 40
1 files changed, 28 insertions, 12 deletions
diff --git a/regc_lex.c b/regc_lex.c
index 4be02c6..ae71884 100644
--- a/regc_lex.c
+++ b/regc_lex.c
@@ -32,6 +32,7 @@
/* scanning macros (know about v) */
#define ATEOS() (v->now >= v->stop)
#define HAVE(n) (v->stop - v->now >= (n))
+/* NEXTn compare one input unit per character: they work only for ASCII (single-unit) characters */
#define NEXT1(c) (!ATEOS() && *v->now == CHR(c))
#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
#define NEXT3(a,b,c) \
@@ -45,6 +46,17 @@
#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */
#define LASTTYPE(t) (v->lasttype == (t))
+/* SKIPCHR(x): store the next (unicode) character in x and advance v->now past it; caller ensures !ATEOS() */
+#ifdef REGEX_UTF8
+#define SKIPCHR(x) do { /* mbtowc yields 0 for NUL and -1 for a bad sequence: consume one raw unit then */ \
+ wchar_t skipchr_wc = 0; \
+ int skipchr_len = mbtowc(&skipchr_wc, (const char *)v->now, (size_t)(v->stop - v->now)); \
+ if (skipchr_len > 0) { v->now += skipchr_len; (x) = skipchr_wc; } else { (x) = *v->now++; } \
+} while (0)
+#else
+#define SKIPCHR(x) do { (x) = *v->now++; } while (0)
+#endif
+
/* lexical contexts */
#define L_ERE 1 /* mainline ERE/ARE */
#define L_BRE 2 /* mainline BRE */
@@ -292,7 +304,7 @@ static int /* 1 normal, 0 failure */
next(
struct vars *v)
{
- chr c;
+ pchr c;
/*
* Errors yield an infinite sequence of failures.
@@ -371,7 +383,7 @@ next(
* Okay, time to actually get a character.
*/
- c = *v->now++;
+ SKIPCHR(c);
/*
* Deal with the easy contexts, punt EREs to code below.
@@ -697,11 +709,12 @@ next(
assert(!ATEOS());
if (!(v->cflags&REG_ADVF)) {/* only AREs have non-trivial escapes */
- if (iscalnum(*v->now)) {
+ SKIPCHR(c);
+ if (iscalnum(c)) {
NOTE(REG_UBSALNUM);
NOTE(REG_UUNSPEC);
}
- RETV(PLAIN, *v->now++);
+ RETV(PLAIN, c);
}
(DISCARD)lexescape(v);
if (ISERR()) {
@@ -741,7 +754,7 @@ static int /* not actually used, but convenient for RETV */
lexescape(
struct vars *v)
{
- chr c;
+ pchr c;
static chr alert[] = {
CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
};
@@ -753,7 +766,7 @@ lexescape(
assert(v->cflags&REG_ADVF);
assert(!ATEOS());
- c = *v->now++;
+ SKIPCHR(c);
if (!iscalnum(c)) {
RETV(PLAIN, c);
}
@@ -777,7 +790,8 @@ lexescape(
if (ATEOS()) {
FAILW(REG_EESCAPE);
}
- RETV(PLAIN, (chr)(*v->now++ & 037));
+ SKIPCHR(c);
+ RETV(PLAIN, c & 037);
break;
case CHR('d'):
NOTE(REG_ULOCALE);
@@ -911,6 +925,8 @@ lexescape(
- lexdigits - slurp up digits and return chr value
^ static chr lexdigits(struct vars *, int, int, int);
*/
+// FIXME: Perhaps directly return unsigned int.
+// Why should we be restricted to 0-255?
static chr /* chr value; errors signalled via ERR */
lexdigits(
struct vars *v,
@@ -972,7 +988,7 @@ brenext(
struct vars *v,
pchr pc)
{
- chr c = (chr)pc;
+ pchr c = pc;
switch (c) {
case CHR('*'):
@@ -1039,7 +1055,7 @@ brenext(
FAILW(REG_EESCAPE);
}
- c = *v->now++;
+ SKIPCHR(c);
switch (c) {
case CHR('{'):
INTOCON(L_BBND);
@@ -1147,7 +1163,7 @@ ch(void)
* use that it hardly matters.
^ static chr chrnamed(struct vars *, const chr *, const chr *, pchr);
*/
-static chr
+static pchr
chrnamed(
struct vars *v,
const chr *startp, /* start of name */
@@ -1166,12 +1182,12 @@ chrnamed(
v->err = errsave;
if (e != 0) {
- return (chr)lastresort;
+ return (pchr)lastresort;
}
cv = range(v, c, c, 0);
if (cv->nchrs == 0) {
- return (chr)lastresort;
+ return (pchr)lastresort;
}
return cv->chrs[0];
}