diff options
Diffstat (limited to 'regc_lex.c')
| -rw-r--r-- | regc_lex.c | 40 |
1 files changed, 28 insertions, 12 deletions
@@ -32,6 +32,7 @@ /* scanning macros (know about v) */ #define ATEOS() (v->now >= v->stop) #define HAVE(n) (v->stop - v->now >= (n)) +/* will work only for ANSI characters */ #define NEXT1(c) (!ATEOS() && *v->now == CHR(c)) #define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b)) #define NEXT3(a,b,c) \ @@ -45,6 +46,17 @@ #define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */ #define LASTTYPE(t) (v->lasttype == (t)) +/* return and skip the next (unicode) character */ +#ifdef REGEX_UTF8 +#define SKIPCHR(x) do { \ + wchar_t __c; \ + v->now += mbtowc(&__c, (const char *)v->now, v->stop - v->now); \ + x = __c; \ +} while (0) +#else +#define SKIPCHR(x) do x = *v->now++; while (0) +#endif + /* lexical contexts */ #define L_ERE 1 /* mainline ERE/ARE */ #define L_BRE 2 /* mainline BRE */ @@ -292,7 +304,7 @@ static int /* 1 normal, 0 failure */ next( struct vars *v) { - chr c; + pchr c; /* * Errors yield an infinite sequence of failures. @@ -371,7 +383,7 @@ next( * Okay, time to actually get a character. */ - c = *v->now++; + SKIPCHR(c); /* * Deal with the easy contexts, punt EREs to code below. 
@@ -697,11 +709,12 @@ next( assert(!ATEOS()); if (!(v->cflags&REG_ADVF)) {/* only AREs have non-trivial escapes */ - if (iscalnum(*v->now)) { + SKIPCHR(c); + if (iscalnum(c)) { NOTE(REG_UBSALNUM); NOTE(REG_UUNSPEC); } - RETV(PLAIN, *v->now++); + RETV(PLAIN, c); } (DISCARD)lexescape(v); if (ISERR()) { @@ -741,7 +754,7 @@ static int /* not actually used, but convenient for RETV */ lexescape( struct vars *v) { - chr c; + pchr c; static chr alert[] = { CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t') }; @@ -753,7 +766,7 @@ lexescape( assert(v->cflags&REG_ADVF); assert(!ATEOS()); - c = *v->now++; + SKIPCHR(c); if (!iscalnum(c)) { RETV(PLAIN, c); } @@ -777,7 +790,8 @@ lexescape( if (ATEOS()) { FAILW(REG_EESCAPE); } - RETV(PLAIN, (chr)(*v->now++ & 037)); + SKIPCHR(c); + RETV(PLAIN, c & 037); break; case CHR('d'): NOTE(REG_ULOCALE); @@ -911,6 +925,8 @@ lexescape( - lexdigits - slurp up digits and return chr value ^ static chr lexdigits(struct vars *, int, int, int); */ +// FIXME: Perhaps directly return unsigned int. +// Why should we be restricted to 0-255? static chr /* chr value; errors signalled via ERR */ lexdigits( struct vars *v, @@ -972,7 +988,7 @@ brenext( struct vars *v, pchr pc) { - chr c = (chr)pc; + pchr c = pc; switch (c) { case CHR('*'): @@ -1039,7 +1055,7 @@ brenext( FAILW(REG_EESCAPE); } - c = *v->now++; + SKIPCHR(c); switch (c) { case CHR('{'): INTOCON(L_BBND); @@ -1147,7 +1163,7 @@ ch(void) * use that it hardly matters. ^ static chr chrnamed(struct vars *, const chr *, const chr *, pchr); */ -static chr +static pchr chrnamed( struct vars *v, const chr *startp, /* start of name */ @@ -1166,12 +1182,12 @@ chrnamed( v->err = errsave; if (e != 0) { - return (chr)lastresort; + return (pchr)lastresort; } cv = range(v, c, c, 0); if (cv->nchrs == 0) { - return (chr)lastresort; + return (pchr)lastresort; } return cv->chrs[0]; } |
