diff options
Diffstat (limited to 'regexec.c')
| -rw-r--r-- | regexec.c | 54 |
1 file changed, 50 insertions, 4 deletions
@@ -155,6 +155,52 @@ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
 /* automatically gathered by fwd; do not hand-edit */
 /* =====^!^===== end forwards =====^!^===== */
 
+#ifdef REGEX_UTF8
+/* UTF-8 build: the helpers below move over whole multi-byte sequences; assumes chr is a one-byte type here -- TODO confirm. */
+static inline chr *	/* nextchr - pointer just past the sequence whose lead byte is *s */
+nextchr(chr *s)	/* reads only *s: takes no end bound and does not check continuation bytes */
+{
+	unsigned char c = (unsigned char)*s;
+	/* NOTE(review): s + 2..4 can land past the input when it ends in a truncated sequence; callers must tolerate that. */
+	if (c < 0x80)	/* 0xxxxxxx: one-byte sequence */
+		return s + 1;
+	if ((c & 0xE0) == 0xC0)	/* 110xxxxx: lead byte of a two-byte sequence */
+		return s + 2;
+	if ((c & 0xF0) == 0xE0)	/* 1110xxxx: lead byte of a three-byte sequence */
+		return s + 3;
+	if ((c & 0xF8) == 0xF0)	/* 11110xxx: lead byte of a four-byte sequence */
+		return s + 4;
+
+	/* invalid lead byte, including stray continuation byte: fall back to a one-byte step */
+	return s + 1;
+}
+/* prevchr - step back from s to the nearest earlier byte that is not a 10xxxxxx continuation byte. */
+static inline chr *
+prevchr(chr *s)
+{
+	do {
+		--s;	/* always moves back at least one byte before testing */
+	} while (((unsigned char)*s & 0xC0) == 0x80);
+	/* NOTE(review): no lower bound; relies on a non-continuation byte existing before s -- verify against callers. */
+	return s;
+}
+/* getchr - decode the character starting at s with mbtowc(), offering it at most end - s bytes. */
+static inline pchr
+getchr(const chr *s, const chr *end)
+{
+	wchar_t c = 0;	/* stays 0 unless mbtowc() stores a decoded value */
+	mbtowc(&c, (const char *)s, end - s);	/* NOTE(review): return value ignored, so a failed decode is not reported; mbtowc() follows the current locale, while nextchr/prevchr hard-code UTF-8 */
+	return c;
+}
+
+#else /* !REGEX_UTF8 */
+/* Fallback build: the same three helpers step by exactly one chr and read *s directly; end is unused. */
+static inline chr *nextchr(chr *s) { return s+1; }
+static inline chr *prevchr(chr *s) { return s-1; }
+static inline pchr getchr(const chr *s, const chr *end) { return *s; }
+
+#endif
+
 /*
  - exec - match regular expression
  ^ int exec(regex_t *, const chr *, size_t, rm_detail_t *,
@@ -353,7 +399,7 @@ find(
 	d = newdfa(v, cnfa, cm, &v->dfa1);
 	assert(!(ISERR() && d != NULL));
 	NOERR();
-	for (begin = open; begin <= close; begin++) {
+	for (begin = open; begin <= close; begin = nextchr(begin)) {
 		MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
 		if (shorter) {
 			end = shortest(v, d, begin, begin, v->stop, NULL, &hitend);
@@ -478,7 +524,7 @@ cfindloop(
 			open = cold;
 			cold = NULL;
 			MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
-			for (begin = open; begin <= close; begin++) {
+			for (begin = open; begin <= close; begin = nextchr(begin)) {
 				MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
 				estart = begin;
 				estop = v->stop;
@@ -525,9 +571,9 @@ cfindloop(
 					 */
 
 					if (shorter) {
-						estart = end + 1;
+						estart = nextchr(end);
 					} else {
-						estop = end - 1;
+						estop = prevchr(end);
 					}
 				}
 			}
|
