aboutsummaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c | 54
1 files changed, 50 insertions, 4 deletions
diff --git a/regexec.c b/regexec.c
index 24edb41..2f8a234 100644
--- a/regexec.c
+++ b/regexec.c
@@ -155,6 +155,52 @@ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
/* automatically gathered by fwd; do not hand-edit */
/* =====^!^===== end forwards =====^!^===== */
+#ifdef REGEX_UTF8
+
+/*
+ * nextchr - advance past the UTF-8 character whose lead byte is at s
+ *
+ * Only the lead byte is examined: the step is 1, 2, 3 or 4 bytes
+ * according to its high bits.  A byte that cannot start a sequence
+ * (a stray continuation byte, or 11111xxx) is stepped over as one byte.
+ *
+ * NOTE(review): the continuation bytes themselves are not inspected and
+ * no end-of-buffer pointer is available here, so a truncated sequence at
+ * the end of the input yields a pointer past the data -- confirm that
+ * callers tolerate this.
+ */
+static inline chr *
+nextchr(chr *s)
+{
+    /* sequence length, indexed by the top four bits of the lead byte */
+    static const unsigned char seqlen[16] = {
+        1, 1, 1, 1, 1, 1, 1, 1,     /* 0xxxxxxx: single byte */
+        1, 1, 1, 1,                 /* 10xxxxxx: stray continuation byte */
+        2, 2,                       /* 110xxxxx */
+        3,                          /* 1110xxxx */
+        4                           /* 1111xxxx: valid only as 11110xxx */
+    };
+    unsigned char lead = (unsigned char)*s;
+
+    if (lead >= 0xF8)               /* 11111xxx: invalid lead byte */
+        return s + 1;
+
+    return s + seqlen[lead >> 4];
+}
+
+/*
+ * prevchr - step back to the start of the character that ends just before s
+ *
+ * Walks backward over UTF-8 continuation bytes (10xxxxxx) and returns a
+ * pointer to the first byte found that is not one.  A well-formed
+ * character has at most three continuation bytes, so at most four bytes
+ * are examined.  If all four are continuation bytes the input is
+ * malformed; the byte directly before s is then returned as a one-byte
+ * unit, matching how nextchr steps over a stray continuation byte.
+ * Bounding the walk keeps malformed input from dragging the pointer
+ * arbitrarily far back.
+ *
+ * NOTE(review): there is no start-of-buffer pointer here, so the caller
+ * must still guarantee that s is not at the very start of the input.
+ */
+static inline chr *
+prevchr(chr *s)
+{
+    chr *p = s;
+    int back;
+
+    for (back = 0; back < 4; back++) {
+        --p;
+        if (((unsigned char)*p & 0xC0) != 0x80)
+            return p;               /* lead byte or single-byte character */
+    }
+
+    /* four continuation bytes in a row: malformed, back up one byte only */
+    return s - 1;
+}
+
+/*
+ * getchr - decode the UTF-8 character that starts at s
+ *
+ * s points at the first byte of the character; end points one past the
+ * last readable byte.  Returns the decoded code point, or 0 when s is not
+ * below end or the bytes at s are not a complete, well-formed sequence
+ * (invalid lead byte, truncated, bad continuation byte, overlong form,
+ * surrogate, or above U+10FFFF).
+ *
+ * The bytes are decoded by hand rather than with mbtowc(): mbtowc()
+ * follows the current LC_CTYPE locale, which need not be UTF-8, and its
+ * failure return has to be checked before the output can be trusted.
+ *
+ * NOTE(review): assumes pchr can represent values up to 0x10FFFF --
+ * confirm against its typedef.
+ */
+static inline pchr
+getchr(const chr *s, const chr *end)
+{
+    /* smallest code point that legitimately needs a 2-, 3- or 4-byte form */
+    static const unsigned long mincp[5] = { 0, 0, 0x80, 0x800, 0x10000 };
+    unsigned long cp;
+    unsigned char lead;
+    int len;
+    int i;
+
+    if (s >= end)
+        return 0;
+
+    lead = (unsigned char)*s;
+    if (lead < 0x80)                    /* 0xxxxxxx */
+        return (pchr)lead;
+
+    if ((lead & 0xE0) == 0xC0) {        /* 110xxxxx */
+        len = 2;
+        cp = lead & 0x1F;
+    } else if ((lead & 0xF0) == 0xE0) { /* 1110xxxx */
+        len = 3;
+        cp = lead & 0x0F;
+    } else if ((lead & 0xF8) == 0xF0) { /* 11110xxx */
+        len = 4;
+        cp = lead & 0x07;
+    } else {
+        return 0;                       /* stray continuation or bad lead */
+    }
+
+    if (end - s < len)                  /* sequence runs past the buffer */
+        return 0;
+
+    for (i = 1; i < len; i++) {
+        unsigned char cont = (unsigned char)s[i];
+
+        if ((cont & 0xC0) != 0x80)
+            return 0;
+        cp = (cp << 6) | (unsigned long)(cont & 0x3F);
+    }
+
+    /* reject overlong forms, UTF-16 surrogates and out-of-range values */
+    if (cp < mincp[len] || cp > 0x10FFFFUL || (cp >= 0xD800UL && cp <= 0xDFFFUL))
+        return 0;
+
+    return (pchr)cp;
+}
+
+#else /* !REGEX_UTF8 */
+
+/* nextchr - without REGEX_UTF8 every character is exactly one chr wide */
+static inline chr *
+nextchr(chr *s)
+{
+    return &s[1];
+}
+/* prevchr - without REGEX_UTF8 the previous character is one chr back */
+static inline chr *
+prevchr(chr *s)
+{
+    --s;
+    return s;
+}
+/*
+ * getchr - without REGEX_UTF8 the character is simply the chr at s
+ *
+ * end is accepted only so that this variant has the same signature as
+ * the REGEX_UTF8 one; it is not consulted.
+ */
+static inline pchr
+getchr(const chr *s, const chr *end)
+{
+    (void)end;      /* deliberately unused; avoids -Wunused-parameter */
+    return *s;
+}
+
+#endif
+
/*
- exec - match regular expression
^ int exec(regex_t *, const chr *, size_t, rm_detail_t *,
@@ -353,7 +399,7 @@ find(
d = newdfa(v, cnfa, cm, &v->dfa1);
assert(!(ISERR() && d != NULL));
NOERR();
- for (begin = open; begin <= close; begin++) {
+ for (begin = open; begin <= close; begin = nextchr(begin)) {
MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
if (shorter) {
end = shortest(v, d, begin, begin, v->stop, NULL, &hitend);
@@ -478,7 +524,7 @@ cfindloop(
open = cold;
cold = NULL;
MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
- for (begin = open; begin <= close; begin++) {
+ for (begin = open; begin <= close; begin = nextchr(begin)) {
MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
estart = begin;
estop = v->stop;
@@ -525,9 +571,9 @@ cfindloop(
*/
if (shorter) {
- estart = end + 1;
+ estart = nextchr(end);
} else {
- estop = end - 1;
+ estop = prevchr(end);
}
}
}