21 files changed, 11164 insertions, 69 deletions
diff --git a/Makefile.am b/Makefile.am
index 3375ce8..10e54d4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,7 +5,7 @@ ACLOCAL_AMFLAGS = -I m4
 if REPLACE_MALLOC
 MAYBE_DLMALLOC = contrib/dlmalloc
 endif
-SUBDIRS = lib $(MAYBE_DLMALLOC) contrib/rb3ptr src doc tests
+SUBDIRS = lib $(MAYBE_DLMALLOC) contrib/rb3ptr contrib/hsrex src doc tests
 
 dist_scitecodata_DATA = sample.teco_ini
 
diff --git a/configure.ac b/configure.ac
index b7a5636..5fe6fb7 100644
--- a/configure.ac
+++ b/configure.ac
@@ -470,6 +470,7 @@ AC_CONFIG_FILES([GNUmakefile:Makefile.in src/GNUmakefile:src/Makefile.in]
                 [src/interface-curses/GNUmakefile:src/interface-curses/Makefile.in]
                 [contrib/dlmalloc/GNUmakefile:contrib/dlmalloc/Makefile.in]
                 [contrib/rb3ptr/GNUmakefile:contrib/rb3ptr/Makefile.in]
+                [contrib/hsrex/GNUmakefile:contrib/hsrex/Makefile.in]
                 [lib/GNUmakefile:lib/Makefile.in]
                 [doc/GNUmakefile:doc/Makefile.in doc/Doxyfile]
                 [tests/GNUmakefile:tests/Makefile.in tests/atlocal])
diff --git a/contrib/hsrex/Makefile.am b/contrib/hsrex/Makefile.am
new file mode 100644
index 0000000..11b979a
--- /dev/null
+++ b/contrib/hsrex/Makefile.am
@@ -0,0 +1,10 @@
+# FIXME: We probably need both ASCII and widechar versions
+# as separate libraries.
+AM_CPPFLAGS = -DREGEX_STANDALONE
+# -DREGEX_WCHAR
+
+noinst_LTLIBRARIES = libhswrex.la
+libhswrex_la_SOURCES = regcomp.c regexec.c regerror.c regfree.c regalone.c \
+                       regalone.h  regcustom.h regerrs.h regex.h regguts.h
+# included from regcomp.c and regexec.c
+EXTRA_libhswrex_la_SOURCES = regc_color.c regc_cvec.c regc_lex.c regc_locale.c regc_nfa.c
diff --git a/contrib/hsrex/regalone.c b/contrib/hsrex/regalone.c
new file mode 100644
index 0000000..e0a5fcc
--- /dev/null
+++ b/contrib/hsrex/regalone.c
@@ -0,0 +1,267 @@
+#ifdef REGEX_WCHAR
+
+#include "regcustom.h"
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_DStringInit --
+ *
+ *	Initializes a dynamic string, discarding any previous contents of the
+ *	string (Tcl_DStringFree should have been called already if the dynamic
+ *	string was previously in use).
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	The dynamic string is initialized to be empty.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_DStringInit(
+    Tcl_DString *dsPtr)		/* Dynamic string to put into the empty state. */
+{
+    dsPtr->staticSpace[0] = '\0';		/* empty, terminated buffer */
+    dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE;	/* capacity of staticSpace */
+    dsPtr->length = 0;
+    dsPtr->string = dsPtr->staticSpace;		/* value lives in staticSpace */
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_DStringSetLength --
+ *
+ *	Change the length of a dynamic string. This can cause the string to
+ *	either grow or shrink, depending on the value of length.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	The length of dsPtr is changed to length and a null byte is stored at
+ *	that position in the string. The buffer is grown when length does not
+ *	fit; if that allocation fails, the process is aborted.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_DStringSetLength(
+    Tcl_DString *dsPtr,		/* Structure describing dynamic string. */
+    int length)			/* New length; negative values are treated as 0. */
+{
+    int newsize;
+
+    if (length < 0) {
+	length = 0;
+    }
+    if (length >= dsPtr->spaceAvl) {
+	char *newString;
+
+	/*
+	 * Growth policy: double the buffer when the request is a small step
+	 * beyond the current capacity (incremental growth), otherwise
+	 * allocate exactly length + 1 bytes so that one large request is
+	 * not over-allocated.
+	 */
+
+	newsize = dsPtr->spaceAvl * 2;
+	if (length < newsize) {
+	    dsPtr->spaceAvl = newsize;
+	} else {
+	    dsPtr->spaceAvl = length + 1;
+	}
+	if (dsPtr->string == dsPtr->staticSpace) {
+	    newString = ckalloc((size_t) dsPtr->spaceAvl);
+	} else {
+	    newString = ckrealloc(dsPtr->string, (size_t) dsPtr->spaceAvl);
+	}
+	if (newString == NULL) {
+	    abort();		/* out of memory; never write through NULL */
+	}
+	if (dsPtr->string == dsPtr->staticSpace) {
+	    memcpy(newString, dsPtr->string, (size_t) dsPtr->length);
+	}
+	dsPtr->string = newString;
+    }
+    dsPtr->length = length;
+    dsPtr->string[length] = 0;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_DStringFree --
+ *
+ *	Frees up any memory allocated for the dynamic string and reinitializes
+ *	the string to an empty state.
+ *
+ * Results:
+ *	None.
+ *
+ * Side effects:
+ *	The previous contents of the dynamic string are lost, and the new
+ *	value is an empty string.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_DStringFree(
+    Tcl_DString *dsPtr)		/* Dynamic string whose storage is released. */
+{
+    /* Heap storage is released; staticSpace is part of *dsPtr itself. */
+    if (dsPtr->string != dsPtr->staticSpace) {
+	ckfree(dsPtr->string);
+    }
+
+    /* Same four assignments as Tcl_DStringInit: back to the empty state. */
+    Tcl_DStringInit(dsPtr);
+}
+
+
+
+/*
+ * Unicode characters less than this value are represented by themselves in
+ * UTF-8 strings.
+ */
+
+#define UNICODE_SELF	0x80
+
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * Tcl_UniCharToUtf --
+ *
+ *	Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the
+ *	provided buffer. Equivalent to Plan 9 runetochar().
+ *
+ * Results:
+ *	The return value is the number of bytes in the buffer that were
+ *	consumed.
+ *
+ * Side effects:
+ *	None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+INLINE int
+Tcl_UniCharToUtf(
+    int ch,			/* The Tcl_UniChar to be stored in the
+				 * buffer. */
+    char *buf)			/* Buffer in which the UTF-8 representation of
+				 * the Tcl_UniChar is stored. Buffer must be
+				 * large enough to hold the UTF-8 character
+				 * (at most TCL_UTF_MAX bytes). */
+{
+    if ((ch > 0) && (ch < UNICODE_SELF)) {	/* 0x01..0x7F: stored as is */
+	buf[0] = (char) ch;
+	return 1;
+    }
+    if (ch >= 0) {
+	if (ch <= 0x7FF) {	/* includes ch == 0, written as 0xC0 0x80 */
+	    buf[1] = (char) ((ch | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 6) | 0xC0);
+	    return 2;
+	}
+	if (ch <= 0xFFFF) {
+	three:			/* also entered by the goto at the end */
+	    buf[2] = (char) ((ch | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 12) | 0xE0);
+	    return 3;
+	}
+
+#if TCL_UTF_MAX > 3
+	if (ch <= 0x1FFFFF) {	/* 4..6 byte forms exist only in this build */
+	    buf[3] = (char) ((ch | 0x80) & 0xBF);
+	    buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 18) | 0xF0);
+	    return 4;
+	}
+	if (ch <= 0x3FFFFFF) {
+	    buf[4] = (char) ((ch | 0x80) & 0xBF);
+	    buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 24) | 0xF8);
+	    return 5;
+	}
+	if (ch <= 0x7FFFFFFF) {
+	    buf[5] = (char) ((ch | 0x80) & 0xBF);
+	    buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF);
+	    buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF);
+	    buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF);
+	    buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF);
+	    buf[0] = (char) ((ch >> 30) | 0xFC);
+	    return 6;
+	}
+#endif
+    }
+
+    ch = 0xFFFD;		/* negative or too large: store 0xFFFD instead */
+    goto three;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * Tcl_UniCharToUtfDString --
+ *
+ *	Convert the given Unicode string to UTF-8.
+ *
+ * Results:
+ *	The return value is a pointer to the UTF-8 representation of the
+ *	Unicode string. Storage for the return value is appended to the end of
+ *	dsPtr.
+ *
+ * Side effects:
+ *	None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+char *
+Tcl_UniCharToUtfDString(
+    const Tcl_UniChar *uniStr,	/* Characters to encode as UTF-8. */
+    int uniLength,		/* Number of Tcl_UniChars at uniStr
+				 * (must be >= 0). */
+    Tcl_DString *dsPtr)		/* Initialized DString that receives the
+				 * encoded bytes after its current value. */
+{
+    const int startLen = Tcl_DStringLength(dsPtr);
+    char *first;
+    char *out;
+    int i;
+
+    /*
+     * Reserve the worst case up front: no character needs more than
+     * TCL_UTF_MAX bytes. The length is trimmed to the real size below.
+     */
+
+    Tcl_DStringSetLength(dsPtr, (startLen + uniLength + 1) * TCL_UTF_MAX);
+    first = Tcl_DStringValue(dsPtr) + startLen;
+
+    out = first;
+    for (i = 0; i < uniLength; i++) {
+	out += Tcl_UniCharToUtf(uniStr[i], out);
+    }
+
+    /* Shrink to the bytes actually written; this also terminates them. */
+    Tcl_DStringSetLength(dsPtr, startLen + (out - first));
+
+    return first;
+}
+
+#endif		/* REGEX_WCHAR	*/
diff --git a/contrib/hsrex/regalone.h b/contrib/hsrex/regalone.h
new file mode 100644
index 0000000..940c11d
--- /dev/null
+++ b/contrib/hsrex/regalone.h
@@ -0,0 +1,250 @@
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef REGEX_STANDALONE
+# define	REGEX_STANDALONE
+#endif
+
+#ifdef REGEX_WCHAR
+#	include <wctype.h>
+#	include <wchar.h>
+	typedef wchar_t chr;
+	typedef chr Tcl_UniChar;
+#else
+#	include <ctype.h>
+	typedef unsigned char chr;
+	typedef wchar_t Tcl_UniChar;
+#endif
+
+/*
+ * In the standalone version we are more concerned with performance,
+ * so an automatic var is our best choice.
+ */
+#define AllocVars(vPtr)					\
+		struct vars regex_autovar;		\
+		register struct vars *vPtr = &regex_autovar;
+
+#define MALLOC(n)	calloc(1,n)
+#define FREE(p)		free(VS(p))
+#define REALLOC(p,n)	realloc(VS(p),n)
+#define ckalloc(n)	calloc(1,n)
+#define ckrealloc(p,n)	realloc(p,n)
+#define ckfree(p)	free(p)
+
+#ifdef REGEX_WCHAR
+#	define Tcl_UniCharToLower(c)		towlower(c)
+#	define Tcl_UniCharToUpper(c)		towupper(c)
+#	define Tcl_UniCharToTitle(c)		towupper(c)
+#	define Tcl_UniCharIsAlpha(c)		iswalpha(c)
+#	define Tcl_UniCharIsAlnum(c)		iswalnum(c)
+#	define Tcl_UniCharIsDigit(c)		iswdigit(c)
+#	define Tcl_UniCharIsSpace(c)		iswspace(c)
+#else
+#	define Tcl_DStringInit(ds)
+#	define Tcl_UniCharToUtfDString(s,l,ds)	(s)
+#	define Tcl_DStringFree(ds)
+#	define Tcl_UniCharToLower(c)		tolower(c)
+#	define Tcl_UniCharToUpper(c)		toupper(c)
+#	define Tcl_UniCharToTitle(c)		toupper(c)
+#	define Tcl_UniCharIsAlpha(c)		isalpha(c)
+#	define Tcl_UniCharIsAlnum(c)		isalnum(c)
+#	define Tcl_UniCharIsDigit(c)		isdigit(c)
+#	define Tcl_UniCharIsSpace(c)		isspace(c)
+#endif
+
+
+/*
+ * The maximum number of bytes that are necessary to represent a single
+ * Unicode character in UTF-8. The valid values should be 3 or 6 (or perhaps 1
+ * if we want to support a non-unicode enabled core). If 3, then Tcl_UniChar
+ * must be 2-bytes in size (UCS-2) (the default). If 6, then Tcl_UniChar must
+ * be 4-bytes in size (UCS-4). At this time UCS-2 mode is the default and
+ * recommended mode. UCS-4 is experimental and not recommended. It works for
+ * the core, but most extensions expect UCS-2.
+ */
+
+#ifndef TCL_UTF_MAX
+#define TCL_UTF_MAX		3
+#endif
+
+
+/*
+ * The structure defined below is used to hold dynamic strings. The only
+ * fields that clients should use are string and length, accessible via the
+ * macros Tcl_DStringValue and Tcl_DStringLength.
+ */
+
+#define TCL_DSTRING_STATIC_SIZE 200
+typedef struct Tcl_DString {
+    char *string;		/* Points to beginning of string: either
+				 * staticSpace below or a malloced array. */
+    int length;			/* Number of bytes in the string, not counting
+				 * the terminating null byte. */
+    int spaceAvl;		/* Total number of bytes available for the
+				 * string and its terminating null byte. */
+    char staticSpace[TCL_DSTRING_STATIC_SIZE];
+				/* Space to use in common case where string is
+				 * small. */
+} Tcl_DString;
+
+#define Tcl_DStringLength(dsPtr) ((dsPtr)->length)
+#define Tcl_DStringValue(dsPtr) ((dsPtr)->string)
+
+
+/*
+ * The macro below is used to modify a "char" value (e.g. by casting it to an
+ * unsigned character) so that it can be used safely with macros such as
+ * isspace.
+ */
+
+#define UCHAR(c) ((unsigned char) (c))
+
+
+/*
+ * Used to tag functions that are only to be visible within the module being
+ * built and not outside it (where this is supported by the linker).
+ */
+
+#ifndef MODULE_SCOPE
+#   ifdef __cplusplus
+#	define MODULE_SCOPE extern "C"
+#   else
+#	define MODULE_SCOPE extern
+#   endif
+#endif
+
+
+/*
+ * Macros used to declare a function to be exported by a DLL. Used by Windows,
+ * maps to no-op declarations on non-Windows systems. The default build on
+ * windows is for a DLL, which causes the DLLIMPORT and DLLEXPORT macros to be
+ * nonempty. To build a static library, the macro STATIC_BUILD should be
+ * defined.
+ *
+ * Note: when building static but linking dynamically to MSVCRT we must still
+ *       correctly decorate the C library imported function.  Use CRTIMPORT
+ *       for this purpose.  _DLL is defined by the compiler when linking to
+ *       MSVCRT.
+ */
+
+#if (defined(__WIN32__) && (defined(_MSC_VER) || (__BORLANDC__ >= 0x0550) || defined(__LCC__) || defined(__WATCOMC__) || (defined(__GNUC__) && defined(__declspec))))
+#   define HAVE_DECLSPEC 1
+#   ifdef STATIC_BUILD
+#       define DLLIMPORT
+#       define DLLEXPORT
+#       ifdef _DLL
+#           define CRTIMPORT __declspec(dllimport)
+#       else
+#           define CRTIMPORT
+#       endif
+#   else
+#       define DLLIMPORT __declspec(dllimport)
+#       define DLLEXPORT __declspec(dllexport)
+#       define CRTIMPORT __declspec(dllimport)
+#   endif
+#else
+#   define DLLIMPORT
+#   if defined(__GNUC__) && __GNUC__ > 3
+#       define DLLEXPORT __attribute__ ((visibility("default")))
+#   else
+#       define DLLEXPORT
+#   endif
+#   define CRTIMPORT
+#endif
+
+/*
+ * These macros are used to control whether functions are being declared for
+ * import or export. If a function is being declared while it is being built
+ * to be included in a shared library, then it should have the DLLEXPORT
+ * storage class. If it is being declared for use by a module that is going to
+ * link against the shared library, then it should have the DLLIMPORT storage
+ * class. If the symbol is being declared for a static build or for use from a
+ * stub library, then the storage class should be empty.
+ *
+ * The convention is that a macro called BUILD_xxxx, where xxxx is the name of
+ * a library we are building, is set on the compile line for sources that are
+ * to be placed in the library. When this macro is set, the storage class will
+ * be set to DLLEXPORT. At the end of the header file, the storage class will
+ * be reset to DLLIMPORT.
+ */
+
+#undef TCL_STORAGE_CLASS
+#ifdef BUILD_tcl
+#   define TCL_STORAGE_CLASS DLLEXPORT
+#else
+#   ifdef USE_TCL_STUBS
+#      define TCL_STORAGE_CLASS
+#   else
+#      define TCL_STORAGE_CLASS DLLIMPORT
+#   endif
+#endif
+
+/*
+ * Definitions that allow this header file to be used either with or without
+ * ANSI C features like function prototypes.
+ */
+
+#undef _ANSI_ARGS_
+#undef CONST
+#ifndef INLINE
+#   define INLINE
+#endif
+
+#ifndef NO_CONST
+#   define CONST const
+#else
+#   define CONST
+#endif
+
+#ifndef NO_PROTOTYPES
+#   define _ANSI_ARGS_(x)	x
+#else
+#   define _ANSI_ARGS_(x)	()
+#endif
+
+#ifdef USE_NON_CONST
+#   ifdef USE_COMPAT_CONST
+#      error define at most one of USE_NON_CONST and USE_COMPAT_CONST
+#   endif
+#   define CONST84
+#   define CONST84_RETURN
+#else
+#   ifdef USE_COMPAT_CONST
+#      define CONST84
+#      define CONST84_RETURN CONST
+#   else
+#      define CONST84 CONST
+#      define CONST84_RETURN CONST
+#   endif
+#endif
+
+#ifndef CONST86
+#      define CONST86 CONST
+#endif
+
+/*
+ * Make sure EXTERN isn't defined elsewhere
+ */
+
+#ifdef EXTERN
+#   undef EXTERN
+#endif /* EXTERN */
+
+#ifdef __cplusplus
+#   define EXTERN extern "C" TCL_STORAGE_CLASS
+#else
+#   define EXTERN extern TCL_STORAGE_CLASS
+#endif
+
+
+#ifdef REGEX_WCHAR
+EXTERN void		Tcl_DStringFree (Tcl_DString * dsPtr);
+EXTERN void		Tcl_DStringInit (Tcl_DString * dsPtr);
+EXTERN char *		Tcl_UniCharToUtfDString (CONST Tcl_UniChar * uniStr, 
+				int uniLength, Tcl_DString * dsPtr);
+EXTERN void		Tcl_DStringSetLength (Tcl_DString * dsPtr, 
+				int length);
+#endif		/* REGEX_WCHAR	*/
diff --git a/contrib/hsrex/regc_color.c b/contrib/hsrex/regc_color.c
new file mode 100644
index 0000000..7a98dcb
--- /dev/null
+++ b/contrib/hsrex/regc_color.c
@@ -0,0 +1,848 @@
+/*
+ * colorings of characters
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Note that there are some incestuous relationships between this code and NFA
+ * arc maintenance, which perhaps ought to be cleaned up sometime.
+ */
+
+#define	CISERR()	VISERR(cm->v)
+#define	CERR(e)		VERR(cm->v, (e))
+
+/*
+ - initcm - set up new colormap
+ ^ static void initcm(struct vars *, struct colormap *);
+ */
+static void
+initcm(
+    struct vars *v,
+    struct colormap *cm)
+{
+    int i;
+    int j;
+    union tree *t;
+    union tree *nextt;
+    struct colordesc *cd;
+
+    cm->magic = CMMAGIC;
+    cm->v = v;
+
+    cm->ncds = NINLINECDS;	/* descriptors start in the embedded cdspace */
+    cm->cd = cm->cdspace;
+    cm->max = 0;		/* highest color in use is index 0 */
+    cm->free = 0;		/* 0 means the free list is empty */
+
+    cd = cm->cd;		/* cm->cd[WHITE] */
+    cd->sub = NOSUB;
+    cd->arcs = NULL;
+    cd->flags = 0;
+    cd->nchrs = CHR_MAX - CHR_MIN + 1;	/* every chr starts in this color */
+
+    /*
+     * Upper levels of tree: every slot of tree[k] points at tree[k+1].
+     */
+
+    for (t=&cm->tree[0], j=NBYTS-1 ; j>0 ; t=nextt, j--) {
+	nextt = t + 1;
+	for (i=BYTTAB-1 ; i>=0 ; i--) {
+	    t->tptr[i] = nextt;
+	}
+    }
+
+    /*
+     * Bottom level is solid white.
+     */
+
+    t = &cm->tree[NBYTS-1];
+    for (i=BYTTAB-1 ; i>=0 ; i--) {
+	t->tcolor[i] = WHITE;
+    }
+    cd->block = t;		/* this bottom block is WHITE's solid block */
+}
+
+/*
+ - freecm - free dynamically-allocated things in a colormap
+ ^ static void freecm(struct colormap *);
+ */
+static void
+freecm(
+    struct colormap *cm)
+{
+    struct colordesc *cd;
+    size_t co;
+
+    cm->magic = 0;		/* setcolor() asserts magic == CMMAGIC */
+    if (NBYTS > 1) {
+	cmtreefree(cm, cm->tree, 0);
+    }
+
+    /* Release the solid block of every color in use; index 0 is skipped. */
+    for (co = 1; co <= cm->max; co++) {
+	cd = &cm->cd[co];
+	if (UNUSEDCOLOR(cd) || cd->block == NULL) {
+	    continue;
+	}
+	FREE(cd->block);
+    }
+    if (cm->cd != cm->cdspace) {	/* descriptors were moved to the heap */
+	FREE(cm->cd);
+    }
+}
+
+/*
+ - cmtreefree - free a non-terminal part of a colormap tree
+ ^ static void cmtreefree(struct colormap *, union tree *, int);
+ */
+static void
+cmtreefree(
+    struct colormap *cm,
+    union tree *tree,
+    int level)			/* level number (top == 0) of this block */
+{
+    int i;
+    union tree *t;
+    union tree *fillt = &cm->tree[level+1];	/* fill block one level down */
+    union tree *cb;
+
+    assert(level < NBYTS-1);	/* this level has pointers */
+    for (i=BYTTAB-1 ; i>=0 ; i--) {
+	t = tree->tptr[i];
+	assert(t != NULL);
+	if (t != fillt) {	/* fill blocks are part of cm, not freed here */
+	    if (level < NBYTS-2) {	/* more pointer blocks below */
+		cmtreefree(cm, t, level+1);
+		FREE(t);
+	    } else {		/* color block below */
+		cb = cm->cd[t->tcolor[0]].block;
+		if (t != cb) {	/* not a solid block */
+		    FREE(t);
+		}
+	    }
+	}
+    }
+}
+
+/*
+ - setcolor - set the color of a character in a colormap
+ ^ static color setcolor(struct colormap *, pchr, pcolor);
+ */
+static color			/* previous color */
+setcolor(
+    struct colormap *cm,
+    pchr c,
+    pcolor co)
+{
+    uchr uc = c;
+    int shift;
+    int level;
+    int b;
+    int bottom;
+    union tree *t;
+    union tree *newt;
+    union tree *fillt;
+    union tree *lastt;
+    union tree *cb;
+    color prev;
+
+    assert(cm->magic == CMMAGIC);
+    if (CISERR() || co == COLORLESS) {
+	return COLORLESS;
+    }
+
+    t = cm->tree;
+    for (level=0, shift=BYTBITS*(NBYTS-1) ; shift>0; level++, shift-=BYTBITS){
+	b = (uc >> shift) & BYTMASK;	/* slot of uc at this level */
+	lastt = t;
+	t = lastt->tptr[b];
+	assert(t != NULL);
+	fillt = &cm->tree[level+1];
+	bottom = (shift <= BYTBITS) ? 1 : 0;	/* t is a color block */
+	cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt;
+	if (t == fillt || t == cb) {	/* must allocate a new block */
+	    newt = (union tree *) MALLOC((bottom) ?
+		    sizeof(struct colors) : sizeof(struct ptrs));
+	    if (newt == NULL) {
+		CERR(REG_ESPACE);
+		return COLORLESS;
+	    }
+	    if (bottom) {	/* the new block starts as a copy of the old */
+		memcpy(newt->tcolor, t->tcolor, BYTTAB*sizeof(color));
+	    } else {
+		memcpy(newt->tptr, t->tptr, BYTTAB*sizeof(union tree *));
+	    }
+	    t = newt;
+	    lastt->tptr[b] = t;	/* parent slot now refers to the copy */
+	}
+    }
+
+    b = uc & BYTMASK;		/* slot of uc in the bottom block */
+    prev = t->tcolor[b];
+    t->tcolor[b] = (color) co;
+    return prev;
+}
+
+/*
+ - maxcolor - report largest color number in use
+ ^ static color maxcolor(struct colormap *);
+ */
+static color
+maxcolor(
+    struct colormap *cm)
+{
+    /*
+     * Once an error has been recorded (CISERR) the answer is COLORLESS;
+     * otherwise it is the current value of cm->max.
+     */
+    return CISERR() ? COLORLESS : (color) cm->max;
+}
+
+/*
+ - newcolor - find a new color (must be subject of setcolor at once)
+ * Beware: may relocate the colordescs.
+ ^ static color newcolor(struct colormap *);
+ */
+static color			/* COLORLESS for error */
+newcolor(
+    struct colormap *cm)
+{
+    struct colordesc *cd;
+    size_t n;
+
+    if (CISERR()) {
+	return COLORLESS;
+    }
+
+    if (cm->free != 0) {	/* reuse the head of the free list */
+	assert(cm->free > 0);
+	assert((size_t) cm->free < cm->ncds);
+	cd = &cm->cd[cm->free];
+	assert(UNUSEDCOLOR(cd));
+	assert(cd->arcs == NULL);
+	cm->free = cd->sub;	/* free colors are linked through sub */
+    } else if (cm->max < cm->ncds - 1) {	/* room left in the array */
+	cm->max++;
+	cd = &cm->cd[cm->max];
+    } else {
+	struct colordesc *newCd;
+
+	/*
+	 * Descriptor array is full: double its size.
+	 */
+
+	n = cm->ncds * 2;
+	if (cm->cd == cm->cdspace) {	/* still using the embedded array */
+	    newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
+	    if (newCd != NULL) {
+		memcpy(newCd, cm->cdspace,
+			cm->ncds * sizeof(struct colordesc));
+	    }
+	} else {
+	    newCd = (struct colordesc *)
+		    REALLOC(cm->cd, n * sizeof(struct colordesc));
+	}
+	if (newCd == NULL) {
+	    CERR(REG_ESPACE);
+	    return COLORLESS;
+	}
+	cm->cd = newCd;
+	cm->ncds = n;
+	assert(cm->max < cm->ncds - 1);
+	cm->max++;
+	cd = &cm->cd[cm->max];
+    }
+
+    cd->nchrs = 0;		/* descriptor starts out empty */
+    cd->sub = NOSUB;
+    cd->arcs = NULL;
+    cd->flags = 0;
+    cd->block = NULL;
+
+    return (color) (cd - cm->cd);	/* index of the descriptor */
+}
+
+/*
+ - freecolor - free a color (must have no arcs or subcolor)
+ ^ static void freecolor(struct colormap *, pcolor);
+ */
+static void
+freecolor(
+    struct colormap *cm,
+    pcolor co)
+{
+    struct colordesc *cd = &cm->cd[co];
+    color pco, nco;		/* for freelist scan */
+
+    assert(co >= 0);
+    if (co == WHITE) {		/* WHITE is never freed */
+	return;
+    }
+
+    assert(cd->arcs == NULL);
+    assert(cd->sub == NOSUB);
+    assert(cd->nchrs == 0);
+    cd->flags = FREECOL;
+    if (cd->block != NULL) {
+	FREE(cd->block);
+	cd->block = NULL;	/* just paranoia */
+    }
+
+    if ((size_t) co == cm->max) {	/* freed the highest color in use */
+	while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) {
+	    cm->max--;		/* drop trailing unused colors */
+	}
+	assert(cm->free >= 0);
+	while ((size_t) cm->free > cm->max) {	/* head is beyond new max */
+	    cm->free = cm->cd[cm->free].sub;
+	}
+	if (cm->free > 0) {
+	    assert(cm->free < cm->max);
+	    pco = cm->free;
+	    nco = cm->cd[pco].sub;
+	    while (nco > 0) {	/* unlink every entry above the new max */
+		if ((size_t) nco > cm->max) {
+		    /*
+		     * Take this one out of freelist.
+		     */
+
+		    nco = cm->cd[nco].sub;
+		    cm->cd[pco].sub = nco;
+		} else {
+		    assert(nco < cm->max);
+		    pco = nco;
+		    nco = cm->cd[pco].sub;
+		}
+	    }
+	}
+    } else {			/* interior color: push on the free list */
+	cd->sub = cm->free;
+	cm->free = (color) (cd - cm->cd);
+    }
+}
+
+/*
+ - pseudocolor - allocate a false color, to be managed by other means
+ ^ static color pseudocolor(struct colormap *);
+ */
+static color
+pseudocolor(
+    struct colormap *cm)
+{
+    const color fresh = newcolor(cm);
+
+    if (CISERR()) {		/* newcolor failed or an error was pending */
+	return COLORLESS;
+    }
+    /* Mark the descriptor so rainbow() and colorcomplement() skip it. */
+    cm->cd[fresh].flags = PSEUDO;
+    cm->cd[fresh].nchrs = 1;
+    return fresh;
+}
+
+/*
+ - subcolor - allocate a new subcolor (if necessary) to this chr
+ ^ static color subcolor(struct colormap *, pchr c);
+ */
+static color
+subcolor(
+    struct colormap *cm,
+    pchr c)
+{
+    color co;			/* current color of c */
+    color sco;			/* new subcolor */
+
+    co = GETCOLOR(cm, c);
+    sco = newsub(cm, co);	/* may return co itself */
+    if (CISERR()) {
+	return COLORLESS;
+    }
+    assert(sco != COLORLESS);
+
+    if (co == sco) {		/* already in an open subcolor */
+	return co;		/* rest is redundant */
+    }
+    cm->cd[co].nchrs--;		/* c leaves co ... */
+    cm->cd[sco].nchrs++;	/* ... and is counted in sco */
+    setcolor(cm, c, sco);
+    return sco;
+}
+
+/*
+ - newsub - allocate a new subcolor (if necessary) for a color
+ ^ static color newsub(struct colormap *, pcolor);
+ */
+static color
+newsub(
+    struct colormap *cm,
+    pcolor co)
+{
+    color sco;			/* new subcolor */
+
+    sco = cm->cd[co].sub;
+    if (sco == NOSUB) {		/* color has no open subcolor */
+	if (cm->cd[co].nchrs == 1) {	/* optimization */
+	    return co;		/* single-chr color serves as its own */
+	}
+	sco = newcolor(cm);	/* must create subcolor */
+	if (sco == COLORLESS) {	/* newcolor failed */
+	    assert(CISERR());
+	    return COLORLESS;
+	}
+	cm->cd[co].sub = sco;
+	cm->cd[sco].sub = sco;	/* open subcolor points to self */
+    }
+    assert(sco != NOSUB);
+
+    return sco;
+}
+
+/*
+ - subrange - allocate new subcolors to this range of chrs, fill in arcs
+ ^ static void subrange(struct vars *, pchr, pchr, struct state *,
+ ^ 	struct state *);
+ */
+static void
+subrange(
+    struct vars *v,
+    pchr from,
+    pchr to,
+    struct state *lp,
+    struct state *rp)
+{
+    uchr uf;
+    int i;
+
+    assert(from <= to);
+
+    /*
+     * First, align "from" on a tree-block boundary
+     */
+
+    uf = (uchr) from;
+    i = (int) (((uf + BYTTAB - 1) & (uchr) ~BYTMASK) - uf);
+    for (; from<=to && i>0; i--, from++) {	/* one chr at a time */
+	newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
+    }
+    if (from > to) {		/* didn't reach a boundary */
+	return;
+    }
+
+    /*
+     * Deal with whole blocks.
+     */
+
+    for (; to-from>=BYTTAB ; from+=BYTTAB) {	/* BYTTAB chrs per call */
+	subblock(v, from, lp, rp);
+    }
+
+    /*
+     * Clean up any remaining partial table.
+     */
+
+    for (; from<=to ; from++) {		/* leftover chrs, one at a time */
+	newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
+    }
+}
+
+/*
+ - subblock - allocate new subcolors for one tree block of chrs, fill in arcs
+ ^ static void subblock(struct vars *, pchr, struct state *, struct state *);
+ */
+static void
+subblock(
+    struct vars *v,
+    pchr start,			/* first of BYTTAB chrs */
+    struct state *lp,
+    struct state *rp)
+{
+    uchr uc = start;
+    struct colormap *cm = v->cm;
+    int shift;
+    int level;
+    int i;
+    int b;
+    union tree *t;
+    union tree *cb;
+    union tree *fillt;
+    union tree *lastt;
+    int previ;
+    int ndone;
+    color co;
+    color sco;
+
+    assert((uc % BYTTAB) == 0);
+
+    /*
+     * Find its color block, making new pointer blocks as needed.
+     */
+
+    t = cm->tree;
+    fillt = NULL;
+    for (level=0, shift=BYTBITS*(NBYTS-1); shift>0; level++, shift-=BYTBITS) {
+	b = (uc >> shift) & BYTMASK;
+	lastt = t;
+	t = lastt->tptr[b];
+	assert(t != NULL);
+	fillt = &cm->tree[level+1];
+	if (t == fillt && shift > BYTBITS) {	/* need new ptr block */
+	    t = (union tree *) MALLOC(sizeof(struct ptrs));
+	    if (t == NULL) {
+		CERR(REG_ESPACE);
+		return;
+	    }
+	    memcpy(t->tptr, fillt->tptr, BYTTAB*sizeof(union tree *));
+	    lastt->tptr[b] = t;
+	}
+    }
+
+    /*
+     * Special cases: fill block or solid block.
+     */
+    co = t->tcolor[0];
+    cb = cm->cd[co].block;
+    if (t == fillt || t == cb) {
+	/* Either way, we want a subcolor solid block. */
+	sco = newsub(cm, co);
+	if (sco == COLORLESS) {	/* newsub failed; never index cd[] with it */
+	    return;
+	}
+	t = cm->cd[sco].block;
+	if (t == NULL) {	/* must set it up */
+	    t = (union tree *) MALLOC(sizeof(struct colors));
+	    if (t == NULL) {
+		CERR(REG_ESPACE);
+		return;
+	    }
+	    for (i=0 ; i<BYTTAB ; i++) {
+		t->tcolor[i] = sco;
+	    }
+	    cm->cd[sco].block = t;
+	}
+
+	/* Find loop must have run at least once. */
+	lastt->tptr[b] = t;
+	newarc(v->nfa, PLAIN, sco, lp, rp);
+	cm->cd[co].nchrs -= BYTTAB;
+	cm->cd[sco].nchrs += BYTTAB;
+	return;
+    }
+
+    /*
+     * General case, a mixed block to be altered.
+     */
+
+    i = 0;
+    while (i < BYTTAB) {
+	co = t->tcolor[i];
+	sco = newsub(cm, co);
+	if (sco == COLORLESS) {	/* newsub failed; stop before using it */
+	    return;
+	}
+	newarc(v->nfa, PLAIN, sco, lp, rp);
+	previ = i;
+	do {			/* recolor the run of chrs sharing co */
+	    t->tcolor[i++] = sco;
+	} while (i < BYTTAB && t->tcolor[i] == co);
+	ndone = i - previ;
+	cm->cd[co].nchrs -= ndone;
+	cm->cd[sco].nchrs += ndone;
+    }
+}
+
+/*
+ - okcolors - promote subcolors to full colors
+ ^ static void okcolors(struct nfa *, struct colormap *);
+ */
+static void
+okcolors(
+    struct nfa *nfa,
+    struct colormap *cm)
+{
+    struct colordesc *cd;
+    struct colordesc *end = CDEND(cm);
+    struct colordesc *scd;
+    struct arc *a;
+    color co;
+    color sco;
+
+    for (cd=cm->cd, co=0 ; cd<end ; cd++, co++) {
+	sco = cd->sub;
+	if (UNUSEDCOLOR(cd) || sco == NOSUB) {
+	    /*
+	     * Has no subcolor, no further action.
+	     */
+	} else if (sco == co) {
+	    /*
+	     * Is subcolor, let parent deal with it.
+	     */
+	} else if (cd->nchrs == 0) {
+	    /*
+	     * Parent empty, its arcs change color to subcolor.
+	     */
+
+	    cd->sub = NOSUB;
+	    scd = &cm->cd[sco];
+	    assert(scd->nchrs > 0);
+	    assert(scd->sub == sco);
+	    scd->sub = NOSUB;	/* subcolor is no longer open */
+	    while ((a = cd->arcs) != NULL) {	/* uncolorchain unlinks a */
+		assert(a->co == co);
+		uncolorchain(cm, a);
+		a->co = sco;
+		colorchain(cm, a);	/* relink under the subcolor */
+	    }
+	    freecolor(cm, co);
+	} else {
+	    /*
+	     * Parent's arcs must gain parallel subcolor arcs.
+	     */
+
+	    cd->sub = NOSUB;
+	    scd = &cm->cd[sco];
+	    assert(scd->nchrs > 0);
+	    assert(scd->sub == sco);
+	    scd->sub = NOSUB;	/* subcolor is no longer open */
+	    for (a=cd->arcs ; a!=NULL ; a=a->colorchain) {
+		assert(a->co == co);
+		newarc(nfa, a->type, sco, a->from, a->to);
+	    }
+	}
+    }
+}
+
+/*
+ - colorchain - add this arc to the color chain of its color
+ ^ static void colorchain(struct colormap *, struct arc *);
+ */
+static void
+colorchain(
+    struct colormap *cm,
+    struct arc *a)
+{
+    struct arc **headp = &cm->cd[a->co].arcs;	/* chain head for a's color */
+
+    if (*headp != NULL) {
+	(*headp)->colorchainRev = a;	/* old head now follows a */
+    }
+    a->colorchainRev = NULL;		/* a becomes the new head */
+    a->colorchain = *headp;
+    *headp = a;
+}
+
+/*
+ - uncolorchain - delete this arc from the color chain of its color
+ ^ static void uncolorchain(struct colormap *, struct arc *);
+ */
+static void
+uncolorchain(
+    struct colormap *cm,	/* colormap owning the per-color chains */
+    struct arc *a)		/* arc to unlink; a->co selects the chain */
+{
+    struct colordesc *cd = &cm->cd[a->co];
+    struct arc *before = a->colorchainRev;
+    struct arc *after = a->colorchain;
+
+    if (before != NULL) {	/* a has a predecessor: bypass a */
+	assert(before->colorchain == a);
+	before->colorchain = after;
+    } else {			/* a is the head of the chain */
+	assert(cd->arcs == a);
+	cd->arcs = after;
+    }
+    if (after != NULL) {
+	after->colorchainRev = before;
+    }
+    a->colorchain = a->colorchainRev = NULL;	/* paranoia */
+}
+
+/*
+ - rainbow - add arcs of all full colors (but one) between specified states
+ ^ static void rainbow(struct nfa *, struct colormap *, int, pcolor,
+ ^ 	struct state *, struct state *);
+ */
+static void
+rainbow(
+    struct nfa *nfa,		/* NFA to add the arcs to */
+    struct colormap *cm,	/* source of the color list */
+    int type,			/* arc type passed through to newarc() */
+    pcolor but,			/* COLORLESS if no exceptions */
+    struct state *from,		/* new arcs start here ... */
+    struct state *to)		/* ... and end here */
+{
+    struct colordesc *d = cm->cd;
+    struct colordesc *const stop = CDEND(cm);
+    color c = 0;
+
+    for (; d < stop && !CISERR(); d++, c++) {
+	if (UNUSEDCOLOR(d) || d->sub == c || c == but || (d->flags&PSEUDO)) {
+	    continue;		/* unused, a subcolor, excluded, or pseudocolor */
+	}
+	newarc(nfa, type, c, from, to);
+    }
+}
+
+/*
+ - colorcomplement - add arcs of complementary colors
+ * The calling sequence ought to be reconciled with cloneouts().
+ ^ static void colorcomplement(struct nfa *, struct colormap *, int,
+ ^ 	struct state *, struct state *, struct state *);
+ */
+static void
+colorcomplement(
+    struct nfa *nfa,		/* NFA to add the arcs to */
+    struct colormap *cm,	/* source of the color list */
+    int type,			/* arc type passed through to newarc() */
+    struct state *of,		/* complements of this guy's PLAIN outarcs */
+    struct state *from,		/* new arcs start here ... */
+    struct state *to)		/* ... and end here */
+{
+    struct colordesc *d = cm->cd;
+    struct colordesc *const stop = CDEND(cm);
+    color c = 0;
+
+    assert(of != from);
+    for (; d < stop && !CISERR(); d++, c++) {
+	/* in-use, non-pseudo colors that "of" has no PLAIN arc for */
+	if (!UNUSEDCOLOR(d) && !(d->flags&PSEUDO)
+		&& findarc(of, PLAIN, c) == NULL) {
+	    newarc(nfa, type, c, from, to);
+	}
+    }
+}
+
+#ifdef REG_DEBUG
+/*
+ ^ #ifdef REG_DEBUG
+ */
+
+/*
+ - dumpcolors - debugging output
+ ^ static void dumpcolors(struct colormap *, FILE *);
+ */
+static void
+dumpcolors(
+    struct colormap *cm,	/* colormap to describe */
+    FILE *f)			/* stream receiving the dump */
+{
+    struct colordesc *d;
+    struct colordesc *stop;
+    color colno;
+    chr probe;
+    char *marker;		/* "#" when the color has a block, else "" */
+
+    fprintf(f, "max %ld\n", (long) cm->max);
+    if (NBYTS > 1) {
+	fillcheck(cm, cm->tree, 0, f);
+    }
+    stop = CDEND(cm);
+    for (d = cm->cd + 1, colno = 1; d < stop; d++, colno++) { /* skip 0 */
+	if (UNUSEDCOLOR(d)) {
+	    continue;
+	}
+	assert(d->nchrs > 0);
+	marker = (d->block == NULL) ? "" : "#";
+	if (!(d->flags&PSEUDO)) {
+	    fprintf(f, "#%2ld%s(%2d): ", (long) colno, marker, d->nchrs);
+	} else {
+	    fprintf(f, "#%2ld%s(ps): ", (long) colno, marker);
+	}
+
+	/*
+	 * Brute force over all chrs; CHR_MAX itself is tested after the loop.
+	 */
+	for (probe = CHR_MIN; probe < CHR_MAX; probe++) {
+	    if (GETCOLOR(cm, probe) == colno) {
+		dumpchr(probe, f);
+	    }
+	}
+	assert(probe == CHR_MAX);
+	if (GETCOLOR(cm, probe) == colno) {
+	    dumpchr(probe, f);
+	}
+	fprintf(f, "\n");
+    }
+}
+
+/*
+ - fillcheck - check proper filling of a tree
+ ^ static void fillcheck(struct colormap *, union tree *, int, FILE *);
+ */
+static void
+fillcheck(
+    struct colormap *cm,	/* colormap the tree belongs to */
+    union tree *tree,		/* pointer block to inspect */
+    int level,			/* level number (top == 0) of this block */
+    FILE *f)			/* stream receiving complaints */
+{
+    int slot = BYTTAB;
+    union tree *kid;
+    union tree *const filler = &cm->tree[level+1];
+
+    assert(level < NBYTS-1);	/* this level has pointers */
+    while (--slot >= 0) {	/* highest slot first */
+	kid = tree->tptr[slot];
+	if (kid == NULL) {
+	    fprintf(f, "NULL found in filled tree!\n");
+	    continue;
+	}
+	if (kid != filler && level < NBYTS-2) {
+	    fillcheck(cm, kid, level+1, f);	/* more pointer blocks below */
+	}
+    }
+}
+
+/*
+ - dumpchr - print a chr
+ * Kind of char-centric but works well enough for debug use.
+ ^ static void dumpchr(pchr, FILE *);
+ */
+static void
+dumpchr(
+    pchr c,			/* chr to print */
+    FILE *f)			/* stream to print it on */
+{
+    if (c != '\\' && c > ' ' && c <= '~') {
+	putc((char) c, f);	/* between '!' and '~', but not backslash */
+    } else if (c == '\\') {
+	fprintf(f, "\\\\");	/* backslash is written doubled */
+    } else {
+	fprintf(f, "\\u%04lx", (long) c);
+    }
+}
+
+/*
+ ^ #endif
+ */
+#endif				/* ifdef REG_DEBUG */
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_cvec.c b/contrib/hsrex/regc_cvec.c
new file mode 100644
index 0000000..0247521
--- /dev/null
+++ b/contrib/hsrex/regc_cvec.c
@@ -0,0 +1,146 @@
+/*
+ * Utility functions for handling cvecs
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Notes:
+ * Only (selected) functions in _this_ file should treat chr* as non-constant.
+ */
+
+/*
+ - newcvec - allocate a new cvec
+ ^ static struct cvec *newcvec(int, int);
+ */
+static struct cvec *
+newcvec(
+    int nchrs,			/* to hold this many chrs... */
+    int nranges)		/* ... and this many ranges... */
+{
+    size_t slots = (size_t)nchrs + (size_t)nranges*2; /* 2 chrs per range */
+    struct cvec *cv;
+
+    cv = (struct cvec *) MALLOC(sizeof(struct cvec) + slots*sizeof(chr));
+    if (cv == NULL) {
+	return NULL;
+    }
+    cv->chrs = (chr *)(cv + 1);	/* storage starts right after the header */
+    cv->chrspace = nchrs;
+    cv->ranges = cv->chrs + nchrs;	/* ranges follow the chrs */
+    cv->rangespace = nranges;
+    return clearcvec(cv);
+}
+
+/*
+ - clearcvec - clear a possibly-new cvec
+ * Returns pointer as convenience.
+ ^ static struct cvec *clearcvec(struct cvec *);
+ */
+static struct cvec *
+clearcvec(
+    struct cvec *cv)		/* character vector */
+{
+    assert(cv != NULL);
+    cv->nranges = 0;		/* forget any stored ranges ... */
+    cv->nchrs = 0;		/* ... and chrs; the space fields are untouched */
+    return cv;
+}
+
+/*
+ - addchr - add a chr to a cvec
+ ^ static void addchr(struct cvec *, pchr);
+ */
+static void
+addchr(
+    struct cvec *cv, pchr c)	/* vector with room left; chr to append */
+{
+    assert(cv->nchrs < cv->chrspace);	/* ranges[] starts right after */
+    cv->chrs[cv->nchrs++] = (chr)c;
+}
+
+/*
+ - addrange - add a range to a cvec
+ ^ static void addrange(struct cvec *, pchr, pchr);
+ */
+static void
+addrange(
+    struct cvec *cv,		/* character vector */
+    pchr from,			/* first character of range */
+    pchr to)			/* last character of range */
+{
+    assert(cv->nranges < cv->rangespace);
+    cv->nranges++;		/* claim the next (from, to) pair of slots */
+    cv->ranges[cv->nranges*2 - 2] = (chr)from;
+    cv->ranges[cv->nranges*2 - 1] = (chr)to;
+}
+
+/*
+ - getcvec - get a cvec, remembering it as v->cv
+ ^ static struct cvec *getcvec(struct vars *, int, int);
+ */
+static struct cvec *
+getcvec(
+    struct vars *v,		/* context */
+    int nchrs,			/* to hold this many chrs... */
+    int nranges)		/* ... and this many ranges... */
+{
+    struct cvec *cached = v->cv;
+
+    if (cached != NULL) {
+	if (nchrs <= cached->chrspace && nranges <= cached->rangespace) {
+	    return clearcvec(cached);	/* big enough: recycle it */
+	}
+	freecvec(cached);		/* too small: replace it */
+    }
+
+    v->cv = newcvec(nchrs, nranges);
+    if (v->cv == NULL) {
+	ERR(REG_ESPACE);
+    }
+    return v->cv;
+}
+
+/*
+ - freecvec - free a cvec
+ ^ static void freecvec(struct cvec *);
+ */
+static void
+freecvec(
+    struct cvec *cv)		/* character vector */
+{
+    FREE(cv);			/* the whole cvec goes back in a single FREE() */
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_lex.c b/contrib/hsrex/regc_lex.c
new file mode 100644
index 0000000..4be02c6
--- /dev/null
+++ b/contrib/hsrex/regc_lex.c
@@ -0,0 +1,1185 @@
+/*
+ * lexical analyzer
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* scanning macros (know about v): v->now is the next chr, v->stop the limit */
+#define	ATEOS()		(v->now >= v->stop)	/* no input left? */
+#define	HAVE(n)		(v->stop - v->now >= (n))	/* >= n chrs left? */
+#define	NEXT1(c)	(!ATEOS() && *v->now == CHR(c))	/* next chr is c? */
+#define	NEXT2(a,b)	(HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
+#define	NEXT3(a,b,c) \
+	(HAVE(3) && *v->now == CHR(a) && \
+		*(v->now+1) == CHR(b) && \
+		*(v->now+2) == CHR(c))
+#define	SET(c)		(v->nexttype = (c))	/* set token type only */
+#define	SETV(c, n)	(v->nexttype = (c), v->nextvalue = (n))	/* type+value */
+#define	RET(c)		return (SET(c), 1)	/* NB: returns from the user */
+#define	RETV(c, n)	return (SETV(c, n), 1)	/* ditto, with a value */
+#define	FAILW(e)	return (ERR(e), 0)	/* ERR does SET(EOS) */
+#define	LASTTYPE(t)	(v->lasttype == (t))	/* previous token was a t? */
+
+/* lexical contexts (values of v->lexcon) */
+#define	L_ERE	1	/* mainline ERE/ARE */
+#define	L_BRE	2	/* mainline BRE */
+#define	L_Q	3	/* REG_QUOTE */
+#define	L_EBND	4	/* ERE/ARE bound */
+#define	L_BBND	5	/* BRE bound */
+#define	L_BRACK	6	/* brackets */
+#define	L_CEL	7	/* collating element */
+#define	L_ECL	8	/* equivalence class */
+#define	L_CCL	9	/* character class */
+#define	INTOCON(c)	(v->lexcon = (c))	/* switch lexical context */
+#define	INCON(con)	(v->lexcon == (con))	/* currently in that context? */
+
+/* construct pointer past end of chr array (a real array, not a pointer) */
+#define	ENDOF(array)	((array) + sizeof(array)/sizeof(chr))
+
+/*
+ - lexstart - set up lexical stuff, scan leading options
+ ^ static void lexstart(struct vars *);
+ */
+static void
+lexstart(
+    struct vars *v)		/* compile state; v->cflags picks the flavor */
+{
+    prefixes(v);		/* may turn on new type bits etc. */
+    NOERR();
+
+    if (!(v->cflags&(REG_QUOTE|REG_EXTENDED))) {	/* basic RE */
+	assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
+	INTOCON(L_BRE);
+    } else if (!(v->cflags&REG_QUOTE)) {		/* ERE or ARE */
+	assert(!(v->cflags&REG_QUOTE));
+	INTOCON(L_ERE);
+    } else {						/* literal string */
+	assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
+	INTOCON(L_Q);
+    }
+
+    v->nexttype = EMPTY;	/* remember we were at the start */
+    next(v);			/* set up the first token */
+}
+
+/*
+ - prefixes - implement various special prefixes
+ ^ static void prefixes(struct vars *);
+ */
+static void
+prefixes(
+    struct vars *v)		/* v->now and v->cflags may both be updated */
+{
+    /*
+     * Literal string doesn't get any of this stuff.
+     */
+
+    if (v->cflags&REG_QUOTE) {
+	return;
+    }
+
+    /*
+     * Initial "***" gets special things; the fourth chr selects which.
+     */
+
+    if (HAVE(4) && NEXT3('*', '*', '*')) {
+	switch (*(v->now + 3)) {
+	case CHR('?'):		/* "***?" error, msg shows version */
+	    ERR(REG_BADPAT);
+	    return;		/* proceed no further */
+	    break;
+	case CHR('='):		/* "***=" shifts to literal string */
+	    NOTE(REG_UNONPOSIX);
+	    v->cflags |= REG_QUOTE;
+	    v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
+	    v->now += 4;
+	    return;		/* and there can be no more prefixes */
+	    break;
+	case CHR(':'):		/* "***:" shifts to AREs */
+	    NOTE(REG_UNONPOSIX);
+	    v->cflags |= REG_ADVANCED;
+	    v->now += 4;
+	    break;
+	default:		/* otherwise *** is just an error */
+	    ERR(REG_BADRPT);
+	    return;
+	    break;
+	}
+    }
+
+    /*
+     * BREs and EREs don't get embedded options.
+     */
+
+    if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) { /* need every ARE bit */
+	return;
+    }
+
+    /*
+     * Embedded options (AREs only): "(?" + letters + ")".
+     */
+
+    if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
+	NOTE(REG_UNONPOSIX);
+	v->now += 2;		/* step over "(?" */
+	for (; !ATEOS() && iscalpha(*v->now); v->now++) {
+	    switch (*v->now) {
+	    case CHR('b'):	/* BREs (but why???) */
+		v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
+		break;
+	    case CHR('c'):	/* case sensitive */
+		v->cflags &= ~REG_ICASE;
+		break;
+	    case CHR('e'):	/* plain EREs */
+		v->cflags |= REG_EXTENDED;
+		v->cflags &= ~(REG_ADVF|REG_QUOTE);
+		break;
+	    case CHR('i'):	/* case insensitive */
+		v->cflags |= REG_ICASE;
+		break;
+	    case CHR('m'):	/* Perloid synonym for n */
+	    case CHR('n'):	/* \n affects ^ $ . [^ */
+		v->cflags |= REG_NEWLINE;
+		break;
+	    case CHR('p'):	/* ~Perl, \n affects . [^ */
+		v->cflags |= REG_NLSTOP;
+		v->cflags &= ~REG_NLANCH;
+		break;
+	    case CHR('q'):	/* literal string */
+		v->cflags |= REG_QUOTE;
+		v->cflags &= ~REG_ADVANCED;
+		break;
+	    case CHR('s'):	/* single line, \n ordinary */
+		v->cflags &= ~REG_NEWLINE;
+		break;
+	    case CHR('t'):	/* tight syntax */
+		v->cflags &= ~REG_EXPANDED;
+		break;
+	    case CHR('w'):	/* weird, \n affects ^ $ only */
+		v->cflags &= ~REG_NLSTOP;
+		v->cflags |= REG_NLANCH;
+		break;
+	    case CHR('x'):	/* expanded syntax */
+		v->cflags |= REG_EXPANDED;
+		break;
+	    default:		/* a letter that is not an option */
+		ERR(REG_BADOPT);
+		return;
+	    }
+	}
+	if (!NEXT1(')')) {	/* option list must be closed by ")" */
+	    ERR(REG_BADOPT);
+	    return;
+	}
+	v->now++;		/* step over ")" */
+	if (v->cflags&REG_QUOTE) {	/* literal: no expanded/newline modes */
+	    v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
+	}
+    }
+}
+
+/*
+ - lexnest - "call a subroutine", interpolating string at the lexical level
+ * Note, this is not a very general facility.  There are a number of
+ * implicit assumptions about what sorts of strings can be subroutines.
+ ^ static void lexnest(struct vars *, const chr *, const chr *);
+ */
+static void
+lexnest(
+    struct vars *v,		/* lexer state to redirect */
+    const chr *beginp,		/* start of interpolation */
+    const chr *endp)		/* one past end of interpolation */
+{
+    assert(v->savenow == NULL);	/* only one level of nesting */
+    v->savestop = v->stop;	/* remember the interrupted input ... */
+    v->savenow = v->now;
+    v->stop = endp;		/* ... and scan the interpolation instead */
+    v->now = beginp;
+}
+
+/*
+ * string constants to interpolate as expansions of things like \d
+ */
+
+static const chr backd[] = {	/* \d  is  [[:digit:]] */
+    CHR('['), CHR('['), CHR(':'),
+    CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+    CHR(':'), CHR(']'), CHR(']')
+};
+static const chr backD[] = {	/* \D  is  [^[:digit:]] */
+    CHR('['), CHR('^'), CHR('['), CHR(':'),
+    CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+    CHR(':'), CHR(']'), CHR(']')
+};
+static const chr brbackd[] = {	/* \d within brackets  is  [:digit:] */
+    CHR('['), CHR(':'),
+    CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+    CHR(':'), CHR(']')
+};
+static const chr backs[] = {	/* \s  is  [[:space:]] */
+    CHR('['), CHR('['), CHR(':'),
+    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+    CHR(':'), CHR(']'), CHR(']')
+};
+static const chr backS[] = {	/* \S  is  [^[:space:]] */
+    CHR('['), CHR('^'), CHR('['), CHR(':'),
+    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+    CHR(':'), CHR(']'), CHR(']')
+};
+static const chr brbacks[] = {	/* \s within brackets  is  [:space:] */
+    CHR('['), CHR(':'),
+    CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+    CHR(':'), CHR(']')
+};
+static const chr backw[] = {	/* \w  is  [[:alnum:]_] */
+    CHR('['), CHR('['), CHR(':'),
+    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+    CHR(':'), CHR(']'), CHR('_'), CHR(']')
+};
+static const chr backW[] = {	/* \W  is  [^[:alnum:]_] */
+    CHR('['), CHR('^'), CHR('['), CHR(':'),
+    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+    CHR(':'), CHR(']'), CHR('_'), CHR(']')
+};
+static const chr brbackw[] = {	/* \w within brackets  is  [:alnum:]_ */
+    CHR('['), CHR(':'),
+    CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+    CHR(':'), CHR(']'), CHR('_')
+};
+
+/*
+ - lexword - interpolate a bracket expression for word characters
+ * Possibly ought to inquire whether there is a "word" character class.
+ ^ static void lexword(struct vars *);
+ */
+static void
+lexword(
+    struct vars *v)		/* lexer state to redirect */
+{
+    lexnest(v, backw, backw + sizeof(backw)/sizeof(chr)); /* the \w text */
+}
+
+/*
+ - next - get next token
+ ^ static int next(struct vars *);
+ */
+static int			/* 1 normal, 0 failure */
+next(
+    struct vars *v)		/* token goes to v->nexttype / v->nextvalue */
+{
+    chr c;
+
+    /*
+     * Errors yield an infinite sequence of failures.
+     */
+
+    if (ISERR()) {
+	return 0;		/* the error has set nexttype to EOS */
+    }
+
+    /*
+     * Remember flavor of last token.
+     */
+
+    v->lasttype = v->nexttype;
+
+    /* REG_BOSONLY: the very first token is an implicit \A */
+
+    if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
+	/* at start of a REG_BOSONLY RE */
+	RETV(SBEGIN, 0);	/* same as \A */
+    }
+
+    /*
+     * If we're nested and we've hit end, return to outer level.
+     */
+
+    if (v->savenow != NULL && ATEOS()) {
+	v->now = v->savenow;
+	v->stop = v->savestop;
+	v->savenow = v->savestop = NULL;
+    }
+
+    /*
+     * Skip white space etc. if appropriate (not in literal or [])
+     */
+
+    if (v->cflags&REG_EXPANDED) {
+	switch (v->lexcon) {
+	case L_ERE:
+	case L_BRE:
+	case L_EBND:
+	case L_BBND:
+	    skip(v);
+	    break;
+	}
+    }
+
+    /*
+     * Handle EOS, depending on context.
+     */
+
+    if (ATEOS()) {
+	switch (v->lexcon) {
+	case L_ERE:
+	case L_BRE:
+	case L_Q:
+	    RET(EOS);
+	    break;
+	case L_EBND:
+	case L_BBND:
+	    FAILW(REG_EBRACE);
+	    break;
+	case L_BRACK:
+	case L_CEL:
+	case L_ECL:
+	case L_CCL:
+	    FAILW(REG_EBRACK);
+	    break;
+	}
+	assert(NOTREACHED);
+    }
+
+    /* Okay, time to actually get a character. */
+
+    c = *v->now++;
+
+    /*
+     * Deal with the easy contexts, punt EREs to code below.
+     */
+
+    switch (v->lexcon) {
+    case L_BRE:			/* punt BREs to separate function */
+	return brenext(v, c);
+	break;
+    case L_ERE:			/* see below */
+	break;
+    case L_Q:			/* literal strings are easy */
+	RETV(PLAIN, c);
+	break;
+    case L_BBND:		/* bounds are fairly simple */
+    case L_EBND:
+	switch (c) {
+	case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+	case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+	case CHR('8'): case CHR('9'):
+	    RETV(DIGIT, (chr)DIGITVAL(c));
+	    break;
+	case CHR(','):
+	    RET(',');
+	    break;
+	case CHR('}'):		/* ERE bound ends with } */
+	    if (INCON(L_EBND)) {
+		INTOCON(L_ERE);
+		if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+		    v->now++;
+		    NOTE(REG_UNONPOSIX);
+		    RETV('}', 0);
+		}
+		RETV('}', 1);
+	    } else {
+		FAILW(REG_BADBR);
+	    }
+	    break;
+	case CHR('\\'):		/* BRE bound ends with \} */
+	    if (INCON(L_BBND) && NEXT1('}')) {
+		v->now++;
+		INTOCON(L_BRE);
+		RET('}');
+	    } else {
+		FAILW(REG_BADBR);
+	    }
+	    break;
+	default:
+	    FAILW(REG_BADBR);
+	    break;
+	}
+	assert(NOTREACHED);
+	break;
+    case L_BRACK:		/* brackets are not too hard */
+	switch (c) {
+	case CHR(']'):
+	    if (LASTTYPE('[')) {
+		RETV(PLAIN, c);
+	    } else {
+		INTOCON((v->cflags&REG_EXTENDED) ? L_ERE : L_BRE);
+		RET(']');
+	    }
+	    break;
+	case CHR('\\'):
+	    NOTE(REG_UBBS);
+	    if (!(v->cflags&REG_ADVF)) {
+		RETV(PLAIN, c);
+	    }
+	    NOTE(REG_UNONPOSIX);
+	    if (ATEOS()) {
+		FAILW(REG_EESCAPE);
+	    }
+	    (DISCARD)lexescape(v);
+	    switch (v->nexttype) {	/* not all escapes okay here */
+	    case PLAIN:
+		return 1;
+		break;
+	    case CCLASS:
+		switch (v->nextvalue) {
+		case 'd':
+		    lexnest(v, brbackd, ENDOF(brbackd));
+		    break;
+		case 's':
+		    lexnest(v, brbacks, ENDOF(brbacks));
+		    break;
+		case 'w':
+		    lexnest(v, brbackw, ENDOF(brbackw));
+		    break;
+		default:
+		    FAILW(REG_EESCAPE);
+		    break;
+		}
+
+		/*
+		 * lexnest() done, back up and try again.
+		 */
+
+		v->nexttype = v->lasttype;
+		return next(v);
+		break;
+	    }
+
+	    /*
+	     * Not one of the acceptable escapes.
+	     */
+
+	    FAILW(REG_EESCAPE);
+	    break;
+	case CHR('-'):
+	    if (LASTTYPE('[') || NEXT1(']')) {
+		RETV(PLAIN, c);
+	    } else {
+		RETV(RANGE, c);
+	    }
+	    break;
+	case CHR('['):
+	    if (ATEOS()) {
+		FAILW(REG_EBRACK);
+	    }
+	    switch (*v->now++) {
+	    case CHR('.'):
+		INTOCON(L_CEL);
+
+		/*
+		 * Might or might not be locale-specific.
+		 */
+
+		RET(COLLEL);
+		break;
+	    case CHR('='):
+		INTOCON(L_ECL);
+		NOTE(REG_ULOCALE);
+		RET(ECLASS);
+		break;
+	    case CHR(':'):
+		INTOCON(L_CCL);
+		NOTE(REG_ULOCALE);
+		RET(CCLASS);
+		break;
+	    default:		/* oops */
+		v->now--;
+		RETV(PLAIN, c);
+		break;
+	    }
+	    assert(NOTREACHED);
+	    break;
+	default:
+	    RETV(PLAIN, c);
+	    break;
+	}
+	assert(NOTREACHED);
+	break;
+    case L_CEL:			/* collating elements are easy */
+	if (c == CHR('.') && NEXT1(']')) {
+	    v->now++;
+	    INTOCON(L_BRACK);
+	    RETV(END, '.');
+	} else {
+	    RETV(PLAIN, c);
+	}
+	break;
+    case L_ECL:			/* ditto equivalence classes */
+	if (c == CHR('=') && NEXT1(']')) {
+	    v->now++;
+	    INTOCON(L_BRACK);
+	    RETV(END, '=');
+	} else {
+	    RETV(PLAIN, c);
+	}
+	break;
+    case L_CCL:			/* ditto character classes */
+	if (c == CHR(':') && NEXT1(']')) {
+	    v->now++;
+	    INTOCON(L_BRACK);
+	    RETV(END, ':');
+	} else {
+	    RETV(PLAIN, c);
+	}
+	break;
+    default:
+	assert(NOTREACHED);
+	break;
+    }
+
+    /*
+     * That got rid of everything except EREs and AREs.
+     */
+
+    assert(INCON(L_ERE));
+
+    /*
+     * Deal with EREs and AREs, except for backslashes.
+     */
+
+    switch (c) {
+    case CHR('|'):
+	RET('|');
+	break;
+    case CHR('*'):
+	if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+	    v->now++;
+	    NOTE(REG_UNONPOSIX);
+	    RETV('*', 0);
+	}
+	RETV('*', 1);
+	break;
+    case CHR('+'):
+	if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+	    v->now++;
+	    NOTE(REG_UNONPOSIX);
+	    RETV('+', 0);
+	}
+	RETV('+', 1);
+	break;
+    case CHR('?'):
+	if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+	    v->now++;
+	    NOTE(REG_UNONPOSIX);
+	    RETV('?', 0);
+	}
+	RETV('?', 1);
+	break;
+    case CHR('{'):		/* bounds start or plain character */
+	if (v->cflags&REG_EXPANDED) {
+	    skip(v);
+	}
+	if (ATEOS() || !iscdigit(*v->now)) {
+	    NOTE(REG_UBRACES);
+	    NOTE(REG_UUNSPEC);
+	    RETV(PLAIN, c);
+	} else {
+	    NOTE(REG_UBOUNDS);
+	    INTOCON(L_EBND);
+	    RET('{');
+	}
+	assert(NOTREACHED);
+	break;
+    case CHR('('):		/* parenthesis, or advanced extension */
+	if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+	    NOTE(REG_UNONPOSIX);
+	    v->now++;
+	    /* "(?" may be the very end of the RE; never read at v->stop */
+	    if (ATEOS()) {
+		FAILW(REG_BADRPT);
+	    }
+	    switch (*v->now++) {
+	    case CHR(':'):	/* non-capturing paren */
+		RETV('(', 0);
+		break;
+	    case CHR('#'):	/* comment */
+		while (!ATEOS() && *v->now != CHR(')')) {
+		    v->now++;
+		}
+		if (!ATEOS()) {
+		    v->now++;
+		}
+		assert(v->nexttype == v->lasttype);
+		return next(v);
+		break;
+	    case CHR('='):	/* positive lookahead */
+		NOTE(REG_ULOOKAHEAD);
+		RETV(LACON, 1);
+		break;
+	    case CHR('!'):	/* negative lookahead */
+		NOTE(REG_ULOOKAHEAD);
+		RETV(LACON, 0);
+		break;
+	    default:
+		FAILW(REG_BADRPT);
+		break;
+	    }
+	    assert(NOTREACHED);
+	}
+	if (v->cflags&REG_NOSUB) {
+	    RETV('(', 0);	/* all parens non-capturing */
+	} else {
+	    RETV('(', 1);
+	}
+	break;
+    case CHR(')'):
+	if (LASTTYPE('(')) {
+	    NOTE(REG_UUNSPEC);
+	}
+	RETV(')', c);
+	break;
+    case CHR('['):		/* easy except for [[:<:]] and [[:>:]] */
+	if (HAVE(6) &&	*(v->now+0) == CHR('[') &&
+		*(v->now+1) == CHR(':') &&
+		(*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) &&
+		*(v->now+3) == CHR(':') &&
+		*(v->now+4) == CHR(']') &&
+		*(v->now+5) == CHR(']')) {
+	    c = *(v->now+2);
+	    v->now += 6;
+	    NOTE(REG_UNONPOSIX);
+	    RET((c == CHR('<')) ? '<' : '>');
+	}
+	INTOCON(L_BRACK);
+	if (NEXT1('^')) {
+	    v->now++;
+	    RETV('[', 0);
+	}
+	RETV('[', 1);
+	break;
+    case CHR('.'):
+	RET('.');
+	break;
+    case CHR('^'):
+	RET('^');
+	break;
+    case CHR('$'):
+	RET('$');
+	break;
+    case CHR('\\'):		/* mostly punt backslashes to code below */
+	if (ATEOS()) {
+	    FAILW(REG_EESCAPE);
+	}
+	break;
+    default:		/* ordinary character */
+	RETV(PLAIN, c);
+	break;
+    }
+
+    /*
+     * ERE/ARE backslash handling; backslash already eaten.
+     */
+
+    assert(!ATEOS());
+    if (!(v->cflags&REG_ADVF)) {/* only AREs have non-trivial escapes */
+	if (iscalnum(*v->now)) {
+	    NOTE(REG_UBSALNUM);
+	    NOTE(REG_UUNSPEC);
+	}
+	RETV(PLAIN, *v->now++);
+    }
+    (DISCARD)lexescape(v);
+    if (ISERR()) {
+	FAILW(REG_EESCAPE);
+    }
+    if (v->nexttype == CCLASS) {/* fudge at lexical level */
+	switch (v->nextvalue) {
+	case 'd':	lexnest(v, backd, ENDOF(backd)); break;
+	case 'D':	lexnest(v, backD, ENDOF(backD)); break;
+	case 's':	lexnest(v, backs, ENDOF(backs)); break;
+	case 'S':	lexnest(v, backS, ENDOF(backS)); break;
+	case 'w':	lexnest(v, backw, ENDOF(backw)); break;
+	case 'W':	lexnest(v, backW, ENDOF(backW)); break;
+	default:
+	    assert(NOTREACHED);
+	    FAILW(REG_ASSERT);
+	    break;
+	}
+	/* lexnest done, back up and try again */
+	v->nexttype = v->lasttype;
+	return next(v);
+    }
+
+    /*
+     * Otherwise, lexescape has already done the work.
+     */
+
+    return !ISERR();
+}
+
+/*
+ - lexescape - parse an ARE backslash escape (backslash already eaten)
+ * Note slightly nonstandard use of the CCLASS type code.
+ ^ static int lexescape(struct vars *);
+ */
+static int			/* not actually used, but convenient for RETV */
+lexescape(
+    struct vars *v)		/* token goes to v->nexttype / v->nextvalue */
+{
+    chr c;
+    static const chr alert[] = {	/* name looked up for \a */
+	CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
+    };
+    static const chr esc[] = {		/* name looked up for \e */
+	CHR('E'), CHR('S'), CHR('C')
+    };
+    const chr *save;
+
+    assert(v->cflags&REG_ADVF);
+
+    assert(!ATEOS());
+    c = *v->now++;
+    if (!iscalnum(c)) {		/* backslash + non-alphanumeric: literal */
+	RETV(PLAIN, c);
+    }
+
+    NOTE(REG_UNONPOSIX);
+    switch (c) {
+    case CHR('a'):
+	RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
+	break;
+    case CHR('A'):
+	RETV(SBEGIN, 0);
+	break;
+    case CHR('b'):
+	RETV(PLAIN, CHR('\b'));
+	break;
+    case CHR('B'):
+	RETV(PLAIN, CHR('\\'));
+	break;
+    case CHR('c'):
+	NOTE(REG_UUNPORT);
+	if (ATEOS()) {
+	    FAILW(REG_EESCAPE);
+	}
+	RETV(PLAIN, (chr)(*v->now++ & 037));
+	break;
+    case CHR('d'):
+	NOTE(REG_ULOCALE);
+	RETV(CCLASS, 'd');
+	break;
+    case CHR('D'):
+	NOTE(REG_ULOCALE);
+	RETV(CCLASS, 'D');
+	break;
+    case CHR('e'):
+	NOTE(REG_UUNPORT);
+	RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
+	break;
+    case CHR('f'):
+	RETV(PLAIN, CHR('\f'));
+	break;
+    case CHR('m'):
+	RET('<');
+	break;
+    case CHR('M'):
+	RET('>');
+	break;
+    case CHR('n'):
+	RETV(PLAIN, CHR('\n'));
+	break;
+    case CHR('r'):
+	RETV(PLAIN, CHR('\r'));
+	break;
+    case CHR('s'):
+	NOTE(REG_ULOCALE);
+	RETV(CCLASS, 's');
+	break;
+    case CHR('S'):
+	NOTE(REG_ULOCALE);
+	RETV(CCLASS, 'S');
+	break;
+    case CHR('t'):
+	RETV(PLAIN, CHR('\t'));
+	break;
+    case CHR('u'):
+	c = lexdigits(v, 16, 4, 4);
+	if (ISERR()) {
+	    FAILW(REG_EESCAPE);
+	}
+	RETV(PLAIN, c);
+	break;
+    case CHR('U'):
+	c = lexdigits(v, 16, 8, 8);
+	if (ISERR()) {
+	    FAILW(REG_EESCAPE);
+	}
+	RETV(PLAIN, c);
+	break;
+    case CHR('v'):
+	RETV(PLAIN, CHR('\v'));
+	break;
+    case CHR('w'):
+	NOTE(REG_ULOCALE);
+	RETV(CCLASS, 'w');
+	break;
+    case CHR('W'):
+	NOTE(REG_ULOCALE);
+	RETV(CCLASS, 'W');
+	break;
+    case CHR('x'):
+	NOTE(REG_UUNPORT);
+	c = lexdigits(v, 16, 1, 255);	/* REs >255 long outside spec */
+	if (ISERR()) {
+	    FAILW(REG_EESCAPE);
+	}
+	RETV(PLAIN, c);
+	break;
+    case CHR('y'):
+	NOTE(REG_ULOCALE);
+	RETV(WBDRY, 0);
+	break;
+    case CHR('Y'):
+	NOTE(REG_ULOCALE);
+	RETV(NWBDRY, 0);
+	break;
+    case CHR('Z'):
+	RETV(SEND, 0);
+	break;
+    case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+    case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+    case CHR('9'):
+	save = v->now;		/* just past the first digit */
+	v->now--;		/* put first digit back */
+	c = lexdigits(v, 10, 1, 255);	/* REs >255 long outside spec */
+	if (ISERR()) {
+	    FAILW(REG_EESCAPE);
+	}
+
+	/*
+	 * Ugly heuristic (first test is "exactly 1 digit?")
+	 */
+
+	if (v->now - save == 0 || ((int) c > 0 && (int)c <= v->nsubexp)) {
+	    NOTE(REG_UBACKREF);
+	    RETV(BACKREF, (chr)c);
+	}
+
+	/*
+	 * Oops, doesn't look like it's a backref after all...
+	 */
+
+	v->now = save;
+
+	/*
+	 * And fall through into octal number.
+	 */
+	/* FALLTHRU */
+    case CHR('0'):
+	NOTE(REG_UUNPORT);
+	v->now--;		/* put first digit back */
+	c = lexdigits(v, 8, 1, 3);
+	if (ISERR()) {
+	    FAILW(REG_EESCAPE);
+	}
+	RETV(PLAIN, c);
+	break;
+    default:
+	assert(iscalpha(c));
+	FAILW(REG_EESCAPE);	/* unknown alphabetic escape */
+	break;
+    }
+    assert(NOTREACHED);
+}
+
+/*
+ - lexdigits - slurp up digits and return chr value
+ ^ static chr lexdigits(struct vars *, int, int, int);
+ */
+static chr			/* chr value; errors signalled via ERR */
+lexdigits(
+    struct vars *v,		/* lexer state; v->now advances past the digits */
+    int base,			/* radix of the number */
+    int minlen,			/* fewer digits than this is REG_EESCAPE */
+    int maxlen)			/* never consume more digits than this */
+{
+    uchr accum = 0;		/* unsigned to avoid overflow misbehavior */
+    const uchr radix = (uchr) base;
+    int ndigits = 0;
+    int val;			/* value of current digit, -1 if not a digit */
+    chr c;
+
+    /*
+     * Accumulate digits until maxlen is reached, input runs out, or a chr
+     * that is not a digit of this base turns up (which is put back).
+     */
+    while (ndigits < maxlen && !ATEOS()) {
+	c = *v->now++;
+	switch (c) {
+	case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+	case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+	case CHR('8'): case CHR('9'):
+	    val = DIGITVAL(c);
+	    break;
+	case CHR('a'): case CHR('A'): val = 10; break;
+	case CHR('b'): case CHR('B'): val = 11; break;
+	case CHR('c'): case CHR('C'): val = 12; break;
+	case CHR('d'): case CHR('D'): val = 13; break;
+	case CHR('e'): case CHR('E'): val = 14; break;
+	case CHR('f'): case CHR('F'): val = 15; break;
+	default:
+	    val = -1;		/* not a digit in any base */
+	    break;
+	}
+
+	if (val < 0 || val >= base) {
+	    v->now--;		/* put the non-digit back */
+	    break;		/* NOTE BREAK OUT */
+	}
+	accum = accum*radix + (uchr)val;
+	ndigits++;
+    }
+    if (ndigits < minlen) {
+	ERR(REG_EESCAPE);
+    }
+
+    return (chr)accum;
+}
+
+/*
+ - brenext - get next BRE token
+ * This is much like EREs except for all the stupid backslashes and the
+ * context-dependency of some things.
+ ^ static int brenext(struct vars *, pchr);
+ */
+static int			/* 1 normal, 0 failure */
+brenext(
+    struct vars *v,		/* token goes to v->nexttype / v->nextvalue */
+    pchr pc)			/* chr that next() has already consumed */
+{
+    chr c = (chr)pc;
+
+    switch (c) {
+    case CHR('*'):		/* literal at start, after ( or after ^ */
+	if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) {
+	    RETV(PLAIN, c);
+	}
+	RET('*');
+	break;
+    case CHR('['):		/* bracket, except for [[:<:]] and [[:>:]] */
+	if (HAVE(6) &&	*(v->now+0) == CHR('[') &&
+		*(v->now+1) == CHR(':') &&
+		(*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) &&
+		*(v->now+3) == CHR(':') &&
+		*(v->now+4) == CHR(']') &&
+		*(v->now+5) == CHR(']')) {
+	    c = *(v->now+2);
+	    v->now += 6;
+	    NOTE(REG_UNONPOSIX);
+	    RET((c == CHR('<')) ? '<' : '>');
+	}
+	INTOCON(L_BRACK);
+	if (NEXT1('^')) {
+	    v->now++;
+	    RETV('[', 0);
+	}
+	RETV('[', 1);
+	break;
+    case CHR('.'):
+	RET('.');
+	break;
+    case CHR('^'):		/* anchor only at start or right after ( */
+	if (LASTTYPE(EMPTY)) {
+	    RET('^');
+	}
+	if (LASTTYPE('(')) {
+	    NOTE(REG_UUNSPEC);
+	    RET('^');
+	}
+	RETV(PLAIN, c);
+	break;
+    case CHR('$'):		/* anchor only at end or right before \) */
+	if (v->cflags&REG_EXPANDED) {
+	    skip(v);
+	}
+	if (ATEOS()) {
+	    RET('$');
+	}
+	if (NEXT2('\\', ')')) {
+	    NOTE(REG_UUNSPEC);
+	    RET('$');
+	}
+	RETV(PLAIN, c);
+	break;
+    case CHR('\\'):
+	break;			/* see below */
+    default:
+	RETV(PLAIN, c);
+	break;
+    }
+
+    assert(c == CHR('\\'));
+
+    if (ATEOS()) {		/* backslash with nothing after it */
+	FAILW(REG_EESCAPE);
+    }
+
+    c = *v->now++;		/* the chr being escaped */
+    switch (c) {
+    case CHR('{'):
+	INTOCON(L_BBND);
+	NOTE(REG_UBOUNDS);
+	RET('{');
+	break;
+    case CHR('('):
+	RETV('(', 1);
+	break;
+    case CHR(')'):
+	RETV(')', c);
+	break;
+    case CHR('<'):
+	NOTE(REG_UNONPOSIX);
+	RET('<');
+	break;
+    case CHR('>'):
+	NOTE(REG_UNONPOSIX);
+	RET('>');
+	break;
+    case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+    case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+    case CHR('9'):
+	NOTE(REG_UBACKREF);
+	RETV(BACKREF, (chr)DIGITVAL(c));
+	break;
+    default:			/* any other escaped chr stands for itself */
+	if (iscalnum(c)) {
+	    NOTE(REG_UBSALNUM);
+	    NOTE(REG_UUNSPEC);
+	}
+	RETV(PLAIN, c);
+	break;
+    }
+
+    assert(NOTREACHED);
+}
+
+/*
+ - skip - skip white space and comments in expanded form
+ ^ static void skip(struct vars *);
+ */
+static void
+skip(
+    struct vars *v)		/* lexer state; v->now is advanced */
+{
+    const chr *const origin = v->now;
+
+    assert(v->cflags&REG_EXPANDED);
+
+    /*
+     * Swallow alternating runs of white space and "#" comments.
+     */
+
+    for (;;) {
+	for (; !ATEOS() && iscspace(*v->now); v->now++) {
+	    continue;
+	}
+	if (!NEXT1('#')) {
+	    break;		/* NOTE BREAK OUT */
+	}
+	/* comment runs up to the newline, which the space loop then eats */
+	for (; !ATEOS() && *v->now != CHR('\n'); v->now++) {
+	    continue;
+	}
+    }
+
+    if (v->now != origin) {
+	NOTE(REG_UNONPOSIX);
+    }
+}
+
+/*
+ - newline - return the chr for a newline
+ * This helps confine use of CHR to this source file.
+ ^ static chr newline(NOPARMS);
+ */
+static chr
+newline(void)
+{
+    return CHR('\n');		/* the newline character as a chr */
+}
+
+/*
+ - ch - return the chr sequence for regc_locale.c's fake collating element ch
+ * This helps confine use of CHR to this source file.  Beware that the caller
+ * knows how long the sequence is.
+ ^ #ifdef REG_DEBUG
+ ^ static const chr *ch(NOPARMS);
+ ^ #endif
+ */
+#ifdef REG_DEBUG
+static const chr *
+ch(void)
+{
+    static const chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
+
+    return chstr;		/* 'c', 'h', then a NUL chr */
+}
+#endif
+
+/*
+ - chrnamed - return the chr known by a given (chr string) name
+ * The code is a bit clumsy, but this routine gets only such specialized
+ * use that it hardly matters.
+ ^ static chr chrnamed(struct vars *, const chr *, const chr *, pchr);
+ */
+static chr
+chrnamed(
+    struct vars *v,		/* context; v->err is preserved across lookup */
+    const chr *startp,		/* start of name */
+    const chr *endp,		/* just past end of name */
+    pchr lastresort)		/* what to return if name lookup fails */
+{
+    celt c;
+    int errsave;
+    int e;
+    struct cvec *cv;
+
+    errsave = v->err;		/* look the name up with a clean error slot */
+    v->err = 0;
+    c = element(v, startp, endp);
+    e = v->err;
+    v->err = errsave;		/* a failed lookup must not poison the compile */
+
+    if (e != 0) {
+	return (chr)lastresort;
+    }
+
+    cv = range(v, c, c, 0);
+    if (cv == NULL || cv->nchrs == 0) {	/* no cvec (e.g. no memory) or empty */
+	return (chr)lastresort;
+    }
+    return cv->chrs[0];
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_locale.c b/contrib/hsrex/regc_locale.c
new file mode 100644
index 0000000..a6bc3af
--- /dev/null
+++ b/contrib/hsrex/regc_locale.c
@@ -0,0 +1,1163 @@
+/*
+ * regc_locale.c --
+ *
+ *	This file contains the Unicode locale specific regexp routines.
+ *	This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998 by Scriptics Corporation.
+ *
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ *
+ * RCS: @(#) $Id: regc_locale.c,v 1.20 2007/12/13 15:23:14 dgp Exp $
+ */
+
+/* ASCII character-name table */
+
+static const struct cname {
+    const char *name;		/* symbolic character name */
+    const char code;		/* the ASCII character that name denotes */
+} cnames[] = {			/* several names may share one code */
+    {"NUL",		'\0'},
+    {"SOH",		'\001'},
+    {"STX",		'\002'},
+    {"ETX",		'\003'},
+    {"EOT",		'\004'},
+    {"ENQ",		'\005'},
+    {"ACK",		'\006'},
+    {"BEL",		'\007'},
+    {"alert",		'\007'},
+    {"BS",		'\010'},
+    {"backspace",	'\b'},
+    {"HT",		'\011'},
+    {"tab",		'\t'},
+    {"LF",		'\012'},
+    {"newline",		'\n'},
+    {"VT",		'\013'},
+    {"vertical-tab",	'\v'},
+    {"FF",		'\014'},
+    {"form-feed",	'\f'},
+    {"CR",		'\015'},
+    {"carriage-return",	'\r'},
+    {"SO",		'\016'},
+    {"SI",		'\017'},
+    {"DLE",		'\020'},
+    {"DC1",		'\021'},
+    {"DC2",		'\022'},
+    {"DC3",		'\023'},
+    {"DC4",		'\024'},
+    {"NAK",		'\025'},
+    {"SYN",		'\026'},
+    {"ETB",		'\027'},
+    {"CAN",		'\030'},
+    {"EM",		'\031'},
+    {"SUB",		'\032'},
+    {"ESC",		'\033'},
+    {"IS4",		'\034'},
+    {"FS",		'\034'},
+    {"IS3",		'\035'},
+    {"GS",		'\035'},
+    {"IS2",		'\036'},
+    {"RS",		'\036'},
+    {"IS1",		'\037'},
+    {"US",		'\037'},
+    {"space",		' '},
+    {"exclamation-mark",'!'},
+    {"quotation-mark",	'"'},
+    {"number-sign",	'#'},
+    {"dollar-sign",	'$'},
+    {"percent-sign",	'%'},
+    {"ampersand",	'&'},
+    {"apostrophe",	'\''},
+    {"left-parenthesis",'('},
+    {"right-parenthesis", ')'},
+    {"asterisk",	'*'},
+    {"plus-sign",	'+'},
+    {"comma",		','},
+    {"hyphen",		'-'},
+    {"hyphen-minus",	'-'},
+    {"period",		'.'},
+    {"full-stop",	'.'},
+    {"slash",		'/'},
+    {"solidus",		'/'},
+    {"zero",		'0'},
+    {"one",		'1'},
+    {"two",		'2'},
+    {"three",		'3'},
+    {"four",		'4'},
+    {"five",		'5'},
+    {"six",		'6'},
+    {"seven",		'7'},
+    {"eight",		'8'},
+    {"nine",		'9'},
+    {"colon",		':'},
+    {"semicolon",	';'},
+    {"less-than-sign",	'<'},
+    {"equals-sign",	'='},
+    {"greater-than-sign", '>'},
+    {"question-mark",	'?'},
+    {"commercial-at",	'@'},
+    {"left-square-bracket", '['},
+    {"backslash",	'\\'},
+    {"reverse-solidus",	'\\'},
+    {"right-square-bracket", ']'},
+    {"circumflex",	'^'},
+    {"circumflex-accent", '^'},
+    {"underscore",	'_'},
+    {"low-line",	'_'},
+    {"grave-accent",	'`'},
+    {"left-brace",	'{'},
+    {"left-curly-bracket", '{'},
+    {"vertical-line",	'|'},
+    {"right-brace",	'}'},
+    {"right-curly-bracket", '}'},
+    {"tilde",		'~'},
+    {"DEL",		'\177'},
+    {NULL,		0}	/* NULL name: presumably the end-of-table marker */
+};
+
+/*
+ * Unicode character-class tables.
+ */
+
+typedef struct crange {
+    chr start;			/* first chr of the range (inclusive) */
+    chr end;			/* last chr of the range (inclusive) */
+} crange;
+
+#if defined(REGEX_STANDALONE) && ! defined(REGEX_WCHAR)
+
+static const crange alphaRangeTable[] = {
+    {0x41, 0x5a}, {0x61, 0x7a}	/* 'A'..'Z', 'a'..'z' */
+};
+
+#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
+
+/* ASCII has no singleton alphabetic chrs.  The dummy 0 merely avoids an empty
+ * initializer (zero-length array), which ISO C forbids; it is not counted. */
+static const chr alphaCharTable[] = { 0 };
+#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr) - 1)
+
+static const crange digitRangeTable[] = {
+    {0x30, 0x39}		/* '0'..'9' */
+};
+
+#define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange))
+
+static const crange punctRangeTable[] = {
+    {0x21, 0x23}, {0x25, 0x2a}, {0x2c, 0x2f}, {0x5b, 0x5d},
+};	/* '!'..'#', '%'..'*', ','..'/', '['..']' */
+
+#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
+
+static const chr punctCharTable[] = {
+    0x3a, 0x3b, 0x3f, 0x40, 0x5f, 0x7b, 0x7d	/* : ; ? @ _ { } */
+};	/* NOTE(review): $ + < = > ^ ` | ~ are absent, unlike C ispunct() - confirm */
+
+#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
+
+static const crange spaceRangeTable[] = {
+    {0x09, 0x0d}		/* HT, LF, VT, FF, CR */
+};
+
+#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
+
+static const chr spaceCharTable[] = {
+    0x20			/* the space character */
+};
+
+#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
+
+static const crange lowerRangeTable[] = {
+    {0x61, 0x7a}		/* 'a'..'z' */
+};
+
+#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
+
+/* ASCII has no singleton lowercase chrs.  The dummy 0 merely avoids an empty
+ * initializer (zero-length array), which ISO C forbids; it is not counted. */
+static const chr lowerCharTable[] = { 0 };
+#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr) - 1)
+
+static const crange upperRangeTable[] = {
+    {0x41, 0x5a}		/* 'A'..'Z' */
+};
+
+#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
+
+/* ASCII has no singleton uppercase chrs.  The dummy 0 merely avoids an empty
+ * initializer (zero-length array), which ISO C forbids; it is not counted. */
+static const chr upperCharTable[] = { 0 };
+#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr) - 1)
+
+static const crange graphRangeTable[] = {
+    {0x21, 0x7e}		/* '!'..'~' */
+};
+
+#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
+
+/* The single range above needs no extra singletons.  The dummy 0 merely avoids
+ * an empty initializer (zero-length array), which ISO C forbids; not counted. */
+static const chr graphCharTable[] = { 0 };
+#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr) - 1)
+
+static const crange printRangeTable[] = {
+    {0x20, 0x7E}		/* ' '..'~' */
+};
+
+#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange))
+
+/* The single range above needs no extra singletons.  The dummy 0 merely avoids
+ * an empty initializer (zero-length array), which ISO C forbids; not counted. */
+static const chr printCharTable[] = { 0 };
+#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr) - 1)
+#else
+
+/*
+ *	Declarations of Unicode character ranges.  This code
+ *	is automatically generated by the tools/uniClass.tcl script
+ *	and used in generic/regc_locale.c.  Do not modify by hand.
+ */
+
+/* Unicode: alphabetic characters */
+
+static const crange alphaRangeTable[] = {
+    {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6},
+    {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8},
+    {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1},
+    {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481},
+    {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587},
+    {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a},
+    {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5},
+    {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8},
+    {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a},
+    {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74},
+    {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
+    {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
+    {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90},
+    {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9},
+    {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33},
+    {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8},
+    {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10},
+    {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
+    {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46},
+    {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0},
+    {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b},
+    {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5},
+    {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9},
+    {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256},
+    {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae},
+    {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce},
+    {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315},
+    {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4},
+    {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea},
+    {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b},
+    {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
+    {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
+    {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
+    {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
+    {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131},
+    {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa},
+    {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7},
+    {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3},
+    {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28},
+    {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d},
+    {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72},
+    {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe},
+    {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}
+};
+
+#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
+
+static const chr alphaCharTable[] = {
+    0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c,
+    0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5,
+    0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd,
+    0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38,
+    0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f,
+    0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c,
+    0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1,
+    0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
+    0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3,
+    0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258,
+    0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f,
+    0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d,
+    0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe
+};
+
+#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
+
+/*
+ * Unicode: decimal digit characters
+ */
+
+static const crange digitRangeTable[] = {
+    {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f},
+    {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f},
+    {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f},
+    {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049},
+    {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19}
+};
+
+#define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange))
+
+/*
+ * no singletons of digit characters.
+ */
+
+/*
+ * Unicode: punctuation characters.
+ */
+
+static const crange punctRangeTable[] = {
+    {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d},
+    {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12},
+    {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed},
+    {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043},
+    {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f},
+    {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03},
+    {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65}
+};
+
+#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
+
+static const chr punctCharTable[] = {
+    0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab,
+    0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be,
+    0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964,
+    0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d,
+    0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d,
+    0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68,
+    0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d
+};
+
+#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
+
+/*
+ * Unicode: white space characters.
+ */
+
+static const crange spaceRangeTable[] = {
+    {0x0009, 0x000d}, {0x2000, 0x200b}
+};
+
+#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
+
+static const chr spaceCharTable[] = {
+    0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000
+};
+
+#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
+
+/*
+ * Unicode: lowercase characters
+ */
+
+static const crange lowerRangeTable[] = {
+    {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180},
+    {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce},
+    {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587},
+    {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27},
+    {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67},
+    {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7},
+    {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7},
+    {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a}
+};
+
+#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
+
+static const chr lowerCharTable[] = {
+    0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b,
+    0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d,
+    0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f,
+    0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140,
+    0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151,
+    0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163,
+    0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175,
+    0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192,
+    0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad,
+    0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce,
+    0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df,
+    0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0,
+    0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205,
+    0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217,
+    0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b,
+    0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd,
+    0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5,
+    0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471,
+    0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d,
+    0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f,
+    0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1,
+    0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4,
+    0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd,
+    0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef,
+    0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09,
+    0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b,
+    0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d,
+    0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f,
+    0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51,
+    0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63,
+    0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75,
+    0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87,
+    0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5,
+    0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7,
+    0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9,
+    0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb,
+    0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed,
+    0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe,
+    0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e,
+    0x210f, 0x2113, 0x212f, 0x2134, 0x2139
+};
+
+#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
+
+/*
+ * Unicode: uppercase characters.
+ */
+
+static const crange upperRangeTable[] = {
+    {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b},
+    {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8},
+    {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4},
+    {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f},
+    {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d},
+    {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb},
+    {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112},
+    {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a}
+};
+
+#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
+
+static const chr upperCharTable[] = {
+    0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110,
+    0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122,
+    0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134,
+    0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147,
+    0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a,
+    0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c,
+    0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d,
+    0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d,
+    0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae,
+    0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd,
+    0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0,
+    0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4,
+    0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a,
+    0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c,
+    0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230,
+    0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0,
+    0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460,
+    0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472,
+    0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e,
+    0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0,
+    0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2,
+    0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3,
+    0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc,
+    0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee,
+    0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08,
+    0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a,
+    0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c,
+    0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e,
+    0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50,
+    0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62,
+    0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74,
+    0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86,
+    0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2,
+    0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4,
+    0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6,
+    0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8,
+    0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea,
+    0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b,
+    0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130,
+    0x2131, 0x2133
+};
+
+#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
+
+/*
+ * Unicode: unicode print characters excluding space.
+ */
+
+static const crange graphRangeTable[] = {
+    {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233},
+    {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e},
+    {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce},
+    {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486},
+    {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f},
+    {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4},
+    {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655},
+    {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d},
+    {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0},
+    {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d},
+    {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c},
+    {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4},
+    {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a},
+    {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42},
+    {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83},
+    {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
+    {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd},
+    {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f},
+    {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43},
+    {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a},
+    {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5},
+    {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd},
+    {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10},
+    {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39},
+    {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f},
+    {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3},
+    {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd},
+    {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f},
+    {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48},
+    {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
+    {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf},
+    {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b},
+    {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9},
+    {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9},
+    {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b},
+    {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f},
+    {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059},
+    {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159},
+    {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f},
+    {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d},
+    {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5},
+    {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6},
+    {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e},
+    {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4},
+    {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676},
+    {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9},
+    {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9},
+    {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15},
+    {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
+    {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3},
+    {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe},
+    {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046},
+    {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3},
+    {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3},
+    {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b},
+    {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a},
+    {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7},
+    {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704},
+    {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b},
+    {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794},
+    {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff},
+    {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5},
+    {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094},
+    {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c},
+    {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243},
+    {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe},
+    {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe},
+    {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f},
+    {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f},
+    {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f},
+    {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f},
+    {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f},
+    {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f},
+    {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f},
+    {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f},
+    {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f},
+    {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f},
+    {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f},
+    {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f},
+    {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f},
+    {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f},
+    {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f},
+    {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f},
+    {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f},
+    {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f},
+    {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f},
+    {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f},
+    {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f},
+    {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f},
+    {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f},
+    {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f},
+    {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f},
+    {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f},
+    {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f},
+    {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f},
+    {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c},
+    {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4},
+    {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f},
+    {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f},
+    {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f},
+    {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f},
+    {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f},
+    {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f},
+    {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f},
+    {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f},
+    {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f},
+    {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f},
+    {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f},
+    {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d},
+    {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36},
+    {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f},
+    {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb},
+    {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66},
+    {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f},
+    {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf},
+    {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee},
+    {0xfffc, 0xffff}
+};
+
+#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
+
+static const chr graphCharTable[] = {
+    0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8,
+    0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f,
+    0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd,
+    0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39,
+    0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0,
+    0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c,
+    0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3,
+    0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5,
+    0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61,
+    0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
+    0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc,
+    0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288,
+    0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756,
+    0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74
+};
+
+#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
+
+/*
+ * Unicode: unicode print characters including space, i.e. all Letters (class
+ * L*), Numbers (N*), Punctuation (P*), Symbols (S*) and Spaces (Zs).
+ */
+
+static const crange printRangeTable[] = {
+    {0x0020, 0x007E}, {0x00A0, 0x01F5}, {0x01FA, 0x0217}, {0x0250, 0x02A8},
+    {0x02B0, 0x02DE}, {0x02E0, 0x02E9}, {0x0374, 0x0375}, {0x0384, 0x038A},
+    {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
+    {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0482},
+    {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB},
+    {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0559, 0x055F},
+    {0x0561, 0x0587}, {0x05D0, 0x05EA}, {0x05F0, 0x05F4}, {0x0621, 0x063A},
+    {0x0640, 0x064A}, {0x0660, 0x066D}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
+    {0x06C0, 0x06CE}, {0x06D0, 0x06D5}, {0x06E5, 0x06E6}, {0x06F0, 0x06F9},
+    {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0964, 0x0970}, {0x0985, 0x098C},
+    {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9},
+    {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09E6, 0x09FA}, {0x0A05, 0x0A0A},
+    {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33},
+    {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A66, 0x0A6F},
+    {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
+    {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0AE6, 0x0AEF},
+    {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30},
+    {0x0B32, 0x0B33}, {0x0B36, 0x0B39}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
+    {0x0B66, 0x0B70}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95},
+    {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
+    {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0BE7, 0x0BF2}, {0x0C05, 0x0C0C},
+    {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39},
+    {0x0C60, 0x0C61}, {0x0C66, 0x0C6F}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
+    {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CE0, 0x0CE1},
+    {0x0CE6, 0x0CEF}, {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28},
+    {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0D66, 0x0D6F}, {0x0E3F, 0x0E46},
+    {0x0E4F, 0x0E5B}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
+    {0x0EAD, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EC0, 0x0EC4}, {0x0ED0, 0x0ED9},
+    {0x0EDC, 0x0EDD}, {0x0F00, 0x0F17}, {0x0F1A, 0x0F34}, {0x0F3A, 0x0F3D},
+    {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x0F88, 0x0F8B}, {0x10A0, 0x10C5},
+    {0x10D0, 0x10F6}, {0x1100, 0x1159}, {0x115F, 0x11A2}, {0x11A8, 0x11F9},
+    {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
+    {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D},
+    {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB},
+    {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE}, {0x2000, 0x200B},
+    {0x2010, 0x2027}, {0x2030, 0x2046}, {0x2074, 0x208E}, {0x20A0, 0x20AC},
+    {0x2100, 0x2138}, {0x2153, 0x2182}, {0x2190, 0x21EA}, {0x2200, 0x22F1},
+    {0x2302, 0x237A}, {0x2400, 0x2424}, {0x2440, 0x244A}, {0x2460, 0x24EA},
+    {0x2500, 0x2595}, {0x25A0, 0x25EF}, {0x2600, 0x2613}, {0x261A, 0x266F},
+    {0x2701, 0x2704}, {0x2706, 0x2709}, {0x270C, 0x2727}, {0x2729, 0x274B},
+    {0x274F, 0x2752}, {0x2758, 0x275E}, {0x2761, 0x2767}, {0x2776, 0x2794},
+    {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x3000, 0x3029}, {0x3030, 0x3037},
+    {0x3041, 0x3094}, {0x309B, 0x309E}, {0x30A1, 0x30FE}, {0x3105, 0x312C},
+    {0x3131, 0x318E}, {0x3190, 0x319F}, {0x3200, 0x321C}, {0x3220, 0x3243},
+    {0x3260, 0x327B}, {0x327F, 0x32B0}, {0x32C0, 0x32CB}, {0x32D0, 0x32FE},
+    {0x3300, 0x3376}, {0x337B, 0x33DD}, {0x33E0, 0x33FE}, {0x4E00, 0x9FA5},
+    {0xAC00, 0xD7A3}, {0xF900, 0xFA2D}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17},
+    {0xFB1F, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
+    {0xFB46, 0xFBB1}, {0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7},
+    {0xFDF0, 0xFDFB}, {0xFE30, 0xFE44}, {0xFE49, 0xFE52}, {0xFE54, 0xFE66},
+    {0xFE68, 0xFE6B}, {0xFE70, 0xFE72}, {0xFE76, 0xFEFC}, {0xFF01, 0xFF5E},
+    {0xFF61, 0xFFBE}, {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7},
+    {0xFFDA, 0xFFDC}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}
+};
+
+#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange))
+
+static const chr printCharTable[] = {
+    0x037A, 0x037E, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0589, 0x05BE,
+    0x05C0, 0x05C3, 0x060C, 0x061B, 0x061F, 0x06E9, 0x093D, 0x0950, 0x09B2,
+    0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, 0x0CDE, 0x0E01,
+    0x0E32, 0x0E81, 0x0E84, 0x0E87, 0x0E8A, 0x0E8D, 0x0E94, 0x0EA5, 0x0EA7,
+    0x0EBD, 0x0EC6, 0x0F36, 0x0F38, 0x0F85, 0x10FB, 0x1F59, 0x1F5B, 0x1F5D,
+    0x2070, 0x2300, 0x274D, 0x2756, 0x303F, 0xFB3E, 0xFE74
+};
+
+#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr))
+#endif
+
+/*
+ *	End of auto-generated Unicode character ranges declarations.
+ */
+
+#define	CH	NOCELT
+
+/*
+ - element - map collating-element name to celt
+ ^ static celt element(struct vars *, const chr *, const chr *);
+ */
+static celt
+element(
+    struct vars *v,		/* context */
+    const chr *startp,		/* points to start of name */
+    const chr *endp)		/* points just past end of name */
+{
+    const struct cname *entry;
+    size_t nchrs;
+    Tcl_DString utf;
+    const char *name;
+
+    /*
+     * A one-chr name is its own collating element; no lookup is needed.
+     */
+
+    assert(startp < endp);
+    nchrs = endp - startp;
+    if (nchrs == 1) {
+	return *startp;
+    }
+
+    NOTE(REG_ULOCALE);
+
+    /*
+     * Run the name through Tcl_UniCharToUtfDString() and walk cnames until
+     * an entry of identical length and content, or the NULL sentinel, is hit.
+     */
+
+    Tcl_DStringInit(&utf);
+    name = Tcl_UniCharToUtfDString(startp, (int)nchrs, &utf);
+    entry = cnames;
+    while (entry->name != NULL
+	    && !(strlen(entry->name) == nchrs
+	    && strncmp(entry->name, name, nchrs) == 0)) {
+	entry++;
+    }
+    Tcl_DStringFree(&utf);
+    if (entry->name == NULL) {
+	/*
+	 * Ran off the end of the table: unknown collating element.
+	 */
+	ERR(REG_ECOLLATE);
+	return 0;
+    }
+    return CHR(entry->code);
+}
+
+/*
+ - range - supply cvec for a range, including legality check
+ ^ static struct cvec *range(struct vars *, celt, celt, int);
+ */
+static struct cvec *
+range(
+    struct vars *v,		/* context */
+    celt a,			/* range start */
+    celt b,			/* range end, might equal a */
+    int cases)			/* case-independent? */
+{
+    int nchrs;
+    struct cvec *cv;
+    celt c, lc, uc, tc;
+
+    if (a != b && !before(a, b)) {	/* reversed range is illegal */
+	ERR(REG_ERANGE);
+	return NULL;
+    }
+
+    if (!cases) {		/* easy version */
+	cv = getcvec(v, 0, 1);	/* no single chrs, one range */
+	NOERRN();
+	addrange(cv, a, b);
+	return cv;
+    }
+
+    /*
+     * When case-independent, it's hard to decide when cvec ranges are usable,
+     * so for now at least, we won't try. We allocate enough space for two
+     * case variants plus a little extra for the two title case variants.
+     */
+
+    nchrs = (b - a + 1)*2 + 4;
+
+    cv = getcvec(v, nchrs, 0);	/* chrs only, no ranges */
+    NOERRN();
+
+    for (c=a; c<=b; c++) {	/* list every chr of the range individually */
+	addchr(cv, c);
+	lc = Tcl_UniCharToLower((chr)c);
+	uc = Tcl_UniCharToUpper((chr)c);
+	tc = Tcl_UniCharToTitle((chr)c);
+	if (c != lc) {
+	    addchr(cv, lc);
+	}
+	if (c != uc) {
+	    addchr(cv, uc);
+	}
+	if (c != tc && tc != uc) {	/* title form distinct from both */
+	    addchr(cv, tc);
+	}
+    }
+
+    return cv;
+}
+
+/*
+ - before - is celt x before celt y, for purposes of range legality?
+ ^ static int before(celt, celt);
+ */
+static int			/* predicate */
+before(
+    celt x, celt y)		/* collating elements */
+{
+    /*
+     * Simple numeric ordering; the comparison itself yields 1 or 0.
+     */
+    return (x < y);
+}
+
+/*
+ - eclass - supply cvec for an equivalence class
+ * Must include case counterparts on request.  NULL (REG_ESPACE) on failure.
+ ^ static struct cvec *eclass(struct vars *, celt, int);
+ */
+static struct cvec *
+eclass(
+    struct vars *v,		/* context */
+    celt c,			/* Collating element representing the
+				 * equivalence class. */
+    int cases)			/* all cases? */
+{
+    struct cvec *cv;
+    int fake = (v->cflags&REG_FAKE) && c == 'x';
+
+    /*
+     * No real equivalence classes exist: apart from the crude fake class
+     * used for testing ('x' under REG_FAKE), a case-independent request is
+     * just the element's case variants.
+     */
+    if (!fake && cases) {
+	return allcases(v, c);
+    }
+
+    /* getcvec() can fail; report it rather than let addchr() hit NULL. */
+    cv = getcvec(v, fake ? 4 : 1, 0);
+    if (cv == NULL) {
+	ERR(REG_ESPACE);
+	return NULL;
+    }
+    if (!fake) {
+	addchr(cv, (chr)c);
+	return cv;
+    }
+    addchr(cv, (chr)'x');
+    addchr(cv, (chr)'y');
+    if (cases) {
+	addchr(cv, (chr)'X');
+	addchr(cv, (chr)'Y');
+    }
+    return cv;
+}
+
+/*
+ - cclass - supply cvec for a character class
+ * Must include case counterparts on request.  NULL (error set) on failure.
+ ^ static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
+ */
+static struct cvec *
+cclass(
+    struct vars *v,		/* context */
+    const chr *startp,		/* where the name starts */
+    const chr *endp,		/* just past the end of the name */
+    int cases)			/* case-independent? */
+{
+    size_t len;
+    struct cvec *cv = NULL;
+    Tcl_DString ds;
+    const char *np;
+    const char **namePtr;
+    int i, index;
+
+    /*
+     * The following arrays define the valid character class names.
+     */
+
+    static const char *classNames[] = {
+	"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
+	"lower", "print", "punct", "space", "upper", "xdigit", NULL
+    };
+
+    enum classes {
+	CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
+	CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
+    };
+
+    /*
+     * Extract the class name
+     */
+
+    len = endp - startp;
+    Tcl_DStringInit(&ds);
+    np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
+
+    /*
+     * Remap lower and upper to alpha if the match is case insensitive.
+     */
+
+    if (cases && len == 5 && (strncmp("lower", np, 5) == 0
+	    || strncmp("upper", np, 5) == 0)) {
+	np = "alpha";
+    }
+
+    /*
+     * Map the name to the corresponding enumerated value.
+     */
+
+    index = -1;
+    for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
+	if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
+	    index = i;
+	    break;
+	}
+    }
+    Tcl_DStringFree(&ds);
+    if (index == -1) {
+	ERR(REG_ECTYPE);
+	return NULL;
+    }
+
+    /*
+     * Now compute the character class contents.
+     */
+
+    switch((enum classes) index) {
+    case CC_PRINT:
+	cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) {
+		addchr(cv, printCharTable[i]);
+	    }
+	    for (i=0 ; (size_t)i<NUM_PRINT_RANGE ; i++) {
+		addrange(cv, printRangeTable[i].start,
+			printRangeTable[i].end);
+	    }
+	}
+	break;
+    case CC_ALNUM:
+	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) {
+		addchr(cv, alphaCharTable[i]);
+	    }
+	    for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) {
+		addrange(cv, alphaRangeTable[i].start,
+			alphaRangeTable[i].end);
+	    }
+	    for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
+		addrange(cv, digitRangeTable[i].start,
+			digitRangeTable[i].end);
+	    }
+	}
+	break;
+    case CC_ALPHA:
+	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) {
+		addrange(cv, alphaRangeTable[i].start,
+			alphaRangeTable[i].end);
+	    }
+	    for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) {
+		addchr(cv, alphaCharTable[i]);
+	    }
+	}
+	break;
+    case CC_ASCII:
+	cv = getcvec(v, 0, 1);
+	if (cv) {
+	    addrange(cv, 0, 0x7f);
+	}
+	break;
+    case CC_BLANK:
+	cv = getcvec(v, 2, 0);
+	if (cv) {		/* getcvec() may fail; see end of switch */
+	    addchr(cv, '\t');
+	    addchr(cv, ' ');
+	}
+	break;
+    case CC_CNTRL:
+	cv = getcvec(v, 0, 2);
+	if (cv) {		/* getcvec() may fail; see end of switch */
+	    addrange(cv, 0x0, 0x1f);
+	    addrange(cv, 0x7f, 0x9f);
+	}
+	break;
+    case CC_DIGIT:
+	cv = getcvec(v, 0, NUM_DIGIT_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
+		addrange(cv, digitRangeTable[i].start,
+			digitRangeTable[i].end);
+	    }
+	}
+	break;
+    case CC_PUNCT:
+	cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) {
+		addrange(cv, punctRangeTable[i].start,
+			punctRangeTable[i].end);
+	    }
+	    for (i=0 ; (size_t)i<NUM_PUNCT_CHAR ; i++) {
+		addchr(cv, punctCharTable[i]);
+	    }
+	}
+	break;
+    case CC_XDIGIT:
+	/*
+	 * This is a 3 instead of (NUM_DIGIT_RANGE+2) because it is unclear
+	 * how to define the digits 'a' through 'f' in non-western locales.
+	 * The concept may well be non-portable, or only used in contexts
+	 * where the western characters apply anyway, so the count is fixed.
+	 */
+	cv = getcvec(v, 0, 3);
+	if (cv) {
+	    addrange(cv, '0', '9');
+	    addrange(cv, 'a', 'f');
+	    addrange(cv, 'A', 'F');
+	}
+	break;
+    case CC_SPACE:
+	cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) {
+		addrange(cv, spaceRangeTable[i].start,
+			spaceRangeTable[i].end);
+	    }
+	    for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) {
+		addchr(cv, spaceCharTable[i]);
+	    }
+	}
+	break;
+    case CC_LOWER:
+	cv  = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) {
+		addrange(cv, lowerRangeTable[i].start,
+			lowerRangeTable[i].end);
+	    }
+	    for (i=0 ; (size_t)i<NUM_LOWER_CHAR ; i++) {
+		addchr(cv, lowerCharTable[i]);
+	    }
+	}
+	break;
+    case CC_UPPER:
+	cv  = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) {
+		addrange(cv, upperRangeTable[i].start,
+			upperRangeTable[i].end);
+	    }
+	    for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) {
+		addchr(cv, upperCharTable[i]);
+	    }
+	}
+	break;
+    case CC_GRAPH:
+	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE);
+	if (cv) {
+	    for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
+		addrange(cv, graphRangeTable[i].start,
+			graphRangeTable[i].end);
+	    }
+	    for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) {
+		addchr(cv, graphCharTable[i]);
+	    }
+	}
+	break;
+    }
+    if (cv == NULL) {
+	ERR(REG_ESPACE);
+    }
+    return cv;
+}
+
+/*
+ - allcases - supply cvec for all case counterparts of a chr (including itself)
+ * This is a shortcut, preferably an efficient one, for simple characters;
+ * messy cases are done via range().  Returns NULL (REG_ESPACE) on failure.
+ ^ static struct cvec *allcases(struct vars *, pchr);
+ */
+static struct cvec *
+allcases(
+    struct vars *v,		/* context */
+    pchr pc)			/* character to get case equivs of */
+{
+    struct cvec *cv;
+    chr c = (chr)pc;
+    chr lc = Tcl_UniCharToLower(c);
+    chr uc = Tcl_UniCharToUpper(c);
+    chr tc = Tcl_UniCharToTitle(c);
+
+    cv = getcvec(v, (tc != uc) ? 3 : 2, 0);	/* lower, upper [, title] */
+    if (cv == NULL) {		/* don't let addchr() dereference NULL */
+	ERR(REG_ESPACE);
+	return NULL;
+    }
+    if (tc != uc) {
+	addchr(cv, tc);
+    }
+    addchr(cv, lc);
+    if (lc != uc) {
+	addchr(cv, uc);
+    }
+    return cv;
+}
+
+/*
+ - cmp - chr-substring compare
+ * Backrefs need this.  It should preferably be efficient.
+ * Only equal/unequal is reported.  len counts chrs, not bytes (it is scaled
+ * by sizeof(chr) for memcmp), and the comparison is exact: it does not
+ * stop at embedded NULs!
+ ^ static int cmp(const chr *, const chr *, size_t);
+ */
+static int			/* 0 for equal, nonzero for unequal */
+cmp(
+    const chr *x, const chr *y,	/* strings to compare */
+    size_t len)			/* exact length of comparison */
+{
+    return memcmp(VS(x), VS(y), len*sizeof(chr));
+}
+
+/*
+ - casecmp - case-independent chr-substring compare
+ * Used for REG_ICASE backrefs.  Only equal/unequal is reported.  The length
+ * is exact: embedded NULs do not end the comparison.
+ ^ static int casecmp(const chr *, const chr *, size_t);
+ */
+static int			/* 0 for equal, nonzero for unequal */
+casecmp(
+    const chr *x, const chr *y,	/* strings to compare */
+    size_t len)			/* exact length of comparison */
+{
+    size_t i;			/* index of the chr pair being compared */
+    for (i = 0; i < len; i++) {
+	if (x[i] != y[i]
+		&& Tcl_UniCharToLower(x[i]) != Tcl_UniCharToLower(y[i])) {
+	    return 1;
+	}
+    }
+    return 0;
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_nfa.c b/contrib/hsrex/regc_nfa.c
new file mode 100644
index 0000000..04d2f46
--- /dev/null
+++ b/contrib/hsrex/regc_nfa.c
@@ -0,0 +1,1873 @@
+/*
+ * NFA utilities.
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * One or two things that technically ought to be in here are actually in
+ * color.c, thanks to some incestuous relationships in the color chains.
+ */
+
+#define	NISERR()	VISERR(nfa->v)	/* needs a local named `nfa' */
+#define	NERR(e)		VERR(nfa->v, (e))	/* needs a local named `nfa' */
+
+/*
+ - newnfa - set up an NFA; NULL, with the error recorded in v, on failure
+ ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
+ */
+static struct nfa *		/* the NFA, or NULL */
+newnfa(
+    struct vars *v,
+    struct colormap *cm,
+    struct nfa *parent)		/* NULL if primary NFA */
+{
+    struct nfa *nfa;
+
+    nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
+    if (nfa == NULL) {
+	ERR(REG_ESPACE);	/* record it so error-state checks notice */
+	return NULL;
+    }
+
+    nfa->states = NULL;
+    nfa->slast = NULL;
+    nfa->free = NULL;
+    nfa->nstates = 0;
+    nfa->cm = cm;
+    nfa->v = v;
+    nfa->size = 0;
+    nfa->bos[0] = nfa->bos[1] = COLORLESS;
+    nfa->eos[0] = nfa->eos[1] = COLORLESS;
+    nfa->parent = parent;	/* Precedes newfstate so parent is valid. */
+    nfa->post = newfstate(nfa, '@');	/* number 0 */
+    nfa->pre = newfstate(nfa, '>');	/* number 1 */
+
+    nfa->init = newstate(nfa);	/* May become invalid later. */
+    nfa->final = newstate(nfa);
+    if (ISERR()) {		/* some state allocation above failed */
+	freenfa(nfa);
+	return NULL;
+    }
+    rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
+    newarc(nfa, '^', 1, nfa->pre, nfa->init);
+    newarc(nfa, '^', 0, nfa->pre, nfa->init);
+    rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
+    newarc(nfa, '$', 1, nfa->final, nfa->post);
+    newarc(nfa, '$', 0, nfa->final, nfa->post);
+    if (ISERR()) {		/* an arc allocation above failed */
+	freenfa(nfa);
+	return NULL;
+    }
+    return nfa;
+}
+
+/*
+ - TooManyStates - checks if the max states exceeds the compile-time value
+ ^ static int TooManyStates(struct nfa *);
+ */
+static int
+TooManyStates(
+    struct nfa *nfa)
+{
+    struct nfa *top = nfa;
+    size_t sz;
+
+    /*
+     * Climb to the outermost NFA; the limit is judged on its size alone.
+     */
+    while (top->parent != NULL) {
+	top = top->parent;
+    }
+    sz = top->size;
+    return (sz > REG_MAX_STATES) ? 1 : 0;
+}
+
+/*
+ - IncrementSize - increases the tracked size of the NFA and its parents.
+ ^ static void IncrementSize(struct nfa *);
+ */
+static void
+IncrementSize(
+    struct nfa *nfa)
+{
+    struct nfa *cur;
+
+    /* Count the new state in this NFA and in every ancestor, walking up
+     * the parent chain until the root has been bumped too. */
+    for (cur = nfa; cur != NULL; cur = cur->parent) {
+	cur->size++;
+    }
+}
+
+/*
+ - DecrementSize - decreases the tracked size of the NFA and its parents.
+ ^ static void DecrementSize(struct nfa *);
+ */
+static void
+DecrementSize(
+    struct nfa *nfa)
+{
+    struct nfa *cur;
+
+    /* Uncount one state in this NFA and in every ancestor, walking up
+     * the parent chain until the root has been adjusted too. */
+    for (cur = nfa; cur != NULL; cur = cur->parent) {
+	cur->size--;
+    }
+}
+
+/*
+ - freenfa - free an entire NFA
+ ^ static void freenfa(struct nfa *);
+ */
+static void
+freenfa(
+    struct nfa *nfa)
+{
+    struct state *s;
+
+    while ((s = nfa->states) != NULL) {
+	s->nins = s->nouts = 0;	/* don't worry about arcs */
+	freestate(nfa, s);	/* unlinks s and parks it on nfa->free */
+    }
+    while ((s = nfa->free) != NULL) {
+	nfa->free = s->next;
+	destroystate(nfa, s);	/* releases s and its extra arc batches */
+    }
+
+    nfa->slast = NULL;
+    nfa->nstates = -1;		/* would trip newstate()'s nstates>=0 assert */
+    nfa->pre = NULL;
+    nfa->post = NULL;
+    FREE(nfa);
+}
+
+/*
+ - newstate - allocate an NFA state, with zero flag value
+ ^ static struct state *newstate(struct nfa *);
+ */
+static struct state *		/* NULL on error */
+newstate(
+    struct nfa *nfa)
+{
+    struct state *s;
+
+    if (TooManyStates(nfa)) {
+	/* XXX: add specific error for this */
+	NERR(REG_ETOOBIG);
+	return NULL;
+    }
+    if (nfa->free != NULL) {	/* recycle a previously freed state */
+	s = nfa->free;
+	nfa->free = s->next;
+    } else {
+	s = (struct state *) MALLOC(sizeof(struct state));
+	if (s == NULL) {
+	    NERR(REG_ESPACE);
+	    return NULL;
+	}
+	s->oas.next = NULL;	/* arc storage is set up only for fresh */
+	s->free = NULL;		/* states; a recycled one keeps what it */
+	s->noas = 0;		/* already had */
+    }
+
+    assert(nfa->nstates >= 0);
+    s->no = nfa->nstates++;	/* numbers are handed out sequentially */
+    s->flag = 0;
+    if (nfa->states == NULL) {	/* first state becomes the list head */
+	nfa->states = s;
+    }
+    s->nins = 0;
+    s->ins = NULL;
+    s->nouts = 0;
+    s->outs = NULL;
+    s->tmp = NULL;
+    s->next = NULL;
+    if (nfa->slast != NULL) {	/* append after the current tail */
+	assert(nfa->slast->next == NULL);
+	nfa->slast->next = s;
+    }
+    s->prev = nfa->slast;
+    nfa->slast = s;
+
+    /*
+     * Track the current size and the parent size.
+     */
+
+    IncrementSize(nfa);
+    return s;
+}
+
+/*
+ - newfstate - allocate an NFA state with a specified flag value
+ ^ static struct state *newfstate(struct nfa *, int flag);
+ */
+static struct state *		/* NULL on error */
+newfstate(
+    struct nfa *nfa,
+    int flag)
+{
+    struct state *fresh = newstate(nfa);
+
+    if (fresh == NULL) {
+	return NULL;		/* newstate() already recorded the error */
+    }
+    fresh->flag = (char) flag;
+    return fresh;
+}
+
+/*
+ - dropstate - delete a state's inarcs and outarcs and free it
+ ^ static void dropstate(struct nfa *, struct state *);
+ */
+static void
+dropstate(
+    struct nfa *nfa,
+    struct state *s)
+{
+    /* freearc() unlinks its victim from both chains, so keep discarding
+     * the chain heads until each chain is empty. */
+    while (s->ins != NULL) {
+	freearc(nfa, s->ins);
+    }
+    while (s->outs != NULL) {
+	freearc(nfa, s->outs);
+    }
+    freestate(nfa, s);
+}
+
+/*
+ - freestate - free a state, which has no in-arcs or out-arcs
+ ^ static void freestate(struct nfa *, struct state *);
+ */
+static void
+freestate(
+    struct nfa *nfa,
+    struct state *s)
+{
+    assert(s != NULL);
+    assert(s->nins == 0 && s->nouts == 0);
+
+    s->no = FREESTATE;		/* destroystate() asserts on this marker */
+    s->flag = 0;
+    if (s->next != NULL) {	/* unlink from the doubly-linked state list */
+	s->next->prev = s->prev;
+    } else {			/* s was the tail */
+	assert(s == nfa->slast);
+	nfa->slast = s->prev;
+    }
+    if (s->prev != NULL) {
+	s->prev->next = s->next;
+    } else {			/* s was the head */
+	assert(s == nfa->states);
+	nfa->states = s->next;
+    }
+    s->prev = NULL;
+    s->next = nfa->free;	/* don't delete it, put it on the free list */
+    nfa->free = s;
+    DecrementSize(nfa);		/* undo newstate()'s IncrementSize() */
+}
+
+/*
+ - destroystate - really get rid of an already-freed state
+ ^ static void destroystate(struct nfa *, struct state *);
+ */
+static void
+destroystate(
+    struct nfa *nfa,
+    struct state *s)
+{
+    struct arcbatch *batch;
+    assert(s->no == FREESTATE);
+    batch = s->oas.next;	/* overflow batches beyond the embedded one */
+    while (batch != NULL) {
+	struct arcbatch *following = batch->next;
+	FREE(batch);
+	batch = following;
+    }
+    s->ins = NULL;
+    s->outs = NULL;
+    s->next = NULL;
+    FREE(s);
+}
+
+/*
+ - newarc - set up a new arc within an NFA
+ ^ static void newarc(struct nfa *, int, pcolor, struct state *,
+ ^	struct state *);
+ */
+static void
+newarc(
+    struct nfa *nfa,
+    int t,			/* arc type */
+    pcolor co,			/* arc color */
+    struct state *from,		/* source state */
+    struct state *to)		/* target state */
+{
+    struct arc *a;
+
+    assert(from != NULL && to != NULL);
+
+    /*
+     * Check for duplicates.
+     */
+
+    for (a=from->outs ; a!=NULL ; a=a->outchain) {
+	if (a->to == to && a->co == co && a->type == t) {
+	    return;		/* identical arc already exists */
+	}
+    }
+
+    a = allocarc(nfa, from);	/* arc storage belongs to the source state */
+    if (NISERR()) {
+	return;
+    }
+    assert(a != NULL);
+
+    a->type = t;
+    a->co = (color) co;
+    a->to = to;
+    a->from = from;
+
+    /*
+     * Put the new arc on the beginning, not the end, of the chains. Not only
+     * is this easier, it has the very useful side effect that deleting the
+     * most-recently-added arc is the cheapest case rather than the most
+     * expensive one.
+     */
+
+    a->inchain = to->ins;
+    to->ins = a;
+    a->outchain = from->outs;
+    from->outs = a;
+
+    from->nouts++;
+    to->nins++;
+
+    if (COLORED(a) && nfa->parent == NULL) {	/* primary NFA only */
+	colorchain(nfa->cm, a);
+    }
+}
+
+/*
+ - allocarc - allocate a new out-arc within a state
+ ^ static struct arc *allocarc(struct nfa *, struct state *);
+ */
+static struct arc *		/* NULL for failure */
+allocarc(
+    struct nfa *nfa,
+    struct state *s)		/* state that will own the arc */
+{
+    struct arc *a;
+
+    /*
+     * Shortcut: the state's embedded batch still has never-used slots.
+     */
+
+    if (s->free == NULL && s->noas < ABSIZE) {
+	a = &s->oas.a[s->noas];
+	s->noas++;
+	return a;
+    }
+
+    /*
+     * if none at hand, get more
+     */
+
+    if (s->free == NULL) {
+	struct arcbatch *newAb = (struct arcbatch *)
+		MALLOC(sizeof(struct arcbatch));
+	int i;
+
+	if (newAb == NULL) {
+	    NERR(REG_ESPACE);
+	    return NULL;
+	}
+	newAb->next = s->oas.next;	/* chain it behind the embedded batch */
+	s->oas.next = newAb;
+
+	for (i=0 ; i<ABSIZE ; i++) {	/* thread all slots onto a free chain */
+	    newAb->a[i].type = 0;
+	    newAb->a[i].freechain = &newAb->a[i+1];
+	}
+	newAb->a[ABSIZE-1].freechain = NULL;	/* fix the last link */
+	s->free = &newAb->a[0];
+    }
+    assert(s->free != NULL);
+
+    a = s->free;		/* pop the head of the free chain */
+    s->free = a->freechain;
+    return a;
+}
+
+/*
+ - freearc - free an arc
+ ^ static void freearc(struct nfa *, struct arc *);
+ */
+static void
+freearc(
+    struct nfa *nfa,
+    struct arc *victim)		/* arc to unlink and recycle */
+{
+    struct state *from = victim->from;
+    struct state *to = victim->to;
+    struct arc *a;
+
+    assert(victim->type != 0);	/* type 0 marks an arc that is not in use */
+
+    /*
+     * Take it off color chain if necessary.
+     */
+
+    if (COLORED(victim) && nfa->parent == NULL) {
+	uncolorchain(nfa->cm, victim);
+    }
+
+    /*
+     * Take it off source's out-chain.
+     */
+
+    assert(from != NULL);
+    assert(from->outs != NULL);
+    a = from->outs;
+    if (a == victim) {		/* simple case: first in chain */
+	from->outs = victim->outchain;
+    } else {			/* find the predecessor and splice */
+	for (; a!=NULL && a->outchain!=victim ; a=a->outchain) {
+	    continue;
+	}
+	assert(a != NULL);
+	a->outchain = victim->outchain;
+    }
+    from->nouts--;
+
+    /*
+     * Take it off target's in-chain.
+     */
+
+    assert(to != NULL);
+    assert(to->ins != NULL);
+    a = to->ins;
+    if (a == victim) {		/* simple case: first in chain */
+	to->ins = victim->inchain;
+    } else {			/* find the predecessor and splice */
+	for (; a->inchain!=victim ; a=a->inchain) {
+	    assert(a->inchain != NULL);
+	    continue;
+	}
+	a->inchain = victim->inchain;
+    }
+    to->nins--;
+
+    /*
+     * Clean up and place on the source state's free list.
+     */
+
+    victim->type = 0;
+    victim->from = NULL;	/* precautions... */
+    victim->to = NULL;
+    victim->inchain = NULL;
+    victim->outchain = NULL;
+    victim->freechain = from->free;
+    from->free = victim;
+}
+
+/*
+ - findarc - find arc, if any, from given source with given type and color
+ * If there is more than one such arc, the result is random.
+ ^ static struct arc *findarc(struct state *, int, pcolor);
+ */
+static struct arc *
+findarc(
+    struct state *s,
+    int type,
+    pcolor co)
+{
+    struct arc *cand = s->outs;
+
+    /* Walk the out-chain and stop at the first arc whose type and color
+     * both match; running off the end leaves NULL to return. */
+    while (cand != NULL && !(cand->type == type && cand->co == co)) {
+	cand = cand->outchain;
+    }
+    return cand;
+}
+
+/*
+ - cparc - allocate a new arc within an NFA, copying details from old one
+ ^ static void cparc(struct nfa *, struct arc *, struct state *,
+ ^ 	struct state *);
+ */
+static void
+cparc(
+    struct nfa *nfa,
+    struct arc *oa,		/* arc whose type and color are copied */
+    struct state *from,		/* source of the new arc */
+    struct state *to)		/* target of the new arc */
+{
+    newarc(nfa, oa->type, oa->co, from, to);	/* newarc() drops duplicates */
+}
+
+/*
+ - moveins - move all in arcs of a state to another state
+ * You might think this could be done better by just updating the
+ * existing arcs, and you would be right if it weren't for the desire
+ * for duplicate suppression, which makes it easier to just make new
+ * ones to exploit the suppression built into newarc.
+ ^ static void moveins(struct nfa *, struct state *, struct state *);
+ */
+static void
+moveins(
+    struct nfa *nfa,
+    struct state *oldState,	/* loses all of its in-arcs */
+    struct state *newState)	/* receives copies of them */
+{
+    struct arc *a;
+
+    assert(oldState != newState);
+
+    while ((a = oldState->ins) != NULL) {
+	cparc(nfa, a, a->from, newState);	/* copy first: needs a->from */
+	freearc(nfa, a);	/* unlinks a, so the chain head advances */
+    }
+    assert(oldState->nins == 0);
+    assert(oldState->ins == NULL);
+}
+
+/*
+ - copyins - copy all in arcs of a state to another state
+ ^ static void copyins(struct nfa *, struct state *, struct state *);
+ */
+static void
+copyins(
+    struct nfa *nfa,
+    struct state *oldState,
+    struct state *newState)
+{
+    struct arc *in = oldState->ins;
+
+    assert(oldState != newState);
+    while (in != NULL) {
+	cparc(nfa, in, in->from, newState);	/* same source, new target */
+	in = in->inchain;
+    }
+}
+
+/*
+ - moveouts - move all out arcs of a state to another state
+ ^ static void moveouts(struct nfa *, struct state *, struct state *);
+ */
+static void
+moveouts(
+    struct nfa *nfa,
+    struct state *oldState,	/* loses all of its out-arcs */
+    struct state *newState)	/* receives copies of them */
+{
+    struct arc *a;
+
+    assert(oldState != newState);
+
+    while ((a = oldState->outs) != NULL) {
+	cparc(nfa, a, newState, a->to);	/* copy first: needs a->to */
+	freearc(nfa, a);	/* unlinks a, so the chain head advances */
+    }
+}
+
+/*
+ - copyouts - copy all out arcs of a state to another state
+ ^ static void copyouts(struct nfa *, struct state *, struct state *);
+ */
+static void
+copyouts(
+    struct nfa *nfa,
+    struct state *oldState,
+    struct state *newState)
+{
+    struct arc *out = oldState->outs;
+
+    assert(oldState != newState);
+    while (out != NULL) {
+	cparc(nfa, out, newState, out->to);	/* new source, same target */
+	out = out->outchain;
+    }
+}
+
+/*
+ - cloneouts - copy out arcs of a state to another state pair, modifying type
+ ^ static void cloneouts(struct nfa *, struct state *, struct state *,
+ ^ 	struct state *, int);
+ */
+static void
+cloneouts(
+    struct nfa *nfa,
+    struct state *old,
+    struct state *from,
+    struct state *to,
+    int type)
+{
+    struct arc *out = old->outs;
+
+    assert(old != from);
+    while (out != NULL) {
+	newarc(nfa, type, out->co, from, to);	/* keep color, swap type */
+	out = out->outchain;
+    }
+}
+
+/*
+ - delsub - delete a sub-NFA, updating subre pointers if necessary
+ * This uses a recursive traversal of the sub-NFA, marking already-seen
+ * states using their tmp pointer.
+ ^ static void delsub(struct nfa *, struct state *, struct state *);
+ */
+static void
+delsub(
+    struct nfa *nfa,
+    struct state *lp,		/* the sub-NFA goes from here... */
+    struct state *rp)		/* ...to here, *not* inclusive */
+{
+    assert(lp != rp);
+
+    rp->tmp = rp;		/* mark end */
+
+    deltraverse(nfa, lp, lp);	/* lp is both the left end and the start */
+    assert(lp->nouts == 0 && rp->nins == 0);	/* did the job */
+    assert(lp->no != FREESTATE && rp->no != FREESTATE);	/* no more */
+
+    rp->tmp = NULL;		/* unmark end */
+    lp->tmp = NULL;		/* and begin, marked by deltraverse */
+}
+
+/*
+ - deltraverse - the recursive heart of delsub
+ * This routine's basic job is to destroy all out-arcs of the state.
+ ^ static void deltraverse(struct nfa *, struct state *, struct state *);
+ */
+static void
+deltraverse(
+    struct nfa *nfa,
+    struct state *leftend,	/* start of the sub-NFA; only used in asserts */
+    struct state *s)		/* state whose out-arcs are to be destroyed */
+{
+    struct arc *a;
+    struct state *to;
+
+    if (s->nouts == 0) {
+	return;			/* nothing to do */
+    }
+    if (s->tmp != NULL) {
+	return;			/* already in progress */
+    }
+
+    s->tmp = s;			/* mark as in progress */
+
+    while ((a = s->outs) != NULL) {
+	to = a->to;		/* remember the target; freearc clears a->to */
+	deltraverse(nfa, leftend, to);
+	assert(to->nouts == 0 || to->tmp != NULL);
+	freearc(nfa, a);
+	if (to->nins == 0 && to->tmp == NULL) {	/* orphaned and unmarked */
+	    assert(to->nouts == 0);
+	    freestate(nfa, to);
+	}
+    }
+
+    assert(s->no != FREESTATE);	/* we're still here */
+    assert(s == leftend || s->nins != 0);	/* and still reachable */
+    assert(s->nouts == 0);	/* but have no outarcs */
+
+    s->tmp = NULL;		/* we're done here */
+}
+
+/*
+ - dupnfa - duplicate sub-NFA
+ * Another recursive traversal, this time using tmp to point to duplicates as
+ * well as mark already-seen states. (You knew there was a reason why it's a
+ * state pointer, didn't you? :-))
+ ^ static void dupnfa(struct nfa *, struct state *, struct state *,
+ ^ 	struct state *, struct state *);
+ */
+static void
+dupnfa(
+    struct nfa *nfa,
+    struct state *start,	/* duplicate of subNFA starting here */
+    struct state *stop,		/* and stopping here */
+    struct state *from,		/* stringing duplicate from here */
+    struct state *to)		/* to here */
+{
+    if (start == stop) {	/* empty sub-NFA: one EMPTY arc suffices */
+	newarc(nfa, EMPTY, 0, from, to);
+	return;
+    }
+
+    stop->tmp = to;		/* pre-seed: stop's duplicate is `to' */
+    duptraverse(nfa, start, from, 0);	/* start's duplicate is `from' */
+    /* done, except for clearing out the tmp pointers */
+
+    stop->tmp = NULL;
+    cleartraverse(nfa, start);
+}
+
+/*
+ - duptraverse - recursive heart of dupnfa
+ ^ static void duptraverse(struct nfa *, struct state *, struct state *, int);
+ */
+static void
+duptraverse(
+    struct nfa *nfa,
+    struct state *s,
+    struct state *stmp,		/* s's duplicate, or NULL */
+    int depth)			/* current recursion depth */
+{
+    struct arc *a;
+
+    if (s->tmp != NULL) {
+	return;			/* already done */
+    }
+
+    s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
+    if (s->tmp == NULL) {
+	assert(NISERR());
+	return;
+    }
+
+    /*
+     * Arbitrary depth limit. Needs tuning, but this value is sufficient to
+     * make all normal tests (not reg-33.14) pass.
+     */
+    /* Updated from 500 to 1204 to support REs with 99 group patterns.
+     * Why limit the tree depth at all?
+     * If long REs are not needed then just don't write long REs.
+     */
+#define DUPTRAVERSE_MAX_DEPTH 1204
+
+    if (depth++ > DUPTRAVERSE_MAX_DEPTH) {
+	NERR(REG_ESPACE);	/* the loop below stops once NISERR() is true */
+    }
+
+    for (a=s->outs ; a!=NULL && !NISERR() ; a=a->outchain) {
+	duptraverse(nfa, a->to, NULL, depth);
+	if (NISERR()) {
+	    break;
+	}
+	assert(a->to->tmp != NULL);
+	cparc(nfa, a, s->tmp, a->to->tmp);	/* same arc between duplicates */
+    }
+}
+
+/*
+ - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
+ ^ static void cleartraverse(struct nfa *, struct state *);
+ */
+static void
+cleartraverse(
+    struct nfa *nfa,
+    struct state *s)
+{
+    struct arc *out;		/* cursor over s's out-arcs */
+
+    if (s->tmp != NULL) {
+	s->tmp = NULL;		/* unmark first, so cycles back to s stop here */
+	out = s->outs;
+	while (out != NULL) {
+	    cleartraverse(nfa, out->to);
+	    out = out->outchain;
+	}
+    }
+}
+
+/*
+ - specialcolors - fill in special colors for an NFA
+ ^ static void specialcolors(struct nfa *);
+ */
+static void
+specialcolors(
+    struct nfa *nfa)
+{
+    struct nfa *parent = nfa->parent;
+
+    /* BOS, BOL, EOS, EOL use false colors; a child NFA takes its parent's. */
+    if (parent != NULL) {
+	assert(parent->bos[0] != COLORLESS);
+	assert(parent->bos[1] != COLORLESS);
+	assert(parent->eos[0] != COLORLESS);
+	assert(parent->eos[1] != COLORLESS);
+	nfa->bos[0] = parent->bos[0];
+	nfa->bos[1] = parent->bos[1];
+	nfa->eos[0] = parent->eos[0];
+	nfa->eos[1] = parent->eos[1];
+	return;
+    }
+    /* A parentless NFA obtains four pseudocolors of its own, in this order. */
+    nfa->bos[0] = pseudocolor(nfa->cm);
+    nfa->bos[1] = pseudocolor(nfa->cm);
+    nfa->eos[0] = pseudocolor(nfa->cm);
+    nfa->eos[1] = pseudocolor(nfa->cm);
+}
+
+/*
+ - optimize - optimize an NFA
+ ^ static long optimize(struct nfa *, FILE *);
+ */
+static long			/* re_info bits */
+optimize(
+    struct nfa *nfa,
+    FILE *f)			/* for debug output; NULL none */
+{
+    long info;			/* re_info bits reported by analyze() */
+
+    if (f != NULL) {
+	fprintf(f, "\ninitial cleanup:\n");
+    }
+    cleanup(nfa);		/* first pass may simplify the situation */
+    if (f != NULL) {
+	dumpnfa(nfa, f);
+	fprintf(f, "\nempties:\n");
+    }
+    fixempties(nfa, f);		/* eliminate EMPTY arcs */
+    if (f != NULL) {
+	fprintf(f, "\nconstraints:\n");
+    }
+    pullback(nfa, f);		/* back constraints move backward */
+    pushfwd(nfa, f);		/* forward constraints move forward */
+    if (f != NULL) {
+	fprintf(f, "\nfinal cleanup:\n");
+    }
+    cleanup(nfa);		/* tidy up whatever the passes left behind */
+    /* The analysis result is what the caller gets back. */
+    info = analyze(nfa);
+    return info;
+}
+
+/*
+ - pullback - pull back constraints backward to (with luck) eliminate them
+ ^ static void pullback(struct nfa *, FILE *);
+ */
+static void
+pullback(
+    struct nfa *nfa,
+    FILE *f)			/* for debug output; NULL none */
+{
+    struct state *s;
+    struct state *nexts;	/* saved before pull(), which may delete s */
+    struct arc *a;
+    struct arc *nexta;		/* saved before pull(), which may free a */
+    int progress;		/* did the current sweep pull anything? */
+
+    /*
+     * Find and pull until there are no more: sweep all states' out-arcs
+     * repeatedly until a sweep makes no progress or an error is flagged.
+     */
+    do {
+	progress = 0;
+	for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) {
+	    nexts = s->next;
+	    for (a=s->outs ; a!=NULL && !NISERR() ; a=nexta) {
+		nexta = a->outchain;
+		if (a->type == '^' || a->type == BEHIND) {	/* back constraints */
+		    if (pull(nfa, a)) {
+			progress = 1;
+		    }
+		}
+		assert(nexta == NULL || s->no != FREESTATE);
+	    }
+	}
+	if (progress && f != NULL) {
+	    dumpnfa(nfa, f);
+	}
+    } while (progress && !NISERR());
+    if (NISERR()) {
+	return;
+    }
+    /* Turn any '^' arcs still leaving pre into PLAIN arcs on the bos colors. */
+    for (a=nfa->pre->outs ; a!=NULL ; a=nexta) {
+	nexta = a->outchain;
+	if (a->type == '^') {
+	    assert(a->co == 0 || a->co == 1);
+	    newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
+	    freearc(nfa, a);
+	}
+    }
+}
+
+/*
+ - pull - pull a back constraint backward past its source state
+ * A significant property of this function is that it deletes at most
+ * one state -- the constraint's from state -- and only if the constraint
+ * was that state's last outarc.
+ ^ static int pull(struct nfa *, struct arc *);
+ */
+static int			/* 0 couldn't, 1 could */
+pull(
+    struct nfa *nfa,
+    struct arc *con)		/* the '^' or BEHIND constraint arc to pull */
+{
+    struct state *from = con->from;
+    struct state *to = con->to;
+    struct arc *a;
+    struct arc *nexta;		/* saved up front; a may be freed in the loop */
+    struct state *s;		/* scratch: clone of from, or a new mid state */
+
+    if (from == to) {		/* circular constraint is pointless */
+	freearc(nfa, con);
+	return 1;
+    }
+    if (from->flag) {		/* can't pull back beyond start */
+	return 0;
+    }
+    if (from->nins == 0) {	/* unreachable */
+	freearc(nfa, con);
+	return 1;
+    }
+
+    /*
+     * DGP 2007-11-15: Cloning a state with a circular constraint on its list
+     * of outs can lead to trouble [Bug 1810038], so get rid of them first.
+     */
+    for (a = from->outs; a != NULL; a = nexta) {
+	nexta = a->outchain;
+	switch (a->type) {
+	case '^':
+	case '$':
+	case BEHIND:
+	case AHEAD:
+	    if (from == a->to) {
+		freearc(nfa, a);
+	    }
+	    break;
+	}
+    }
+
+    /*
+     * First, clone from state if necessary to avoid other outarcs.
+     */
+    if (from->nouts > 1) {
+	s = newstate(nfa);
+	if (NISERR()) {
+	    return 0;
+	}
+	assert(to != from);	/* con is not an inarc */
+	copyins(nfa, from, s);	/* duplicate inarcs */
+	cparc(nfa, con, s, to);	/* move constraint arc */
+	freearc(nfa, con);
+	if (NISERR()) {		/* a copy failed: s->outs may be NULL, bail out */
+	    return 0;
+	}
+	from = s;
+	con = from->outs;
+    }
+    assert(from->nouts == 1);
+
+    /*
+     * Propagate the constraint into the from state's inarcs.
+     */
+    for (a=from->ins ; a!=NULL ; a=nexta) {
+	nexta = a->inchain;
+	switch (combine(con, a)) {
+	case INCOMPATIBLE:	/* destroy the arc */
+	    freearc(nfa, a);
+	    break;
+	case SATISFIED:		/* no action needed */
+	    break;
+	case COMPATIBLE:	/* swap the two arcs, more or less */
+	    s = newstate(nfa);
+	    if (NISERR()) {
+		return 0;
+	    }
+	    cparc(nfa, a, s, to);	/* anticipate move */
+	    cparc(nfa, con, a->from, s);
+	    if (NISERR()) {
+		return 0;
+	    }
+	    freearc(nfa, a);
+	    break;
+	default:
+	    assert(NOTREACHED);
+	    break;
+	}
+    }
+
+    /*
+     * Remaining inarcs, if any, incorporate the constraint.
+     */
+
+    moveins(nfa, from, to);
+    dropstate(nfa, from);	/* will free the constraint */
+    return 1;
+}
+
+/*
+ - pushfwd - push forward constraints forward to (with luck) eliminate them
+ ^ static void pushfwd(struct nfa *, FILE *);
+ */
+static void
+pushfwd(
+    struct nfa *nfa,
+    FILE *f)			/* for debug output; NULL none */
+{
+    struct state *s;
+    struct state *nexts;	/* saved before push(), which may delete s */
+    struct arc *a;
+    struct arc *nexta;		/* saved before push(), which may free a */
+    int progress;		/* did the current sweep push anything? */
+
+    /*
+     * Find and push until there are no more: sweep all states' in-arcs
+     * repeatedly until a sweep makes no progress or an error is flagged.
+     */
+    do {
+	progress = 0;
+	for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) {
+	    nexts = s->next;
+	    for (a = s->ins; a != NULL && !NISERR(); a = nexta) {
+		nexta = a->inchain;
+		if (a->type == '$' || a->type == AHEAD) {	/* forward constraints */
+		    if (push(nfa, a)) {
+			progress = 1;
+		    }
+		}
+		assert(nexta == NULL || s->no != FREESTATE);
+	    }
+	}
+	if (progress && f != NULL) {
+	    dumpnfa(nfa, f);
+	}
+    } while (progress && !NISERR());
+    if (NISERR()) {
+	return;
+    }
+    /* Turn any '$' arcs still entering post into PLAIN arcs on the eos colors. */
+    for (a = nfa->post->ins; a != NULL; a = nexta) {
+	nexta = a->inchain;
+	if (a->type == '$') {
+	    assert(a->co == 0 || a->co == 1);
+	    newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
+	    freearc(nfa, a);
+	}
+    }
+}
+
+/*
+ - push - push a forward constraint forward past its destination state
+ * A significant property of this function is that it deletes at most
+ * one state -- the constraint's to state -- and only if the constraint
+ * was that state's last inarc.
+ ^ static int push(struct nfa *, struct arc *);
+ */
+static int			/* 0 couldn't, 1 could */
+push(
+    struct nfa *nfa,
+    struct arc *con)		/* the '$' or AHEAD constraint arc to push */
+{
+    struct state *from = con->from;
+    struct state *to = con->to;
+    struct arc *a;
+    struct arc *nexta;		/* saved up front; a may be freed in the loop */
+    struct state *s;		/* scratch: clone of to, or a new mid state */
+
+    if (to == from) {		/* circular constraint is pointless */
+	freearc(nfa, con);
+	return 1;
+    }
+    if (to->flag) {		/* can't push forward beyond end */
+	return 0;
+    }
+    if (to->nouts == 0) {	/* dead end */
+	freearc(nfa, con);
+	return 1;
+    }
+
+    /*
+     * DGP 2007-11-15: Here we duplicate the same protections as appear
+     * in pull() above to avoid troubles with cloning a state with a
+     * circular constraint on its list of ins.  It is not clear whether
+     * this is necessary, or is protecting against a "can't happen".
+     * Any test case that actually leads to a freearc() call here would
+     * be a welcome addition to the test suite.
+     */
+    for (a = to->ins; a != NULL; a = nexta) {
+	nexta = a->inchain;
+	switch (a->type) {
+	case '^':
+	case '$':
+	case BEHIND:
+	case AHEAD:
+	    if (a->from == to) {
+		freearc(nfa, a);
+	    }
+	    break;
+	}
+    }
+    /*
+     * First, clone to state if necessary to avoid other inarcs.
+     */
+    if (to->nins > 1) {
+	s = newstate(nfa);
+	if (NISERR()) {
+	    return 0;
+	}
+	copyouts(nfa, to, s);	/* duplicate outarcs */
+	cparc(nfa, con, from, s);	/* move constraint */
+	freearc(nfa, con);
+	if (NISERR()) {		/* a copy failed: s->ins may be NULL, bail out */
+	    return 0;
+	}
+	to = s;
+	con = to->ins;
+    }
+    assert(to->nins == 1);
+
+    /*
+     * Propagate the constraint into the to state's outarcs.
+     */
+    for (a = to->outs; a != NULL; a = nexta) {
+	nexta = a->outchain;
+	switch (combine(con, a)) {
+	case INCOMPATIBLE:	/* destroy the arc */
+	    freearc(nfa, a);
+	    break;
+	case SATISFIED:		/* no action needed */
+	    break;
+	case COMPATIBLE:	/* swap the two arcs, more or less */
+	    s = newstate(nfa);
+	    if (NISERR()) {
+		return 0;
+	    }
+	    cparc(nfa, con, s, a->to);	/* anticipate move */
+	    cparc(nfa, a, from, s);
+	    if (NISERR()) {
+		return 0;
+	    }
+	    freearc(nfa, a);
+	    break;
+	default:
+	    assert(NOTREACHED);
+	    break;
+	}
+    }
+
+    /*
+     * Remaining outarcs, if any, incorporate the constraint.
+     */
+
+    moveouts(nfa, to, from);
+    dropstate(nfa, to);		/* will free the constraint */
+    return 1;
+}
+
+/*
+ - combine - constraint lands on an arc, what happens?
+ ^ #def	INCOMPATIBLE	1	// destroys arc
+ ^ #def	SATISFIED	2	// constraint satisfied
+ ^ #def	COMPATIBLE	3	// compatible but not satisfied yet
+ ^ static int combine(struct arc *, struct arc *);
+ */
+static int			/* INCOMPATIBLE, SATISFIED or COMPATIBLE */
+combine(
+    struct arc *con,		/* the constraint arc being moved */
+    struct arc *a)		/* the arc it runs into */
+{
+#define CA(ct,at)	(((ct)<<CHAR_BIT) | (at))
+    /* CA packs both arc types into one switch key, constraint type shifted up. */
+    switch (CA(con->type, a->type)) {
+    case CA('^', PLAIN):	/* newlines are handled separately */
+    case CA('$', PLAIN):
+	return INCOMPATIBLE;
+	break;
+    case CA(AHEAD, PLAIN):	/* color constraints meet colors */
+    case CA(BEHIND, PLAIN):
+	if (con->co == a->co) {
+	    return SATISFIED;
+	}
+	return INCOMPATIBLE;
+	break;
+    case CA('^', '^'):		/* collision, similar constraints */
+    case CA('$', '$'):
+    case CA(AHEAD, AHEAD):
+    case CA(BEHIND, BEHIND):
+	if (con->co == a->co) {	/* true duplication */
+	    return SATISFIED;
+	}
+	return INCOMPATIBLE;
+	break;
+    case CA('^', BEHIND):	/* collision, dissimilar constraints */
+    case CA(BEHIND, '^'):
+    case CA('$', AHEAD):
+    case CA(AHEAD, '$'):
+	return INCOMPATIBLE;
+	break;
+    case CA('^', '$'):		/* constraints passing each other */
+    case CA('^', AHEAD):
+    case CA(BEHIND, '$'):
+    case CA(BEHIND, AHEAD):
+    case CA('$', '^'):
+    case CA('$', BEHIND):
+    case CA(AHEAD, '^'):
+    case CA(AHEAD, BEHIND):
+    case CA('^', LACON):
+    case CA(BEHIND, LACON):
+    case CA('$', LACON):
+    case CA(AHEAD, LACON):
+	return COMPATIBLE;
+	break;
+    }
+    assert(NOTREACHED);		/* every expected type pairing is listed above */
+    return INCOMPATIBLE;	/* for benefit of blind compilers */
+}
+
+/*
+ - fixempties - get rid of EMPTY arcs
+ ^ static void fixempties(struct nfa *, FILE *);
+ */
+static void
+fixempties(
+    struct nfa *nfa,
+    FILE *f)			/* for debug output; NULL none */
+{
+    struct state *s;
+    struct state *nexts;	/* saved before unempty(), which may free a state */
+    struct arc *a;
+    struct arc *nexta;		/* saved before unempty(), which frees a */
+    int progress;		/* did the current sweep remove an EMPTY arc? */
+
+    /*
+     * Find and eliminate empties until there are no more: sweep every
+     * state's out-arcs until a sweep changes nothing or an error is flagged.
+     */
+    do {
+	progress = 0;
+	for (s = nfa->states; s != NULL && !NISERR()
+		&& s->no != FREESTATE; s = nexts) {	/* stop on a freed state */
+	    nexts = s->next;
+	    for (a = s->outs; a != NULL && !NISERR(); a = nexta) {
+		nexta = a->outchain;
+		if (a->type == EMPTY && unempty(nfa, a)) {
+		    progress = 1;
+		}
+		assert(nexta == NULL || s->no != FREESTATE);
+	    }
+	}
+	if (progress && f != NULL) {
+	    dumpnfa(nfa, f);
+	}
+    } while (progress && !NISERR());
+}
+
+/*
+ - unempty - optimize out an EMPTY arc, if possible
+ * Actually, as it stands this function always succeeds, but the return value
+ * is kept with an eye on possible future changes.
+ ^ static int unempty(struct nfa *, struct arc *);
+ */
+static int			/* 0 couldn't, 1 could */
+unempty(
+    struct nfa *nfa,
+    struct arc *a)		/* the EMPTY arc to eliminate; always freed */
+{
+    struct state *from = a->from;
+    struct state *to = a->to;
+    int usefrom;		/* work on from, as opposed to to? */
+
+    assert(a->type == EMPTY);
+    assert(from != nfa->pre && to != nfa->post);
+
+    if (from == to) {		/* vacuous loop */
+	freearc(nfa, a);
+	return 1;
+    }
+
+    /*
+     * Decide which end to work on. The counts compared here still include
+     * the EMPTY arc itself, which is only freed further down.
+     */
+    usefrom = 1;		/* default: attack from */
+    if (from->nouts > to->nins) {
+	usefrom = 0;
+    } else if (from->nouts == to->nins) {
+	/*
+	 * Decide on secondary issue: move/copy fewest arcs.
+	 */
+
+	if (from->nins > to->nouts) {
+	    usefrom = 0;
+	}
+    }
+
+    freearc(nfa, a);
+    if (usefrom) {		/* fold from's inarcs into to */
+	if (from->nouts == 0) {
+	    /*
+	     * Was the state's only outarc.
+	     */
+
+	    moveins(nfa, from, to);
+	    freestate(nfa, from);
+	} else {
+	    copyins(nfa, from, to);
+	}
+    } else {			/* fold to's outarcs into from */
+	if (to->nins == 0) {
+	    /*
+	     * Was the state's only inarc.
+	     */
+
+	    moveouts(nfa, to, from);
+	    freestate(nfa, to);
+	} else {
+	    copyouts(nfa, to, from);
+	}
+    }
+
+    return 1;
+}
+
+/*
+ - cleanup - clean up NFA after optimizations
+ ^ static void cleanup(struct nfa *);
+ */
+static void
+cleanup(
+    struct nfa *nfa)
+{
+    struct state *s;
+    struct state *nexts;	/* saved before s is possibly dropped */
+    int n;			/* next state number to hand out */
+
+    /*
+     * Clear out unreachable or dead-end states. Use pre to mark reachable,
+     * then post to mark can-reach-post.
+     */
+
+    markreachable(nfa, nfa->pre, NULL, nfa->pre);
+    markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
+    for (s = nfa->states; s != NULL; s = nexts) {
+	nexts = s->next;
+	if (s->tmp != nfa->post && !s->flag) {	/* lacks the post mark, unflagged */
+	    dropstate(nfa, s);
+	}
+    }
+    assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
+    cleartraverse(nfa, nfa->pre);
+    assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
+    /* the nins==0 (final unreachable) case will be caught later */
+
+    /*
+     * Renumber surviving states.
+     */
+
+    n = 0;
+    for (s = nfa->states; s != NULL; s = s->next) {
+	s->no = n++;
+    }
+    nfa->nstates = n;		/* count of states that survived */
+}
+
+/*
+ - markreachable - recursive marking of reachable states
+ ^ static void markreachable(struct nfa *, struct state *, struct state *,
+ ^ 	struct state *);
+ */
+static void
+markreachable(
+    struct nfa *nfa,
+    struct state *s,
+    struct state *okay,		/* consider only states with this mark */
+    struct state *mark)		/* the value to mark with */
+{
+    struct arc *out;		/* cursor over s's out-arcs */
+
+    if (s->tmp == okay) {
+	s->tmp = mark;		/* claim s before following its arcs */
+	out = s->outs;
+	while (out != NULL) {
+	    markreachable(nfa, out->to, okay, mark);
+	    out = out->outchain;
+	}
+    }
+}
+
+/*
+ - markcanreach - recursive marking of states which can reach here
+ ^ static void markcanreach(struct nfa *, struct state *, struct state *,
+ ^ 	struct state *);
+ */
+static void
+markcanreach(
+    struct nfa *nfa,
+    struct state *s,
+    struct state *okay,		/* consider only states with this mark */
+    struct state *mark)		/* the value to mark with */
+{
+    struct arc *in;		/* cursor over s's in-arcs */
+
+    if (s->tmp == okay) {
+	s->tmp = mark;		/* claim s before following its arcs back */
+	in = s->ins;
+	while (in != NULL) {
+	    markcanreach(nfa, in->from, okay, mark);
+	    in = in->inchain;
+	}
+    }
+}
+
+/*
+ - analyze - ascertain potentially-useful facts about an optimized NFA
+ ^ static long analyze(struct nfa *);
+ */
+static long			/* re_info bits to be ORed in */
+analyze(
+    struct nfa *nfa)
+{
+    struct arc *first = nfa->pre->outs;	/* an arc leaving pre */
+    struct arc *second;			/* an arc leaving first's target */
+
+    if (first == NULL) {
+	return REG_UIMPOSSIBLE;	/* nothing leaves pre at all */
+    }
+    do {
+	for (second = first->to->outs; second != NULL; second = second->outchain) {
+	    if (second->to == nfa->post) {
+		return REG_UEMPTYMATCH;	/* post lies two arcs from pre */
+	    }
+	}
+    } while ((first = first->outchain) != NULL);
+    return 0;
+}
+
+/*
+ - compact - compact an NFA
+ ^ static void compact(struct nfa *, struct cnfa *);
+ */
+static void
+compact(
+    struct nfa *nfa,		/* source NFA; must not be in an error state */
+    struct cnfa *cnfa)		/* destination, filled in on success */
+{
+    struct state *s;
+    struct arc *a;
+    size_t nstates;		/* number of states in nfa */
+    size_t narcs;		/* number of carc slots needed in total */
+    struct carc *ca;		/* next free slot in cnfa->arcs */
+    struct carc *first;		/* first real arc slot of the current state */
+
+    assert(!NISERR());
+    /* First pass: count states and slots so both arrays can be sized exactly. */
+    nstates = 0;
+    narcs = 0;
+    for (s = nfa->states; s != NULL; s = s->next) {
+	nstates++;
+	narcs += 1 + s->nouts + 1;
+	/* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
+    }
+    /* Allocate both arrays; if either fails, release the other and flag REG_ESPACE. */
+    cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
+    cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
+    if (cnfa->states == NULL || cnfa->arcs == NULL) {
+	if (cnfa->states != NULL) {
+	    FREE(cnfa->states);
+	}
+	if (cnfa->arcs != NULL) {
+	    FREE(cnfa->arcs);
+	}
+	NERR(REG_ESPACE);
+	return;
+    }
+    cnfa->nstates = nstates;
+    cnfa->pre = nfa->pre->no;
+    cnfa->post = nfa->post->no;
+    cnfa->bos[0] = nfa->bos[0];
+    cnfa->bos[1] = nfa->bos[1];
+    cnfa->eos[0] = nfa->eos[0];
+    cnfa->eos[1] = nfa->eos[1];
+    cnfa->ncolors = maxcolor(nfa->cm) + 1;
+    cnfa->flags = 0;
+    /* Second pass: lay out each state's slots contiguously in cnfa->arcs. */
+    ca = cnfa->arcs;
+    for (s = nfa->states; s != NULL; s = s->next) {
+	assert((size_t) s->no < nstates);
+	cnfa->states[s->no] = ca;	/* state's entry points at its flags slot */
+	ca->co = 0;		/* clear and skip flags "arc" */
+	ca++;
+	first = ca;
+	for (a = s->outs; a != NULL; a = a->outchain) {
+	    switch (a->type) {
+	    case PLAIN:
+		ca->co = a->co;
+		ca->to = a->to->no;
+		ca++;
+		break;
+	    case LACON:
+		assert(s->no != cnfa->pre);
+		ca->co = (color) (cnfa->ncolors + a->co);	/* offset past real colors */
+		ca->to = a->to->no;
+		ca++;
+		cnfa->flags |= HASLACONS;
+		break;
+	    default:		/* only PLAIN and LACON arcs are expected here */
+		assert(NOTREACHED);
+		break;
+	    }
+	}
+	carcsort(first, ca-1);	/* ca-1 is the last arc slot just written */
+	ca->co = COLORLESS;	/* endmarker slot */
+	ca->to = 0;
+	ca++;
+    }
+    assert(ca == &cnfa->arcs[narcs]);
+    assert(cnfa->nstates != 0);
+
+    /*
+     * Mark no-progress states: pre and every state directly reachable from
+     * pre get a nonzero value in their leading flags slot.
+     */
+    for (a = nfa->pre->outs; a != NULL; a = a->outchain) {
+	cnfa->states[a->to->no]->co = 1;
+    }
+    cnfa->states[nfa->pre->no]->co = 1;
+}
+
+/*
+ - carcsort - sort compacted-NFA arcs by color
+ * Really dumb algorithm, but if the list is long enough for that to matter,
+ * you're in real trouble anyway.
+ ^ static void carcsort(struct carc *, struct carc *);
+ */
+static void
+carcsort(
+    struct carc *first,		/* first arc of the range to sort */
+    struct carc *last)		/* last arc of the range, inclusive */
+{
+    struct carc *p;
+    struct carc *q;
+    struct carc tmp;		/* scratch for swapping two arcs */
+
+    if (last - first < 1) {	/* zero or one arc: already in order */
+	return;
+    }
+    /* Order by color, ties broken by target state; p always gets the minimum. */
+    for (p = first; p <= last; p++) {
+	for (q = p; q <= last; q++) {
+	    if (p->co > q->co || (p->co == q->co && p->to > q->to)) {
+		assert(p != q);
+		tmp = *p;
+		*p = *q;
+		*q = tmp;
+	    }
+	}
+    }
+}
+
+/*
+ - freecnfa - free a compacted NFA
+ ^ static void freecnfa(struct cnfa *);
+ */
+static void
+freecnfa(
+    struct cnfa *cnfa)
+{
+    assert(cnfa->nstates != 0);	/* must still hold a live compacted NFA */
+    FREE(cnfa->arcs);
+    FREE(cnfa->states);
+    cnfa->nstates = 0;		/* zero state count marks it as emptied */
+}
+
+/*
+ - dumpnfa - dump an NFA in human-readable form
+ ^ static void dumpnfa(struct nfa *, FILE *);
+ */
+static void
+dumpnfa(
+    struct nfa *nfa,
+    FILE *f)			/* stream the dump is written to */
+{
+#ifdef REG_DEBUG		/* without REG_DEBUG the body is empty */
+    struct state *s;
+    /* Header line: endpoint state numbers, then each special color that is set. */
+    fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
+    if (nfa->bos[0] != COLORLESS) {
+	fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
+    }
+    if (nfa->bos[1] != COLORLESS) {
+	fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
+    }
+    if (nfa->eos[0] != COLORLESS) {
+	fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
+    }
+    if (nfa->eos[1] != COLORLESS) {
+	fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
+    }
+    fprintf(f, "\n");
+    for (s = nfa->states; s != NULL; s = s->next) {
+	dumpstate(s, f);
+    }
+    if (nfa->parent == NULL) {	/* colors are dumped for parentless NFAs only */
+	dumpcolors(nfa->cm, f);
+    }
+    fflush(f);
+#endif
+}
+
+#ifdef REG_DEBUG		/* subordinates of dumpnfa */
+/*
+ ^ #ifdef REG_DEBUG
+ */
+
+/*
+ - dumpstate - dump an NFA state in human-readable form
+ ^ static void dumpstate(struct state *, FILE *);
+ */
+static void
+dumpstate(
+    struct state *s,
+    FILE *f)			/* stream the dump is written to */
+{
+    struct arc *a;
+    /* State number, "T" when tmp is set, then the flag character or '.'. */
+    fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
+	    (s->flag) ? s->flag : '.');
+    if (s->prev != NULL && s->prev->next != s) {	/* list linkage sanity check */
+	fprintf(f, "\tstate chain bad\n");
+    }
+    if (s->nouts == 0) {
+	fprintf(f, "\tno out arcs\n");
+    } else {
+	dumparcs(s, f);
+    }
+    fflush(f);
+    for (a = s->ins; a != NULL; a = a->inchain) {	/* report misfiled in-arcs */
+	if (a->to != s) {
+	    fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
+		    a->from->no, a->to->no, s->no);
+	}
+    }
+}
+
+/*
+ - dumparcs - dump out-arcs in human-readable form
+ ^ static void dumparcs(struct state *, FILE *);
+ */
+static void
+dumparcs(
+    struct state *s,
+    FILE *f)
+{
+    assert(s->nouts > 0);
+    /*
+     * Arcs read better in reverse chain order, which dumprarcs provides. A
+     * result other than 1 means the last output row was left unterminated.
+     */
+    if (dumprarcs(s->outs, s, f, 1) != 1) {
+	fprintf(f, "\n");
+    }
+}
+
+/*
+ - dumprarcs - dump remaining outarcs, recursively, in reverse order
+ ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
+ */
+static int			/* resulting print position */
+dumprarcs(
+    struct arc *a,
+    struct state *s,
+    FILE *f,
+    int pos)			/* initial print position */
+{
+    struct arc *rest = a->outchain;	/* arcs to be printed ahead of a */
+
+    if (rest != NULL) {
+	pos = dumprarcs(rest, s, f, pos);
+    }
+    dumparc(a, s, f);
+    if (pos != 5) {
+	return pos + 1;		/* still room on the current row */
+    }
+    fprintf(f, "\n");		/* fifth arc on the row: wrap around */
+    return 1;
+}
+
+/*
+ - dumparc - dump one outarc in readable form, including prefixing tab
+ ^ static void dumparc(struct arc *, struct state *, FILE *);
+ */
+static void
+dumparc(
+    struct arc *a,		/* the arc to print */
+    struct state *s,		/* state on whose out-chain a was found */
+    FILE *f)
+{
+    struct arc *aa;
+    struct arcbatch *ab;
+    /* Print the arc's label; the notation depends on the arc type. */
+    fprintf(f, "\t");
+    switch (a->type) {
+    case PLAIN:
+	fprintf(f, "[%ld]", (long) a->co);
+	break;
+    case AHEAD:
+	fprintf(f, ">%ld>", (long) a->co);
+	break;
+    case BEHIND:
+	fprintf(f, "<%ld<", (long) a->co);
+	break;
+    case LACON:
+	fprintf(f, ":%ld:", (long) a->co);
+	break;
+    case '^':
+    case '$':
+	fprintf(f, "%c%d", a->type, (int) a->co);
+	break;
+    case EMPTY:			/* EMPTY arcs get no label */
+	break;
+    default:			/* unknown type: raw type and color values */
+	fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
+	break;
+    }
+    if (a->from != s) {		/* arc sits on the wrong state's out-chain */
+	fprintf(f, "?%d?", a->from->no);
+    }
+    for (ab = &a->from->oas; ab != NULL; ab = ab->next) {	/* find a's batch */
+	for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) {
+	    if (aa == a) {
+		break;		/* NOTE BREAK OUT */
+	    }
+	}
+	if (aa < &ab->a[ABSIZE]) {	/* propagate break */
+	    break;		/* NOTE BREAK OUT */
+	}
+    }
+    if (ab == NULL) {
+	fprintf(f, "?!?");	/* not in allocated space */
+    }
+    fprintf(f, "->");
+    if (a->to == NULL) {
+	fprintf(f, "NULL");
+	return;
+    }
+    fprintf(f, "%d", a->to->no);
+    for (aa = a->to->ins; aa != NULL; aa = aa->inchain) {	/* is a on to's in-chain? */
+	if (aa == a) {
+	    break;		/* NOTE BREAK OUT */
+	}
+    }
+    if (aa == NULL) {
+	fprintf(f, "?!?");	/* missing from in-chain */
+    }
+}
+
+/*
+ ^ #endif
+ */
+#endif				/* ifdef REG_DEBUG */
+
+/*
+ - dumpcnfa - dump a compacted NFA in human-readable form
+ ^ static void dumpcnfa(struct cnfa *, FILE *);
+ */
+static void
+dumpcnfa(
+    struct cnfa *cnfa,
+    FILE *f)			/* stream the dump is written to */
+{
+#ifdef REG_DEBUG		/* without REG_DEBUG the body is empty */
+    int st;			/* index of the state being dumped */
+    /* Header line: endpoints, the special colors that are set, lacon flag. */
+    fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
+    if (cnfa->bos[0] != COLORLESS) {
+	fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
+    }
+    if (cnfa->bos[1] != COLORLESS) {
+	fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
+    }
+    if (cnfa->eos[0] != COLORLESS) {
+	fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
+    }
+    if (cnfa->eos[1] != COLORLESS) {
+	fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
+    }
+    if (cnfa->flags&HASLACONS) {
+	fprintf(f, ", haslacons");
+    }
+    fprintf(f, "\n");
+    for (st = 0; st < cnfa->nstates; st++) {
+	dumpcstate(st, cnfa->states[st], cnfa, f);
+    }
+    fflush(f);
+#endif
+}
+
+#ifdef REG_DEBUG		/* subordinates of dumpcnfa */
+/*
+ ^ #ifdef REG_DEBUG
+ */
+
+/*
+ - dumpcstate - dump a compacted-NFA state in human-readable form
+ ^ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+ */
+static void
+dumpcstate(
+    int st,			/* state number, printed as the row label */
+    struct carc *ca,		/* the state's slots: ca[0], then its arcs */
+    struct cnfa *cnfa,
+    FILE *f)
+{
+    int i;			/* slot index; real arcs start at 1 */
+    int pos;			/* arcs printed so far on this row, plus one */
+    /* ca[0].co is the state's flag value: ":" when nonzero, "." when zero. */
+    fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
+    pos = 1;
+    for (i = 1; ca[i].co != COLORLESS; i++) {	/* COLORLESS ends the list */
+	if (ca[i].co < cnfa->ncolors) {
+	    fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
+	} else {		/* at or above ncolors: shown as :n: with offset removed */
+	    fprintf(f, "\t:%ld:->%d", (long) ca[i].co-cnfa->ncolors,ca[i].to);
+	}
+	if (pos == 5) {		/* five arcs per output row */
+	    fprintf(f, "\n");
+	    pos = 1;
+	} else {
+	    pos++;
+	}
+    }
+    if (i == 1 || pos != 1) {	/* no arcs at all, or an unfinished row */
+	fprintf(f, "\n");
+    }
+    fflush(f);
+}
+
+/*
+ ^ #endif
+ */
+#endif				/* ifdef REG_DEBUG */
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regcomp.c b/contrib/hsrex/regcomp.c
new file mode 100644
index 0000000..8ff77ad
--- /dev/null
+++ b/contrib/hsrex/regcomp.c
@@ -0,0 +1,2169 @@
+/*
+ * re_*comp and friends - compile REs
+ * This file #includes several others (see the bottom).
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "regguts.h"
+
+/*
+ * forward declarations, up here so forward datatypes etc. are defined early
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regcomp.c === */
+int compile(regex_t *, const chr *, size_t, int);
+static void moresubs(struct vars *, int);
+static int freev(struct vars *, int);
+static void makesearch(struct vars *, struct nfa *);
+static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
+static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
+static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
+static void nonword(struct vars *, int, struct state *, struct state *);
+static void word(struct vars *, int, struct state *, struct state *);
+static int scannum(struct vars *);
+static void repeat(struct vars *, struct state *, struct state *, int, int);
+static void bracket(struct vars *, struct state *, struct state *);
+static void cbracket(struct vars *, struct state *, struct state *);
+static void brackpart(struct vars *, struct state *, struct state *);
+static const chr *scanplain(struct vars *);
+static void onechr(struct vars *, pchr, struct state *, struct state *);
+static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
+static void wordchrs(struct vars *);
+static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
+static void freesubre(struct vars *, struct subre *);
+static void freesrnode(struct vars *, struct subre *);
+static void optst(struct vars *, struct subre *);
+static int numst(struct subre *, int);
+static void markst(struct subre *);
+static void cleanst(struct vars *);
+static long nfatree(struct vars *, struct subre *, FILE *);
+static long nfanode(struct vars *, struct subre *, FILE *);
+static int newlacon(struct vars *, struct state *, struct state *, int);
+static void freelacons(struct subre *, int);
+static void rfree(regex_t *);
+static void dump(regex_t *, FILE *);
+static void dumpst(struct subre *, FILE *, int);
+static void stdump(struct subre *, FILE *, int);
+static const char *stid(struct subre *, char *, size_t);
+/* === regc_lex.c === */
+static void lexstart(struct vars *);
+static void prefixes(struct vars *);
+static void lexnest(struct vars *, const chr *, const chr *);
+static void lexword(struct vars *);
+static int next(struct vars *);
+static int lexescape(struct vars *);
+static chr lexdigits(struct vars *, int, int, int);
+static int brenext(struct vars *, pchr);
+static void skip(struct vars *);
+static chr newline(NOPARMS);
+#ifdef REG_DEBUG
+static const chr *ch(NOPARMS);
+#endif
+static chr chrnamed(struct vars *, const chr *, const chr *, pchr);
+/* === regc_color.c === */
+static void initcm(struct vars *, struct colormap *);
+static void freecm(struct colormap *);
+static void cmtreefree(struct colormap *, union tree *, int);
+static color setcolor(struct colormap *, pchr, pcolor);
+static color maxcolor(struct colormap *);
+static color newcolor(struct colormap *);
+static void freecolor(struct colormap *, pcolor);
+static color pseudocolor(struct colormap *);
+static color subcolor(struct colormap *, pchr c);
+static color newsub(struct colormap *, pcolor);
+static void subrange(struct vars *, pchr, pchr, struct state *, struct state *);
+static void subblock(struct vars *, pchr, struct state *, struct state *);
+static void okcolors(struct nfa *, struct colormap *);
+static void colorchain(struct colormap *, struct arc *);
+static void uncolorchain(struct colormap *, struct arc *);
+static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
+static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
+#ifdef REG_DEBUG
+static void dumpcolors(struct colormap *, FILE *);
+static void fillcheck(struct colormap *, union tree *, int, FILE *);
+static void dumpchr(pchr, FILE *);
+#endif
+/* === regc_nfa.c === */
+static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
+static void freenfa(struct nfa *);
+static struct state *newstate(struct nfa *);
+static struct state *newfstate(struct nfa *, int flag);
+static void dropstate(struct nfa *, struct state *);
+static void freestate(struct nfa *, struct state *);
+static void destroystate(struct nfa *, struct state *);
+static void newarc(struct nfa *, int, pcolor, struct state *, struct state *);
+static struct arc *allocarc(struct nfa *, struct state *);
+static void freearc(struct nfa *, struct arc *);
+static struct arc *findarc(struct state *, int, pcolor);
+static void cparc(struct nfa *, struct arc *, struct state *, struct state *);
+static void moveins(struct nfa *, struct state *, struct state *);
+static void copyins(struct nfa *, struct state *, struct state *);
+static void moveouts(struct nfa *, struct state *, struct state *);
+static void copyouts(struct nfa *, struct state *, struct state *);
+static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
+static void delsub(struct nfa *, struct state *, struct state *);
+static void deltraverse(struct nfa *, struct state *, struct state *);
+static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
+static void duptraverse(struct nfa *, struct state *, struct state *, int);
+static void cleartraverse(struct nfa *, struct state *);
+static void specialcolors(struct nfa *);
+static long optimize(struct nfa *, FILE *);
+static void pullback(struct nfa *, FILE *);
+static int pull(struct nfa *, struct arc *);
+static void pushfwd(struct nfa *, FILE *);
+static int push(struct nfa *, struct arc *);
+#define	INCOMPATIBLE	1	/* destroys arc */
+#define	SATISFIED	2	/* constraint satisfied */
+#define	COMPATIBLE	3	/* compatible but not satisfied yet */
+static int combine(struct arc *, struct arc *);
+static void fixempties(struct nfa *, FILE *);
+static int unempty(struct nfa *, struct arc *);
+static void cleanup(struct nfa *);
+static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
+static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
+static long analyze(struct nfa *);
+static void compact(struct nfa *, struct cnfa *);
+static void carcsort(struct carc *, struct carc *);
+static void freecnfa(struct cnfa *);
+static void dumpnfa(struct nfa *, FILE *);
+#ifdef REG_DEBUG
+static void dumpstate(struct state *, FILE *);
+static void dumparcs(struct state *, FILE *);
+static int dumprarcs(struct arc *, struct state *, FILE *, int);
+static void dumparc(struct arc *, struct state *, FILE *);
+#endif
+static void dumpcnfa(struct cnfa *, FILE *);
+#ifdef REG_DEBUG
+static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+#endif
+/* === regc_cvec.c === */
+static struct cvec *clearcvec(struct cvec *);
+static void addchr(struct cvec *, pchr);
+static void addrange(struct cvec *, pchr, pchr);
+static struct cvec *newcvec(int, int);
+static struct cvec *getcvec(struct vars *, int, int);
+static void freecvec(struct cvec *);
+/* === regc_locale.c === */
+static celt element(struct vars *, const chr *, const chr *);
+static struct cvec *range(struct vars *, celt, celt, int);
+static int before(celt, celt);
+static struct cvec *eclass(struct vars *, celt, int);
+static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
+static struct cvec *allcases(struct vars *, pchr);
+static int cmp(const chr *, const chr *, size_t);
+static int casecmp(const chr *, const chr *, size_t);
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+/* internal variables, bundled for easy passing around */
+struct vars {
+    regex_t *re;		/* RE being built; compile() NULLs it on success */
+    const chr *now;		/* scan pointer into string */
+    const chr *stop;		/* end of string */
+    const chr *savenow;		/* saved now and stop for "subroutine call" */
+    const chr *savestop;
+    int err;			/* error code (0 if none) */
+    int cflags;			/* copy of compile flags */
+    int lasttype;		/* type of previous token */
+    int nexttype;		/* type of next token */
+    chr nextvalue;		/* value (if any) of next token */
+    int lexcon;			/* lexical context type (see regc_lex.c) */
+    int nsubexp;		/* subexpression count */
+    struct subre **subs;	/* subRE pointer vector */
+    size_t nsubs;		/* length of vector */
+    struct subre *sub10[10];	/* initial vector, enough for most */
+    struct nfa *nfa;		/* the NFA */
+    struct colormap *cm;	/* character color map */
+    color nlcolor;		/* color of newline */
+    struct state *wordchrs;	/* state in nfa holding word-char outarcs */
+    struct subre *tree;		/* subexpression tree */
+    struct subre *treechain;	/* all tree nodes allocated */
+    struct subre *treefree;	/* any free tree nodes */
+    int ntree;			/* number of tree nodes */
+    struct cvec *cv;		/* interface cvec */
+    struct cvec *cv2;		/* utility cvec */
+    struct subre *lacons;	/* lookahead-constraint vector */
+    int nlacons;		/* size of lacons */
+};
+
+/* parsing macros; most know that `v' is the struct vars pointer */
+#define	NEXT()	(next(v))		/* advance by one token */
+#define	SEE(t)	(v->nexttype == (t))	/* is next token this? */
+#define	EAT(t)	(SEE(t) && next(v))	/* if next is this, swallow it */
+#define	VISERR(vv)	((vv)->err != 0)/* have we seen an error yet? */
+#define	ISERR()	VISERR(v)		/* VISERR for the implicit v */
+#define	VERR(vv,e) \
+	((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e)))
+#define	ERR(e)	VERR(v, e)		/* force EOS; keep the first error */
+#define	NOERR()	{if (ISERR()) return;}	/* if error seen, return */
+#define	NOERRN()	{if (ISERR()) return NULL;}	/* ditto, returning NULL */
+#define	NOERRZ()	{if (ISERR()) return 0;}	/* ditto, returning 0 */
+#define	INSIST(c, e)	((c) ? 0 : ERR(e))	/* if condition false, error */
+#define	NOTE(b)	(v->re->re_info |= (b))		/* note visible condition */
+#define	EMPTYARC(x, y)	newarc(v->nfa, EMPTY, 0, x, y)	/* EMPTY arc from x to y */
+
+/* token type codes (values of v->nexttype), some also used as NFA arc types */
+#define	EMPTY	'n'		/* no token present; arc type used by EMPTYARC */
+#define	EOS	'e'		/* end of string */
+#define	PLAIN	'p'		/* ordinary character */
+#define	DIGIT	'd'		/* digit (in bound) */
+#define	BACKREF	'b'		/* back reference */
+#define	COLLEL	'I'		/* start of [. */
+#define	ECLASS	'E'		/* start of [= */
+#define	CCLASS	'C'		/* start of [: */
+#define	END	'X'		/* end of [. [= [: */
+#define	RANGE	'R'		/* - within [] which might be range delim. */
+#define	LACON	'L'		/* lookahead constraint subRE */
+#define	AHEAD	'a'		/* color-lookahead arc */
+#define	BEHIND	'r'		/* color-lookbehind arc */
+#define	WBDRY	'w'		/* word boundary constraint */
+#define	NWBDRY	'W'		/* non-word-boundary constraint */
+#define	SBEGIN	'A'		/* beginning of string (even if not BOL) */
+#define	SEND	'Z'		/* end of string (even if not EOL) */
+#define	PREFER	'P'		/* length preference */
+
+/* is an arc colored, and hence on a color chain? (tests the arc's type) */
+#define	COLORED(a) \
+	((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
+
+/* static function list; compile() stores its address in re->re_fns */
+static struct fns functions = {
+    rfree,			/* regfree insides */
+};
+
+/*
+ - compile - compile regular expression; returns 0 or a REG_* error code
+ ^ int compile(regex_t *, const chr *, size_t, int);
+ */
+int
+compile(
+    regex_t *re,		/* regex_t to fill in */
+    const chr *string,		/* start of the RE text */
+    size_t len,			/* length of string, in chrs */
+    int flags)			/* REG_* compile flags */
+{
+    AllocVars(v);		/* declares v; released by FreeVars()/freev() */
+    struct guts *g;
+    int i;
+    size_t j;
+    FILE *debug = (flags&REG_PROGRESS) ? stdout : NULL;
+#define	CNOERR()	{ if (ISERR()) return freev(v, v->err); }
+
+    /*
+     * Sanity checks; nothing but v itself needs releasing yet.
+     */
+
+    if (re == NULL || string == NULL) {
+	FreeVars(v);
+	return REG_INVARG;
+    }
+    if ((flags&REG_QUOTE) && (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) {
+	FreeVars(v);
+	return REG_INVARG;
+    }
+    if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) {
+	FreeVars(v);
+	return REG_INVARG;
+    }
+
+    /*
+     * Initial setup (after which freev() is callable).
+     */
+
+    v->re = re;
+    v->now = string;
+    v->stop = v->now + len;
+    v->savenow = v->savestop = NULL;
+    v->err = 0;
+    v->cflags = flags;
+    v->nsubexp = 0;
+    v->subs = v->sub10;
+    v->nsubs = 10;
+    for (j = 0; j < v->nsubs; j++) {
+	v->subs[j] = NULL;
+    }
+    v->nfa = NULL;
+    v->cm = NULL;
+    v->nlcolor = COLORLESS;
+    v->wordchrs = NULL;
+    v->tree = NULL;
+    v->treechain = NULL;
+    v->treefree = NULL;
+    v->cv = NULL;
+    v->cv2 = NULL;
+    v->lacons = NULL;
+    v->nlacons = 0;
+    re->re_magic = REMAGIC;
+    re->re_info = 0;		/* bits get set during parse */
+    re->re_csize = sizeof(chr);
+    re->re_guts = NULL;
+    re->re_fns = VS(&functions);
+
+    /*
+     * More complex setup, malloced things.
+     */
+
+    re->re_guts = VS(MALLOC(sizeof(struct guts)));
+    if (re->re_guts == NULL) {
+	return freev(v, REG_ESPACE);
+    }
+    g = (struct guts *) re->re_guts;
+    g->tree = NULL;
+    initcm(v, &g->cmap);
+    v->cm = &g->cmap;
+    g->lacons = NULL;
+    g->nlacons = 0;
+    ZAPCNFA(g->search);
+    v->nfa = newnfa(v, v->cm, NULL);
+    CNOERR();
+    v->cv = newcvec(100, 20);
+    if (v->cv == NULL) {
+	return freev(v, REG_ESPACE);
+    }
+
+    /*
+     * Parsing.
+     */
+
+    lexstart(v);		/* also handles prefixes */
+    if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {
+	/*
+	 * Assign newline a unique color.
+	 */
+
+	v->nlcolor = subcolor(v->cm, newline());
+	okcolors(v->nfa, v->cm);
+    }
+    CNOERR();
+    v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
+    assert(SEE(EOS));		/* even if error; ISERR() => SEE(EOS) */
+    CNOERR();
+    assert(v->tree != NULL);
+
+    /*
+     * Finish setup of nfa and its subre tree.
+     */
+
+    specialcolors(v->nfa);
+    CNOERR();
+    if (debug != NULL) {
+	fprintf(debug, "\n\n\n========= RAW ==========\n");
+	dumpnfa(v->nfa, debug);
+	dumpst(v->tree, debug, 1);
+    }
+    optst(v, v->tree);
+    v->ntree = numst(v->tree, 1);
+    markst(v->tree);
+    cleanst(v);
+    if (debug != NULL) {
+	fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
+	dumpst(v->tree, debug, 1);
+    }
+
+    /*
+     * Build compacted NFAs for tree and lacons.
+     */
+
+    re->re_info |= nfatree(v, v->tree, debug);
+    CNOERR();
+    assert(v->nlacons == 0 || v->lacons != NULL);
+    for (i = 1; i < v->nlacons; i++) {	/* starts at 1: entry 0 is skipped */
+	if (debug != NULL) {
+	    fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
+	}
+	nfanode(v, &v->lacons[i], debug);
+    }
+    CNOERR();
+    if (v->tree->flags&SHORTER) {
+	NOTE(REG_USHORTEST);
+    }
+
+    /*
+     * Build the compacted NFA for fast search (tree and lacons are done).
+     */
+
+    if (debug != NULL) {
+	fprintf(debug, "\n\n\n========= SEARCH ==========\n");
+    }
+
+    /*
+     * Can sacrifice main NFA now, so use it as work area.
+     */
+
+    (DISCARD) optimize(v->nfa, debug);
+    CNOERR();
+    makesearch(v, v->nfa);
+    CNOERR();
+    compact(v->nfa, &g->search);
+    CNOERR();
+
+    /*
+     * Looks okay, package it up.
+     */
+
+    re->re_nsub = v->nsubexp;
+    v->re = NULL;		/* freev no longer frees re */
+    g->magic = GUTSMAGIC;
+    g->cflags = v->cflags;
+    g->info = re->re_info;
+    g->nsub = re->re_nsub;
+    g->tree = v->tree;
+    v->tree = NULL;		/* ownership moved to g; keep freev() off it */
+    g->ntree = v->ntree;
+    g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
+    g->lacons = v->lacons;
+    v->lacons = NULL;		/* likewise */
+    g->nlacons = v->nlacons;
+
+    if (flags&REG_DUMP) {
+	dump(re, stdout);
+    }
+
+    assert(v->err == 0);
+    return freev(v, 0);		/* releases the remaining work areas */
+}
+
+/*
+ - moresubs - enlarge subRE vector (on failure records REG_ESPACE via ERR)
+ ^ static void moresubs(struct vars *, int);
+ */
+static void
+moresubs(
+    struct vars *v,
+    int wanted)			/* want enough room for this one */
+{
+    struct subre **p;		/* the enlarged vector, then a fill cursor */
+    size_t n;			/* new vector length */
+
+    assert(wanted > 0 && (size_t)wanted >= v->nsubs);
+    n = (size_t)wanted * 3 / 2 + 1;
+    if (v->subs == v->sub10) {	/* still the array embedded in *v */
+	p = (struct subre **) MALLOC(n * sizeof(struct subre *));
+	if (p != NULL) {
+	    memcpy(p, v->subs, v->nsubs * sizeof(struct subre *));
+	}
+    } else {
+	p = (struct subre **) REALLOC(v->subs, n*sizeof(struct subre *));
+    }
+    if (p == NULL) {		/* v->subs and v->nsubs are left as they were */
+	ERR(REG_ESPACE);
+	return;
+    }
+
+    v->subs = p;
+    for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) {
+	*p = NULL;		/* new slots start out empty */
+    }
+    assert(v->nsubs == n);
+    assert((size_t)wanted < v->nsubs);
+}
+
+/*
+ - freev - free vars struct's substructures where necessary
+ * Optionally does error-number setting, and always returns error code (if
+ * any), to make error-handling code terser.
+ ^ static int freev(struct vars *, int);
+ */
+static int
+freev(
+    struct vars *v,
+    int err)			/* error to record, or 0 for none */
+{
+    register int ret;
+
+    if (v->re != NULL) {	/* compile() NULLs v->re once it succeeded */
+	rfree(v->re);
+    }
+    if (v->subs != v->sub10) {	/* only if moresubs() replaced the vector */
+	FREE(v->subs);
+    }
+    if (v->nfa != NULL) {
+	freenfa(v->nfa);
+    }
+    if (v->tree != NULL) {
+	freesubre(v, v->tree);
+    }
+    if (v->treechain != NULL) {
+	cleanst(v);
+    }
+    if (v->cv != NULL) {
+	freecvec(v->cv);
+    }
+    if (v->cv2 != NULL) {
+	freecvec(v->cv2);
+    }
+    if (v->lacons != NULL) {
+	freelacons(v->lacons, v->nlacons);
+    }
+    ERR(err);			/* nop if err==0 */
+
+    ret = v->err;		/* fetched before FreeVars(v) below */
+    FreeVars(v);
+    return ret;
+}
+
+/*
+ - makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
+ * NFA must have been optimize()d already. Failures are reported in v->err.
+ ^ static void makesearch(struct vars *, struct nfa *);
+ */
+static void
+makesearch(
+    struct vars *v,		/* compile state: color map and error status */
+    struct nfa *nfa)		/* NFA to rework in place */
+{
+    struct arc *a, *b;
+    struct state *pre = nfa->pre;
+    struct state *s, *s2, *slist;	/* slist: split candidates, via ->tmp */
+
+    /*
+     * No loops are needed if it's anchored.
+     */
+
+    for (a = pre->outs; a != NULL; a = a->outchain) {
+	assert(a->type == PLAIN);
+	if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) {
+	    break;
+	}
+    }
+    if (a != NULL) {
+	/*
+	 * Add implicit .* in front.
+	 */
+
+	rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
+
+	/*
+	 * And ^* and \A* too -- not always necessary, but harmless.
+	 */
+
+	newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
+	newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
+    }
+
+    /*
+     * Now here's the subtle part. Because many REs have no lookback
+     * constraints, often knowing when you were in the pre state tells you
+     * little; it's the next state(s) that are informative. But some of them
+     * may have other inarcs, i.e. it may be possible to make actual progress
+     * and then return to one of them. We must de-optimize such cases,
+     * splitting each such state into progress and no-progress states.
+     */
+
+    /*
+     * First, make a list of the states.
+     */
+
+    slist = NULL;
+    for (a=pre->outs ; a!=NULL ; a=a->outchain) {
+	s = a->to;
+	for (b=s->ins ; b!=NULL ; b=b->inchain) {
+	    if (b->from != pre) {
+		break;
+	    }
+	}
+	if (b != NULL && s->tmp == NULL) {
+	    /*
+	     * Must be split if not already in the list (fixes bugs 505048,
+	     * 230589, 840258, 504785).
+	     */
+
+	    s->tmp = slist;
+	    slist = s;
+	}
+    }
+
+    /*
+     * Do the splits (bail out on error; compile() checks right after us).
+     */
+
+    for (s=slist ; s!=NULL ; s=s2) {
+	s2 = newstate(nfa);
+	NOERR();		/* don't hand s2 to copyouts() if that failed */
+	copyouts(nfa, s, s2);
+	for (a=s->ins ; a!=NULL ; a=b) {
+	    b = a->inchain;	/* fetch first: a may be freed below */
+
+	    if (a->from != pre) {
+		cparc(nfa, a, a->from, s2);
+		freearc(nfa, a);
+	    }
+	}
+	s2 = s->tmp;
+	s->tmp = NULL;		/* clean up while we're at it */
+    }
+}
+
+/*
+ - parse - parse an RE
+ * This is actually just the top level, which parses a bunch of branches tied
+ * together with '|'. They appear in the tree as the left children of a chain
+ * of '|' subres. May return NULL once an error has been recorded.
+ ^ static struct subre *parse(struct vars *, int, int, struct state *,
+ ^ 	struct state *);
+ */
+static struct subre *
+parse(
+    struct vars *v,
+    int stopper,		/* EOS or ')' */
+    int type,			/* LACON (lookahead subRE) or PLAIN */
+    struct state *init,		/* initial state */
+    struct state *final)	/* final state */
+{
+    struct state *left, *right;	/* scaffolding for branch */
+    struct subre *branches;	/* top level */
+    struct subre *branch;	/* current branch */
+    struct subre *t;		/* temporary */
+    int firstbranch;		/* is this the first branch? */
+
+    assert(stopper == ')' || stopper == EOS);
+
+    branches = subre(v, '|', LONGER, init, final);
+    NOERRN();
+    branch = branches;
+    firstbranch = 1;
+    do {	/* a branch */
+	if (!firstbranch) {
+	    /*
+	     * Need a place to hang the branch.
+	     */
+
+	    branch->right = subre(v, '|', LONGER, init, final);
+	    NOERRN();
+	    branch = branch->right;
+	}
+	firstbranch = 0;
+	left = newstate(v->nfa);
+	right = newstate(v->nfa);
+	NOERRN();
+	EMPTYARC(init, left);
+	EMPTYARC(right, final);
+	NOERRN();
+	branch->left = parsebranch(v, stopper, type, left, right, 0);
+	NOERRN();
+	branch->flags |= UP(branch->flags | branch->left->flags);
+	if ((branch->flags &~ branches->flags) != 0) {	/* new flags */
+	    for (t = branches; t != branch; t = t->right) {
+		t->flags |= branch->flags;	/* earlier '|' nodes too */
+	    }
+	}
+    } while (EAT('|'));
+    assert(SEE(stopper) || SEE(EOS));
+
+    if (!SEE(stopper)) {	/* hit EOS while expecting ')' */
+	assert(stopper == ')' && SEE(EOS));
+	ERR(REG_EPAREN);
+    }
+
+    /*
+     * Optimize out simple cases.
+     */
+
+    if (branch == branches) {	/* only one branch */
+	assert(branch->right == NULL);
+	t = branch->left;
+	branch->left = NULL;	/* presumably so freesubre() spares t */
+	freesubre(v, branches);
+	branches = t;
+    } else if (!MESSY(branches->flags)) {	/* no interesting innards */
+	freesubre(v, branches->left);
+	branches->left = NULL;
+	freesubre(v, branches->right);
+	branches->right = NULL;
+	branches->op = '=';	/* what remains is one childless '=' node */
+    }
+
+    return branches;
+}
+
+/*
+ - parsebranch - parse one branch of an RE
+ * This mostly manages concatenation, working closely with parseqatom().
+ * Concatenated things are bundled up as much as possible, with separate
+ * '.' nodes introduced only when necessary due to substructure.
+ ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,
+ ^ 	struct state *, int);
+ */
+static struct subre *
+parsebranch(
+    struct vars *v,
+    int stopper,		/* EOS or ')' */
+    int type,			/* LACON (lookahead subRE) or PLAIN */
+    struct state *left,		/* leftmost state */
+    struct state *right,	/* rightmost state */
+    int partial)		/* is this only part of a branch? */
+{
+    struct state *lp;		/* left end of current construct */
+    int seencontent;		/* is there anything in this branch yet? */
+    struct subre *t;		/* subre for the branch; the return value */
+
+    lp = left;
+    seencontent = 0;
+    t = subre(v, '=', 0, left, right);	/* op '=' is tentative */
+    NOERRN();
+    while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
+	if (seencontent) {	/* implicit concat operator */
+	    lp = newstate(v->nfa);
+	    NOERRN();
+	    moveins(v->nfa, right, lp);	/* presumably re-aims right's inarcs */
+	}
+	seencontent = 1;
+
+	/* NB, recursion in parseqatom() may swallow rest of branch */
+	parseqatom(v, stopper, type, lp, right, t);
+    }
+
+    if (!seencontent) {		/* empty branch */
+	if (!partial) {
+	    NOTE(REG_UUNSPEC);
+	}
+	assert(lp == left);
+	EMPTYARC(left, right);	/* connect the two ends directly */
+    }
+
+    return t;
+}
+
+/*
+ - parseqatom - parse one quantified atom or constraint of an RE
+ * The bookkeeping near the end cooperates very closely with parsebranch(); in
+ * particular, it contains a recursion that can involve parsing the rest of
+ * the branch, making this function's name somewhat inaccurate.
+ ^ static void parseqatom(struct vars *, int, int, struct state *,
+ ^ 	struct state *, struct subre *);
+ */
+static void
+parseqatom(
+    struct vars *v,
+    int stopper,		/* EOS or ')' */
+    int type,			/* LACON (lookahead subRE) or PLAIN */
+    struct state *lp,		/* left state to hang it on */
+    struct state *rp,		/* right state to hang it on */
+    struct subre *top)		/* subtree top */
+{
+    struct state *s;		/* temporaries for new states */
+    struct state *s2;
+#define	ARCV(t, val)	newarc(v->nfa, t, val, lp, rp)
+    int m, n;
+    struct subre *atom;		/* atom's subtree */
+    struct subre *t;
+    int cap;			/* capturing parens? */
+    int pos;			/* positive lookahead? */
+    int subno;			/* capturing-parens or backref number */
+    int atomtype;
+    int qprefer;		/* quantifier short/long preference */
+    int f;
+    struct subre **atomp;	/* where the pointer to atom is */
+
+    /*
+     * Initial bookkeeping.
+     */
+
+    atom = NULL;
+    assert(lp->nouts == 0);	/* must string new code */
+    assert(rp->nins == 0);	/* between lp and rp */
+    subno = 0;			/* just to shut lint up */
+
+    /*
+     * An atom or constraint...
+     */
+
+    atomtype = v->nexttype;
+    switch (atomtype) {
+	/* first, constraints, which end by returning */
+    case '^':
+	ARCV('^', 1);
+	if (v->cflags&REG_NLANCH) {
+	    ARCV(BEHIND, v->nlcolor);
+	}
+	NEXT();
+	return;
+    case '$':
+	ARCV('$', 1);
+	if (v->cflags&REG_NLANCH) {
+	    ARCV(AHEAD, v->nlcolor);
+	}
+	NEXT();
+	return;
+    case SBEGIN:
+	ARCV('^', 1);		/* BOL */
+	ARCV('^', 0);		/* or BOS */
+	NEXT();
+	return;
+    case SEND:
+	ARCV('$', 1);		/* EOL */
+	ARCV('$', 0);		/* or EOS */
+	NEXT();
+	return;
+    case '<':
+	wordchrs(v);		/* does NEXT() */
+	s = newstate(v->nfa);
+	NOERR();
+	nonword(v, BEHIND, lp, s);
+	word(v, AHEAD, s, rp);
+	return;
+    case '>':
+	wordchrs(v);		/* does NEXT() */
+	s = newstate(v->nfa);
+	NOERR();
+	word(v, BEHIND, lp, s);
+	nonword(v, AHEAD, s, rp);
+	return;
+    case WBDRY:
+	wordchrs(v);		/* does NEXT() */
+	s = newstate(v->nfa);
+	NOERR();
+	nonword(v, BEHIND, lp, s);
+	word(v, AHEAD, s, rp);
+	s = newstate(v->nfa);
+	NOERR();
+	word(v, BEHIND, lp, s);
+	nonword(v, AHEAD, s, rp);
+	return;
+    case NWBDRY:
+	wordchrs(v);		/* does NEXT() */
+	s = newstate(v->nfa);
+	NOERR();
+	word(v, BEHIND, lp, s);
+	word(v, AHEAD, s, rp);
+	s = newstate(v->nfa);
+	NOERR();
+	nonword(v, BEHIND, lp, s);
+	nonword(v, AHEAD, s, rp);
+	return;
+    case LACON:			/* lookahead constraint */
+	pos = v->nextvalue;
+	NEXT();
+	s = newstate(v->nfa);
+	s2 = newstate(v->nfa);
+	NOERR();
+	t = parse(v, ')', LACON, s, s2);
+	freesubre(v, t);	/* internal structure irrelevant */
+	assert(SEE(')') || ISERR());
+	NEXT();
+	n = newlacon(v, s, s2, pos);
+	NOERR();
+	ARCV(LACON, n);
+	return;
+
+	/*
+	 * Then errors, to get them out of the way.
+	 */
+
+    case '*':
+    case '+':
+    case '?':
+    case '{':
+	ERR(REG_BADRPT);
+	return;
+    default:
+	ERR(REG_ASSERT);
+	return;
+
+	/*
+	 * Then plain characters, and minor variants on that theme.
+	 */
+
+    case ')':			/* unbalanced paren */
+	if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
+	    ERR(REG_EPAREN);
+	    return;
+	}
+
+	/*
+	 * Legal in EREs due to specification botch.
+	 */
+
+	NOTE(REG_UPBOTCH);
+	/* fallthrough into case PLAIN */
+    case PLAIN:
+	onechr(v, v->nextvalue, lp, rp);
+	okcolors(v->nfa, v->cm);
+	NOERR();
+	NEXT();
+	break;
+    case '[':
+	if (v->nextvalue == 1) {
+	    bracket(v, lp, rp);
+	} else {
+	    cbracket(v, lp, rp);
+	}
+	assert(SEE(']') || ISERR());
+	NEXT();
+	break;
+    case '.':
+	rainbow(v->nfa, v->cm, PLAIN,
+		(v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, lp, rp);
+	NEXT();
+	break;
+
+	/*
+	 * And finally the ugly stuff.
+	 */
+
+    case '(':			/* value flags as capturing or non */
+	cap = (type == LACON) ? 0 : v->nextvalue;
+	if (cap) {
+	    v->nsubexp++;
+	    subno = v->nsubexp;
+	    if ((size_t)subno >= v->nsubs) {
+		moresubs(v, subno);
+	    }
+	    assert((size_t)subno < v->nsubs);
+	} else {
+	    atomtype = PLAIN;	/* something that's not '(' */
+	}
+	NEXT();
+
+	/*
+	 * Need new endpoints because tree will contain pointers.
+	 */
+
+	s = newstate(v->nfa);
+	s2 = newstate(v->nfa);
+	NOERR();
+	EMPTYARC(lp, s);
+	EMPTYARC(s2, rp);
+	NOERR();
+	atom = parse(v, ')', PLAIN, s, s2);
+	assert(SEE(')') || ISERR());
+	NEXT();
+	NOERR();
+	if (cap) {
+	    v->subs[subno] = atom;
+	    t = subre(v, '(', atom->flags|CAP, lp, rp);
+	    NOERR();
+	    t->subno = subno;
+	    t->left = atom;
+	    atom = t;
+	}
+
+	/*
+	 * Postpone everything else pending possible {0}.
+	 */
+
+	break;
+    case BACKREF:		/* the Feature From The Black Lagoon */
+	INSIST(type != LACON, REG_ESUBREG);
+	INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
+	INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
+	NOERR();
+	assert(v->nextvalue > 0);
+	atom = subre(v, 'b', BACKR, lp, rp);
+	subno = v->nextvalue;
+	atom->subno = subno;
+	EMPTYARC(lp, rp);	/* temporarily, so there's something */
+	NEXT();
+	break;
+    }
+
+    /*
+     * ...and an atom may be followed by a quantifier.
+     */
+
+    switch (v->nexttype) {
+    case '*':
+	m = 0;
+	n = INFINITY;
+	qprefer = (v->nextvalue) ? LONGER : SHORTER;
+	NEXT();
+	break;
+    case '+':
+	m = 1;
+	n = INFINITY;
+	qprefer = (v->nextvalue) ? LONGER : SHORTER;
+	NEXT();
+	break;
+    case '?':
+	m = 0;
+	n = 1;
+	qprefer = (v->nextvalue) ? LONGER : SHORTER;
+	NEXT();
+	break;
+    case '{':
+	NEXT();
+	m = scannum(v);
+	if (EAT(',')) {
+	    if (SEE(DIGIT)) {
+		n = scannum(v);
+	    } else {
+		n = INFINITY;
+	    }
+	    if (m > n) {
+		ERR(REG_BADBR);
+		return;
+	    }
+
+	    /*
+	     * {m,n} exercises preference, even if it's {m,m}
+	     */
+
+	    qprefer = (v->nextvalue) ? LONGER : SHORTER;
+	} else {
+	    n = m;
+	    /*
+	     * {m} passes operand's preference through.
+	     */
+
+	    qprefer = 0;
+	}
+	if (!SEE('}')) {	/* catches errors too */
+	    ERR(REG_BADBR);
+	    return;
+	}
+	NEXT();
+	break;
+    default:			/* no quantifier */
+	m = n = 1;
+	qprefer = 0;
+	break;
+    }
+
+    /*
+     * Annoying special case: {0} or {0,0} cancels everything.
+     */
+
+    if (m == 0 && n == 0) {
+	if (atom != NULL) {
+	    freesubre(v, atom);
+	}
+	if (atomtype == '(') {
+	    v->subs[subno] = NULL;
+	}
+	delsub(v->nfa, lp, rp);
+	EMPTYARC(lp, rp);
+	return;
+    }
+
+    /*
+     * If not a messy case, avoid hard part.
+     */
+
+    assert(!MESSY(top->flags));
+    f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
+    if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
+	if (!(m == 1 && n == 1)) {
+	    repeat(v, lp, rp, m, n);
+	}
+	if (atom != NULL) {
+	    freesubre(v, atom);
+	}
+	top->flags = f;
+	return;
+    }
+
+    /*
+     * hard part: something messy
+     * That is, capturing parens, back reference, short/long clash, or an atom
+     * with substructure containing one of those.
+     */
+
+    /*
+     * Now we'll need a subre for the contents even if they're boring.
+     */
+
+    if (atom == NULL) {
+	atom = subre(v, '=', 0, lp, rp);
+	NOERR();
+    }
+
+    /*
+     * Prepare a general-purpose state skeleton.
+     *
+     *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
+     *   /                                            /
+     * [lp] ----> [s2] ----bypass---------------------
+     *
+     * where bypass is an empty, and prefix is some repetitions of atom
+     */
+
+    s = newstate(v->nfa);	/* first, new endpoints for the atom */
+    s2 = newstate(v->nfa);
+    NOERR();
+    moveouts(v->nfa, lp, s);
+    moveins(v->nfa, rp, s2);
+    NOERR();
+    atom->begin = s;
+    atom->end = s2;
+    s = newstate(v->nfa);	/* and spots for prefix and bypass */
+    s2 = newstate(v->nfa);
+    NOERR();
+    EMPTYARC(lp, s);
+    EMPTYARC(lp, s2);
+    NOERR();
+
+    /*
+     * Break remaining subRE into x{...} and what follows.
+     */
+
+    t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
+    t->left = atom;
+    atomp = &t->left;
+
+    /*
+     * Here we should recurse... but we must postpone that to the end.
+     */
+
+    /*
+     * Split top into prefix and remaining.
+     */
+
+    assert(top->op == '=' && top->left == NULL && top->right == NULL);
+    top->left = subre(v, '=', top->flags, top->begin, lp);
+    top->op = '.';
+    top->right = t;
+
+    /*
+     * If it's a backref, now is the time to replicate the subNFA.
+     */
+
+    if (atomtype == BACKREF) {
+	assert(atom->begin->nouts == 1);	/* just the EMPTY */
+	delsub(v->nfa, atom->begin, atom->end);
+	assert(v->subs[subno] != NULL);
+
+	/*
+	 * And here's why the recursion got postponed: it must wait until the
+	 * skeleton is filled in, because it may hit a backref that wants to
+	 * copy the filled-in skeleton.
+	 */
+
+	dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
+		atom->begin, atom->end);
+	NOERR();
+    }
+
+    /*
+     * It's quantifier time; first, turn x{0,...} into x{1,...}|empty
+     */
+
+    if (m == 0) {
+	EMPTYARC(s2, atom->end);/* the bypass */
+	assert(PREF(qprefer) != 0);
+	f = COMBINE(qprefer, atom->flags);
+	t = subre(v, '|', f, lp, atom->end);
+	NOERR();
+	t->left = atom;
+	t->right = subre(v, '|', PREF(f), s2, atom->end);
+	NOERR();
+	t->right->left = subre(v, '=', 0, s2, atom->end);
+	NOERR();
+	*atomp = t;
+	atomp = &t->left;
+	m = 1;
+    }
+
+    /*
+     * Deal with the rest of the quantifier.
+     */
+
+    if (atomtype == BACKREF) {
+	/*
+	 * Special case: backrefs have internal quantifiers.
+	 */
+
+	EMPTYARC(s, atom->begin);	/* empty prefix */
+
+	/*
+	 * Just stuff everything into atom.
+	 */
+
+	repeat(v, atom->begin, atom->end, m, n);
+	atom->min = (short) m;
+	atom->max = (short) n;
+	atom->flags |= COMBINE(qprefer, atom->flags);
+    } else if (m == 1 && n == 1) {
+	/*
+	 * No/vacuous quantifier: done.
+	 */
+
+	EMPTYARC(s, atom->begin);	/* empty prefix */
+    } else {
+	/*
+	 * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second
+	 * x
+	 */
+
+	dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
+	assert(m >= 1 && m != INFINITY && n >= 1);
+	repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
+	f = COMBINE(qprefer, atom->flags);
+	t = subre(v, '.', f, s, atom->end);	/* prefix and atom */
+	NOERR();
+	t->left = subre(v, '=', PREF(f), s, atom->begin);
+	NOERR();
+	t->right = atom;
+	*atomp = t;
+    }
+
+    /*
+     * And finally, look after that postponed recursion.
+     */
+
+    t = top->right;
+    if (!(SEE('|') || SEE(stopper) || SEE(EOS))) {
+	t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
+    } else {
+	EMPTYARC(atom->end, rp);
+	t->right = subre(v, '=', 0, atom->end, rp);
+    }
+    assert(SEE('|') || SEE(stopper) || SEE(EOS));
+    t->flags |= COMBINE(t->flags, t->right->flags);
+    top->flags |= COMBINE(top->flags, t->flags);
+}
+
+/*
+ - nonword - generate arcs for non-word-character ahead or behind
+ ^ static void nonword(struct vars *, int, struct state *, struct state *);
+ */
+static void
+nonword(
+    struct vars *v,
+    int dir,			/* lookahead (AHEAD) or lookbehind (BEHIND) */
+    struct state *lp,		/* every arc made here runs from lp... */
+    struct state *rp)		/* ...to rp */
+{
+    int edge;			/* anchor arc type that goes with dir */
+    assert(dir == AHEAD || dir == BEHIND);
+    edge = (dir != AHEAD) ? '^' : '$';
+    newarc(v->nfa, edge, 1, lp, rp);
+    newarc(v->nfa, edge, 0, lp, rp);
+    colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
+    /* (newline needs no special attention here) */
+}
+
+/*
+ - word - generate arcs for word character ahead or behind
+ ^ static void word(struct vars *, int, struct state *, struct state *);
+ */
+static void
+word(
+    struct vars *v,		/* supplies the NFA and the v->wordchrs state */
+    int dir,			/* AHEAD or BEHIND; forwarded to cloneouts() */
+    struct state *lp,		/* endpoints handed on to cloneouts()... */
+    struct state *rp)		/* ...unchanged */
+{
+    assert(dir == AHEAD || dir == BEHIND);
+    cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
+    /* (no need for special attention to \n) */
+}
+
+/*
+ - scannum - scan a number
+ ^ static int scannum(struct vars *);
+ */
+static int			/* value, <= DUPMAX */
+scannum(
+    struct vars *v)
+{
+    int value = 0;		/* number accumulated so far; 0 if no digits */
+
+    while (SEE(DIGIT) && value < DUPMAX) {
+	value = value*10 + v->nextvalue;
+	NEXT();
+    }
+    if (!SEE(DIGIT) && value <= DUPMAX) {
+	return value;		/* digits exhausted and still within bounds */
+    }
+    ERR(REG_BADBR);		/* more digits pending, or bound exceeded */
+    return 0;
+}
+
+/*
+ - repeat - replicate subNFA for quantifiers
+ * The duplication sequences used here are chosen carefully so that any
+ * pointers starting out pointing into the subexpression end up pointing into
+ * the last occurrence. (Note that it may not be strung between the same left
+ * and right end states, however!) This used to be important for the subRE
+ * tree, although the important bits are now handled by the in-line code in
+ * parse(), and when this is called, it doesn't matter any more.
+ ^ static void repeat(struct vars *, struct state *, struct state *, int, int);
+ */
+static void
+repeat(
+    struct vars *v,
+    struct state *lp,		/* left end of the sub-NFA being replicated */
+    struct state *rp,		/* right end of the sub-NFA being replicated */
+    int m,			/* minimum number of repetitions */
+    int n)			/* maximum number of repetitions, or INFINITY */
+{
+#define	SOME		2	/* reduced count: finite and greater than 1 */
+#define	INF		3	/* reduced count: INFINITY */
+#define	PAIR(x, y)	((x)*4 + (y))	/* two reduced counts -> one case label */
+#define	REDUCE(x)	( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
+    const int rm = REDUCE(m);	/* 0, 1, SOME or INF for a non-negative m */
+    const int rn = REDUCE(n);	/* 0, 1, SOME or INF for a non-negative n */
+    struct state *s, *s2;
+
+    switch (PAIR(rm, rn)) {
+    case PAIR(0, 0):		/* empty string */
+	delsub(v->nfa, lp, rp);
+	EMPTYARC(lp, rp);
+	break;
+    case PAIR(0, 1):		/* do as x| */
+	EMPTYARC(lp, rp);
+	break;
+    case PAIR(0, SOME):		/* do as x{1,n}| */
+	repeat(v, lp, rp, 1, n);
+	NOERR();
+	EMPTYARC(lp, rp);
+	break;
+    case PAIR(0, INF):		/* loop x around */
+	s = newstate(v->nfa);
+	NOERR();
+	moveouts(v->nfa, lp, s);
+	moveins(v->nfa, rp, s);
+	EMPTYARC(lp, s);
+	EMPTYARC(s, rp);
+	break;
+    case PAIR(1, 1):		/* no action required */
+	break;
+    case PAIR(1, SOME):		/* do as x{0,n-1}x = (x{1,n-1}|)x */
+	s = newstate(v->nfa);
+	NOERR();
+	moveouts(v->nfa, lp, s);
+	dupnfa(v->nfa, s, rp, lp, s);
+	NOERR();
+	repeat(v, lp, s, 1, n-1);
+	NOERR();
+	EMPTYARC(lp, s);
+	break;
+    case PAIR(1, INF):		/* add loopback arc */
+	s = newstate(v->nfa);
+	s2 = newstate(v->nfa);
+	NOERR();
+	moveouts(v->nfa, lp, s);
+	moveins(v->nfa, rp, s2);
+	EMPTYARC(lp, s);
+	EMPTYARC(s2, rp);
+	EMPTYARC(s2, s);
+	break;
+    case PAIR(SOME, SOME):		/* do as x{m-1,n-1}x */
+	s = newstate(v->nfa);
+	NOERR();
+	moveouts(v->nfa, lp, s);
+	dupnfa(v->nfa, s, rp, lp, s);
+	NOERR();
+	repeat(v, lp, s, m-1, n-1);
+	break;
+    case PAIR(SOME, INF):		/* do as x{m-1,}x */
+	s = newstate(v->nfa);
+	NOERR();
+	moveouts(v->nfa, lp, s);
+	dupnfa(v->nfa, s, rp, lp, s);
+	NOERR();
+	repeat(v, lp, s, m-1, n);
+	break;
+    default:			/* any pair not listed above, e.g. m > n */
+	ERR(REG_ASSERT);
+	break;
+    }
+}
+
+/*
+ - bracket - handle non-complemented bracket expression
+ * Also called from cbracket for complemented bracket expressions.
+ ^ static void bracket(struct vars *, struct state *, struct state *);
+ */
+static void
+bracket(
+    struct vars *v,
+    struct state *lp,		/* arcs for the bracket items start here... */
+    struct state *rp)		/* ...and end here */
+{
+    assert(SEE('['));
+    NEXT();			/* step past the opening bracket */
+    while (!(SEE(']') || SEE(EOS))) {
+	brackpart(v, lp, rp);	/* one item or range per call */
+    }
+    assert(SEE(']') || ISERR());
+    okcolors(v->nfa, v->cm);
+}
+
+/*
+ - cbracket - handle complemented bracket expression
+ * We do it by calling bracket() with dummy endpoints, and then complementing
+ * the result. The alternative would be to invoke rainbow(), and then delete
+ * arcs as the b.e. is seen... but that gets messy.
+ ^ static void cbracket(struct vars *, struct state *, struct state *);
+ */
+static void
+cbracket(
+    struct vars *v,
+    struct state *lp,		/* real endpoints: receive the complement */
+    struct state *rp)
+{
+    struct state *left = newstate(v->nfa);	/* dummy endpoints for the... */
+    struct state *right = newstate(v->nfa);	/* ...uncomplemented bracket */
+
+    NOERR();
+    bracket(v, left, right);
+    if (v->cflags&REG_NLSTOP) {	/* put newline's color into the dummy set */
+	newarc(v->nfa, PLAIN, v->nlcolor, left, right);
+    }
+    NOERR();
+
+    assert(lp->nouts == 0);	/* all outarcs will be ours */
+
+    /*
+     * Easy part of complementing, and all there is to do since the MCCE code
+     * was removed.
+     */
+
+    colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
+    NOERR();
+    dropstate(v->nfa, left);	/* the dummy states are no longer needed */
+    assert(right->nins == 0);
+    freestate(v->nfa, right);
+    return;
+}
+
+/*
+ - brackpart - handle one item (or range) within a bracket expression
+ ^ static void brackpart(struct vars *, struct state *, struct state *);
+ */
+static void
+brackpart(
+    struct vars *v,
+    struct state *lp,		/* arcs for this item run from lp... */
+    struct state *rp)		/* ...to rp */
+{
+    celt startc, endc;		/* endpoints of the range handed to range() */
+    struct cvec *cv;
+    const chr *startp, *endp;	/* text of a name scanned by scanplain() */
+    chr c[1];			/* lets a single PLAIN chr go through element() */
+
+    /*
+     * Parse something, get rid of special cases, take shortcuts.
+     */
+
+    switch (v->nexttype) {
+    case RANGE:			/* a-b-c or other botch */
+	ERR(REG_ERANGE);
+	return;
+	break;
+    case PLAIN:
+	c[0] = v->nextvalue;
+	NEXT();
+
+	/*
+	 * Shortcut for ordinary chr (not range).
+	 */
+
+	if (!SEE(RANGE)) {
+	    onechr(v, c[0], lp, rp);
+	    return;
+	}
+	startc = element(v, c, c+1);
+	NOERR();
+	break;
+    case COLLEL:
+	startp = v->now;
+	endp = scanplain(v);
+	INSIST(startp < endp, REG_ECOLLATE);
+	NOERR();
+	startc = element(v, startp, endp);
+	NOERR();
+	break;
+    case ECLASS:		/* handled completely here; never a range end */
+	startp = v->now;
+	endp = scanplain(v);
+	INSIST(startp < endp, REG_ECOLLATE);
+	NOERR();
+	startc = element(v, startp, endp);
+	NOERR();
+	cv = eclass(v, startc, (v->cflags&REG_ICASE));
+	NOERR();
+	dovec(v, cv, lp, rp);
+	return;
+	break;
+    case CCLASS:		/* handled completely here; never a range end */
+	startp = v->now;
+	endp = scanplain(v);
+	INSIST(startp < endp, REG_ECTYPE);
+	NOERR();
+	cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));
+	NOERR();
+	dovec(v, cv, lp, rp);
+	return;
+	break;
+    default:
+	ERR(REG_ASSERT);
+	return;
+	break;
+    }
+
+    if (SEE(RANGE)) {		/* startc is set; now find the other end */
+	NEXT();
+	switch (v->nexttype) {
+	case PLAIN:
+	case RANGE:		/* a RANGE token here is used as a plain chr */
+	    c[0] = v->nextvalue;
+	    NEXT();
+	    endc = element(v, c, c+1);
+	    NOERR();
+	    break;
+	case COLLEL:
+	    startp = v->now;
+	    endp = scanplain(v);
+	    INSIST(startp < endp, REG_ECOLLATE);
+	    NOERR();
+	    endc = element(v, startp, endp);
+	    NOERR();
+	    break;
+	default:
+	    ERR(REG_ERANGE);
+	    return;
+	    break;
+	}
+    } else {			/* a COLLEL on its own: one-element range */
+	endc = startc;
+    }
+
+    /*
+     * Ranges are unportable. Actually, standard C does guarantee that digits
+     * are contiguous, but making that an exception is just too complicated.
+     */
+
+    if (startc != endc) {
+	NOTE(REG_UUNPORT);
+    }
+    cv = range(v, startc, endc, (v->cflags&REG_ICASE));
+    NOERR();
+    dovec(v, cv, lp, rp);
+}
+
+/*
+ - scanplain - scan PLAIN contents of [. etc.
+ * Certain bits of trickery in lex.c know that this code does not try to look
+ * past the final bracket of the [. etc.
+ ^ static const chr *scanplain(struct vars *);
+ */
+static const chr *		/* just after end of sequence */
+scanplain(
+    struct vars *v)
+{
+    const chr *endp;		/* v->now as sampled before the latest NEXT() */
+
+    assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
+    NEXT();			/* move past the opening token */
+
+    endp = v->now;		/* the answer if no PLAIN follows at all */
+    while (SEE(PLAIN)) {
+	endp = v->now;		/* sample first: NEXT() then scans the next token */
+	NEXT();
+    }
+
+    assert(SEE(END) || ISERR());
+    NEXT();			/* consume the closing END token */
+
+    return endp;
+}
+
+/*
+ - onechr - fill in arcs for a plain character, and possible case complements
+ * This is mostly a shortcut for efficient handling of the common case.
+ ^ static void onechr(struct vars *, pchr, struct state *, struct state *);
+ */
+static void
+onechr(
+    struct vars *v,
+    pchr c,			/* the character to make arcs for */
+    struct state *lp,
+    struct state *rp)
+{
+    if (v->cflags&REG_ICASE) {
+	/*
+	 * Case-insensitive compile: no shortcut possible, so take the general
+	 * cvec path with whatever allcases() produces for c.
+	 */
+	dovec(v, allcases(v, c), lp, rp);
+    } else {
+	/* Case-sensitive: a single PLAIN arc in c's subcolor is all it takes. */
+	newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
+    }
+}
+
+/*
+ - dovec - fill in arcs for each element of a cvec
+ ^ static void dovec(struct vars *, struct cvec *, struct state *,
+ ^ 	struct state *);
+ */
+static void
+dovec(
+    struct vars *v,
+    struct cvec *cv,
+    struct state *lp,
+    struct state *rp)
+{
+    const chr *p = cv->chrs;
+    int remaining = cv->nchrs;
+
+    /* One PLAIN arc per individually listed character. */
+    while (remaining > 0) {
+	newarc(v->nfa, PLAIN, subcolor(v->cm, *p), lp, rp);
+	p++, remaining--;
+    }
+    /* Ranges are laid out as (from, to) pairs; inverted pairs are skipped. */
+    p = cv->ranges;
+    remaining = cv->nranges;
+    while (remaining > 0) {
+	if (p[0] <= p[1]) {
+	    subrange(v, p[0], p[1], lp, rp);
+	}
+	p += 2, remaining--;
+    }
+}
+
+/*
+ - wordchrs - set up word-chr list for word-boundary stuff, if needed
+ * The list is kept as a bunch of arcs between two dummy states; it's disposed
+ * of by the unreachable-states sweep in NFA optimization. Does NEXT(). Must
+ * not be called from any unusual lexical context. This should be reconciled
+ * with the \w etc. handling in lex.c, and should be cleaned up to reduce
+ * dependencies on input scanning.
+ ^ static void wordchrs(struct vars *);
+ */
+static void
+wordchrs(
+    struct vars *v)
+{
+    struct state *left, *right;	/* dummy states holding the word-chr arcs */
+
+    if (v->wordchrs != NULL) {	/* list already built by an earlier call */
+	NEXT();		/* for consistency */
+	return;
+    }
+
+    left = newstate(v->nfa);
+    right = newstate(v->nfa);
+    NOERR();
+
+    /*
+     * Fine point: implemented with [::], and lexer will set REG_ULOCALE.
+     */
+
+    lexword(v);			/* the asserts below expect a '[' ... ']' next */
+    NEXT();
+    assert(v->savenow != NULL && SEE('['));
+    bracket(v, left, right);
+    assert((v->savenow != NULL && SEE(']')) || ISERR());
+    NEXT();
+    NOERR();
+    v->wordchrs = left;		/* remembered only when nothing went wrong */
+}
+
+/*
+ - subre - allocate a subre
+ ^ static struct subre *subre(struct vars *, int, int, struct state *,
+ ^	struct state *);
+ */
+static struct subre *
+subre(
+    struct vars *v,
+    int op,			/* node type; must be one of | . b ( = */
+    int flags,
+    struct state *begin,
+    struct state *end)
+{
+    struct subre *node = v->treefree;
+
+    if (node == NULL) {
+	/* Free list is empty: get a fresh node and hook it onto treechain. */
+	node = (struct subre *) MALLOC(sizeof(struct subre));
+	if (node == NULL) {
+	    ERR(REG_ESPACE);
+	    return NULL;
+	}
+	node->chain = v->treechain;
+	v->treechain = node;
+    } else {
+	/* Recycle the head of the free list, which is linked through left. */
+	v->treefree = node->left;
+    }
+
+    assert(strchr("|.b(=", op) != NULL);
+    /* Type, flags and endpoints come from the caller; the rest is reset. */
+    node->begin = begin;
+    node->end = end;
+    node->op = op;
+    node->flags = flags;
+    node->left = node->right = NULL;
+    node->min = node->max = 1;
+    node->retry = 0;
+    node->subno = 0;
+    ZAPCNFA(node->cnfa);
+    return node;
+}
+
+/*
+ - freesubre - free a subRE subtree
+ ^ static void freesubre(struct vars *, struct subre *);
+ */
+static void
+freesubre(
+    struct vars *v,		/* might be NULL */
+    struct subre *sr)		/* root of the subtree; NULL is accepted */
+{
+    if (sr != NULL) {
+	/*
+	 * Post-order: release both subtrees before the node that links to
+	 * them. The recursive call copes with a NULL child on its own, so no
+	 * separate checks are needed here.
+	 */
+
+	freesubre(v, sr->left);
+	freesubre(v, sr->right);
+
+	freesrnode(v, sr);
+    }
+}
+
+/*
+ - freesrnode - free one node in a subRE subtree
+ ^ static void freesrnode(struct vars *, struct subre *);
+ */
+static void
+freesrnode(
+    struct vars *v,		/* might be NULL */
+    struct subre *sr)
+{
+    if (sr == NULL) {
+	return;
+    }
+
+    if (!NULLCNFA(sr->cnfa)) {
+	freecnfa(&sr->cnfa);
+    }
+    sr->flags = 0;		/* all flag bits off, INUSE included */
+    if (v == NULL) {
+	/* No compile context to recycle into: release the node outright. */
+	FREE(sr);
+	return;
+    }
+    sr->left = v->treefree;	/* push onto the free list, linked via left */
+    v->treefree = sr;
+}
+
+/*
+ - optst - optimize a subRE subtree
+ ^ static void optst(struct vars *, struct subre *);
+ */
+static void
+optst(
+    struct vars *v,		/* unused */
+    struct subre *t)		/* unused */
+{
+    /*
+     * DGP (2007-11-13): I assume it was the programmer's intent to eventually
+     * come back and add code to optimize subRE trees, but the routine coded
+     * just spends effort traversing the tree and doing nothing. We can do
+     * nothing with less effort.
+     */
+
+    return;			/* deliberate no-op; the tree is left as is */
+}
+
+/*
+ - numst - number tree nodes (assigning retry indexes)
+ ^ static int numst(struct subre *, int);
+ */
+static int			/* next number */
+numst(
+    struct subre *t,
+    int start)			/* starting point for subtree numbers */
+{
+    int next = start + 1;	/* first number not taken by this node */
+
+    assert(t != NULL);
+
+    /* Pre-order numbering: this node, then the left subtree, then the right. */
+    t->retry = (short) start;
+    if (t->left != NULL) {
+	next = numst(t->left, next);
+    }
+    if (t->right != NULL) {
+	next = numst(t->right, next);
+    }
+    return next;
+}
+
+/*
+ - markst - mark tree nodes as INUSE
+ ^ static void markst(struct subre *);
+ */
+static void
+markst(
+    struct subre *t)
+{
+    assert(t != NULL);
+
+    /* Pre-order walk: recurse to the left, iterate down the right spine. */
+    for (; t != NULL; t = t->right) {
+	t->flags |= INUSE;
+	if (t->left != NULL) {
+	    markst(t->left);
+	}
+    }
+}
+
+/*
+ - cleanst - free any tree nodes not marked INUSE
+ ^ static void cleanst(struct vars *);
+ */
+static void
+cleanst(
+    struct vars *v)
+{
+    struct subre *node = v->treechain;
+
+    while (node != NULL) {
+	struct subre *rest = node->chain;	/* read before node can go away */
+	if ((node->flags&INUSE) == 0) {
+	    FREE(node);
+	}
+	node = rest;
+    }
+    v->treefree = NULL;		/* just on general principles */
+    v->treechain = NULL;
+}
+
+/*
+ - nfatree - turn a subRE subtree into a tree of compacted NFAs
+ ^ static long nfatree(struct vars *, struct subre *, FILE *);
+ */
+static long			/* optimize results from top node */
+nfatree(
+    struct vars *v,
+    struct subre *t,		/* root of the subtree; must have a begin state */
+    FILE *f)			/* for debug output */
+{
+    assert(t != NULL && t->begin != NULL);
+
+    if (t->left != NULL) {	/* children first; their results are dropped */
+	(DISCARD) nfatree(v, t->left, f);
+    }
+    if (t->right != NULL) {
+	(DISCARD) nfatree(v, t->right, f);
+    }
+
+    return nfanode(v, t, f);	/* only this node's result reaches the caller */
+}
+
+/*
+ - nfanode - do one NFA for nfatree
+ ^ static long nfanode(struct vars *, struct subre *, FILE *);
+ */
+static long			/* optimize results */
+nfanode(
+    struct vars *v,
+    struct subre *t,
+    FILE *f)			/* for debug output */
+{
+    struct nfa *scratch;	/* working NFA that receives a copy of t's part */
+    long result = 0;
+    char idbuf[50];
+
+    assert(t->begin != NULL);
+
+    if (f != NULL) {
+	fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
+		stid(t, idbuf, sizeof(idbuf)));
+    }
+    scratch = newnfa(v, v->cm, v->nfa);
+    NOERRZ();
+    dupnfa(scratch, t->begin, t->end, scratch->init, scratch->final);
+    if (!ISERR()) {
+	specialcolors(scratch);
+	result = optimize(scratch, f);
+	if (!ISERR()) {
+	    /* Only an NFA that optimized cleanly is compacted into the node. */
+	    compact(scratch, &t->cnfa);
+	}
+    }
+    freenfa(scratch);
+    return result;
+}
+
+/*
+ - newlacon - allocate a lookahead-constraint subRE
+ ^ static int newlacon(struct vars *, struct state *, struct state *, int);
+ */
+static int			/* lacon number, or 0 on failure */
+newlacon(
+    struct vars *v,
+    struct state *begin,	/* endpoints of the constraint's sub-NFA */
+    struct state *end,
+    int pos)			/* stored in subno; nonzero means positive */
+{
+    struct subre *grown;	/* so a failed REALLOC cannot lose v->lacons */
+    int n = (v->nlacons == 0) ? 1 : v->nlacons;	/* new slot; 0th is skipped */
+
+    /*
+     * Grow through a temporary and commit pointer and count only on success.
+     * On failure v->lacons and v->nlacons still describe the old vector, so
+     * it is neither leaked nor left inconsistent.
+     */
+    if (v->nlacons == 0) {
+	grown = (struct subre *) MALLOC((n+1) * sizeof(*grown));
+    } else {
+	grown = (struct subre *) REALLOC(v->lacons, (n+1) * sizeof(*grown));
+    }
+    if (grown == NULL) {
+	ERR(REG_ESPACE);
+	return 0;
+    }
+    v->lacons = grown;
+    v->nlacons = n + 1;
+    grown[n].begin = begin;
+    grown[n].end = end;
+    grown[n].subno = pos;
+    ZAPCNFA(grown[n].cnfa);
+    return n;
+}
+
+/*
+ - freelacons - free lookahead-constraint subRE vector
+ ^ static void freelacons(struct subre *, int);
+ */
+static void
+freelacons(
+    struct subre *subs,		/* vector whose 0th slot is never used */
+    int n)			/* total slots, counting the unused 0th */
+{
+    int idx;
+
+    assert(n > 0);
+    /* Compacted NFAs hang off slots 1..n-1; drop those, then the vector. */
+    for (idx = 1; idx < n; idx++) {
+	if (!NULLCNFA(subs[idx].cnfa)) {
+	    freecnfa(&subs[idx].cnfa);
+	}
+    }
+    FREE(subs);
+}
+
+/*
+ - rfree - free a whole RE (insides of regfree)
+ ^ static void rfree(regex_t *);
+ */
+static void
+rfree(
+    regex_t *re)		/* NULL or an already-freed RE is ignored */
+{
+    struct guts *g;
+
+    if (re == NULL || re->re_magic != REMAGIC) {
+	return;			/* also what a repeated call on re runs into */
+    }
+
+    re->re_magic = 0;	/* invalidate RE */
+    g = (struct guts *) re->re_guts;
+    re->re_guts = NULL;
+    re->re_fns = NULL;
+    g->magic = 0;
+    freecm(&g->cmap);
+    if (g->tree != NULL) {
+	freesubre(NULL, g->tree);	/* NULL vars: nodes are FREE()d, not pooled */
+    }
+    if (g->lacons != NULL) {
+	freelacons(g->lacons, g->nlacons);
+    }
+    if (!NULLCNFA(g->search)) {
+	freecnfa(&g->search);
+    }
+    FREE(g);
+}
+
+/*
+ - dump - dump an RE in human-readable form
+ ^ static void dump(regex_t *, FILE *);
+ */
+static void
+dump(
+    regex_t *re,		/* compiled RE to describe */
+    FILE *f)			/* every line of the dump is written here */
+{
+#ifdef REG_DEBUG		/* without REG_DEBUG this function does nothing */
+    struct guts *g;
+    int i;
+
+    if (re->re_magic != REMAGIC) {	/* reported, but the dump goes on */
+	fprintf(f, "bad magic number (0x%x not 0x%x)\n",
+		re->re_magic, REMAGIC);
+    }
+    if (re->re_guts == NULL) {
+	fprintf(f, "NULL guts!!!\n");
+	return;
+    }
+    g = (struct guts *) re->re_guts;
+    if (g->magic != GUTSMAGIC) {	/* reported, but the dump goes on */
+	fprintf(f, "bad guts magic number (0x%x not 0x%x)\n",
+		g->magic, GUTSMAGIC);
+    }
+
+    fprintf(f, "\n\n\n========= DUMP ==========\n");
+    fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
+	    re->re_nsub, re->re_info, re->re_csize, g->ntree);
+
+    dumpcolors(&g->cmap, f);
+    if (!NULLCNFA(g->search)) {
+	fprintf(f, "\nsearch:\n");	/* to f like the rest, not to stdout */
+	dumpcnfa(&g->search, f);
+    }
+    for (i = 1; i < g->nlacons; i++) {	/* slot 0 of lacons is never used */
+	fprintf(f, "\nla%d (%s):\n", i,
+		(g->lacons[i].subno) ? "positive" : "negative");
+	dumpcnfa(&g->lacons[i].cnfa, f);
+    }
+    fprintf(f, "\n");
+    dumpst(g->tree, f, 0);	/* 0: begin/end states are not dereferenced */
+#endif
+}
+
+/*
+ - dumpst - dump a subRE tree
+ ^ static void dumpst(struct subre *, FILE *, int);
+ */
+static void
+dumpst(
+    struct subre *t,		/* may be NULL, which is reported as such */
+    FILE *f,
+    int nfapresent)		/* is the original NFA still around? */
+{
+    if (t != NULL) {
+	stdump(t, f, nfapresent);
+    } else {
+	fprintf(f, "null tree\n");
+    }
+    fflush(f);
+}
+
+/*
+ - stdump - recursive guts of dumpst
+ ^ static void stdump(struct subre *, FILE *, int);
+ */
+static void
+stdump(
+    struct subre *t,		/* node to print; must not be NULL */
+    FILE *f,
+    int nfapresent)		/* is the original NFA still around? */
+{
+    char idbuf[50];		/* scratch for stid(); reused for each id */
+
+    fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
+    if (t->flags&LONGER) {
+	fprintf(f, " longest");
+    }
+    if (t->flags&SHORTER) {
+	fprintf(f, " shortest");
+    }
+    if (t->flags&MIXED) {
+	fprintf(f, " hasmixed");
+    }
+    if (t->flags&CAP) {
+	fprintf(f, " hascapture");
+    }
+    if (t->flags&BACKR) {
+	fprintf(f, " hasbackref");
+    }
+    if (!(t->flags&INUSE)) {
+	fprintf(f, " UNUSED");
+    }
+    if (t->subno != 0) {
+	fprintf(f, " (#%d)", t->subno);
+    }
+    if (t->min != 1 || t->max != 1) {	/* only non-default bounds are shown */
+	fprintf(f, " {%d,", t->min);
+	if (t->max != INFINITY) {	/* an unbounded max prints as "{min,}" */
+	    fprintf(f, "%d", t->max);
+	}
+	fprintf(f, "}");
+    }
+    if (nfapresent) {		/* begin/end are dereferenced only if so */
+	fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
+    }
+    if (t->left != NULL) {
+	fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
+    }
+    if (t->right != NULL) {
+	fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
+    }
+    if (!NULLCNFA(t->cnfa)) {
+	fprintf(f, "\n");
+	dumpcnfa(&t->cnfa, f);
+    }
+    fprintf(f, "\n");
+    if (t->left != NULL) {	/* children follow this node's own output */
+	stdump(t->left, f, nfapresent);
+    }
+    if (t->right != NULL) {
+	stdump(t->right, f, nfapresent);
+    }
+}
+
+/*
+ - stid - identify a subtree node for dumping
+ ^ static const char *stid(struct subre *, char *, size_t);
+ */
+static const char *			/* points to buf or constant string */
+stid(
+    struct subre *t,
+    char *buf,			/* scratch space for the formatted identifier */
+    size_t bufsize)		/* capacity of buf, in bytes */
+{
+    /*
+     * Big enough for hex pointer or decimal t->retry? If not, leave buf alone.
+     */
+
+    if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) {
+	return "unable";
+    }
+    if (t->retry != 0) {
+	sprintf(buf, "%d", t->retry);	/* numbered node: show its retry index */
+    } else {
+	sprintf(buf, "%p", (void *) t);	/* %p is only defined for void * */
+    }
+    return buf;
+}
+
+#include "regc_lex.c"
+#include "regc_color.c"
+#include "regc_nfa.c"
+#include "regc_cvec.c"
+#include "regc_locale.c"
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regcustom.h b/contrib/hsrex/regcustom.h
new file mode 100644
index 0000000..c341c23
--- /dev/null
+++ b/contrib/hsrex/regcustom.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms - with or without
+ * modification - are permitted for any purpose, provided that redistributions
+ * in source form retain this entire copyright notice and indicate the origin
+ * and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Headers if any.
+ */
+
+#ifdef REGEX_STANDALONE
+#	include "regalone.h"
+#else
+#	include "tclInt.h"
+#endif
+
+/*
+ * Overrides for regguts.h definitions, if any.
+ */
+
+#define	FUNCPTR(name, args)	(*name)args
+#ifndef REGEX_STANDALONE
+#define	MALLOC(n)		ckalloc(n)
+#define	FREE(p)			ckfree(VS(p))
+#define	REALLOC(p,n)		ckrealloc(VS(p),n)
+#endif
+
+/*
+ * Do not insert extras between the "begin" and "end" lines - this chunk is
+ * automatically extracted to be fitted into regex.h.
+ */
+
+/* --- begin --- */
+/* Ensure certain things don't sneak in from system headers. */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+#ifdef __REG_NOFRONT
+#undef __REG_NOFRONT
+#endif
+#ifdef __REG_NOCHAR
+#undef __REG_NOCHAR
+#endif
+/* Interface types */
+#define	__REG_WIDE_T	Tcl_UniChar
+#define	__REG_REGOFF_T	long	/* Not really right, but good enough... */
+#define	__REG_VOID_T	void
+#define	__REG_CONST	const
+/* Names and declarations */
+#define	__REG_WIDE_COMPILE	TclReComp
+#define	__REG_WIDE_EXEC		TclReExec
+#define	__REG_NOFRONT		/* Don't want regcomp() and regexec() */
+#define	__REG_NOCHAR		/* Or the char versions */
+#define	regfree		TclReFree
+#define	regerror	TclReError
+/* --- end --- */
+
+/*
+ * Internal character type and related.
+ */
+
+#ifndef REGEX_STANDALONE	/* standalone: chr presumably comes from regalone.h */
+typedef Tcl_UniChar chr;	/* The type itself. */
+#endif
+typedef int pchr;		/* What it promotes to. */
+typedef unsigned uchr;		/* Unsigned type that will hold a chr. */
+typedef int celt;		/* Type to hold chr, or NOCELT */
+#define	NOCELT (-1)		/* Celt value which is not valid chr */
+#define	CHR(c) (UCHAR(c))	/* Turn char literal into chr literal */
+#define	DIGITVAL(c) ((c)-'0')	/* Turn chr digit into its value */
+#if TCL_UTF_MAX > 3
+#define	CHRBITS	32		/* Bits in a chr; must not use sizeof */
+#define	CHR_MIN	0x00000000	/* Smallest and largest chr; the value */
+#define	CHR_MAX	0xffffffff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#elif defined(REGEX_STANDALONE) && ! defined(REGEX_WCHAR)	/* narrow build */
+#	define CHRBITS	8	/* Bits in a chr; must not use sizeof */
+#	define CHR_MIN	0x00	/* Smallest and largest chr; the value */
+#	define CHR_MAX	0xff	/* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#else	/* NOTE(review): REGEX_WCHAR builds get 16 bits here; confirm vs. chr */
+#define	CHRBITS	16		/* Bits in a chr; must not use sizeof */
+#define	CHR_MIN	0x0000		/* Smallest and largest chr; the value */
+#define	CHR_MAX	0xffff		/* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#endif
+
+/*
+ * Functions operating on chr.
+ */
+
+#define	iscalnum(x)	Tcl_UniCharIsAlnum(x)
+#define	iscalpha(x)	Tcl_UniCharIsAlpha(x)
+#define	iscdigit(x)	Tcl_UniCharIsDigit(x)
+#define	iscspace(x)	Tcl_UniCharIsSpace(x)
+
+/*
+ * Name the external functions.
+ */
+
+#ifdef REGEX_STANDALONE
+#	ifdef REGEX_WCHAR
+#		define compile		re_wcomp
+#		define exec		re_wexec
+#		define __REG_NOCHAR
+#	else
+#		define compile		re_comp
+#		define exec		re_exec
+#		undef __REG_NOCHAR
+#	endif
+#else
+#define	compile		TclReComp
+#define	exec		TclReExec
+#endif
+
+/*
+ * Enable/disable debugging code (by whether REG_DEBUG is defined or not).
+ */
+
+#if 0				/* No debug unless requested by makefile. */
+#define	REG_DEBUG	/* */
+#endif
+
+
+#ifndef REGEX_STANDALONE	/* standalone builds get no AllocVars from here */
+/*
+ * Method of allocating a local workspace. We used a thread-specific data
+ * space to store this because the regular expression engine is never
+ * reentered from the same thread; it doesn't make any callbacks.
+ */
+#define AllocVars(vPtr) \
+    static Tcl_ThreadDataKey varsKey; \
+    register struct vars *vPtr = (struct vars *) \
+	    Tcl_GetThreadData(&varsKey, sizeof(struct vars))
+#elif 0				/* never compiled; kept for reference only */
+/*
+ * This strategy for allocating workspace is "more proper" in some sense, but
+ * quite a bit slower. Using TSD (as above) leads to code that is quite a bit
+ * faster in practice (measured!)
+ */
+#define AllocVars(vPtr) \
+    register struct vars *vPtr = (struct vars *) MALLOC(sizeof(struct vars))
+#define FreeVars(vPtr) \
+    FREE(vPtr)
+#endif
+
+/*
+ * And pick up the standard header.
+ */
+
+#include "regex.h"
diff --git a/contrib/hsrex/rege_dfa.c b/contrib/hsrex/rege_dfa.c
new file mode 100644
index 0000000..fbeae20
--- /dev/null
+++ b/contrib/hsrex/rege_dfa.c
@@ -0,0 +1,816 @@
+/*
+ * DFA routines
+ * This file is #included by regexec.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ - longest - longest-preferred matching engine
+ ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
+ */
+static chr *			/* endpoint, or NULL */
+longest(
+    struct vars *v,		/* used only for debug and exec flags */
+    struct dfa *d,
+    chr *start,			/* where the match should start */
+    chr *stop,			/* match must end at or before here */
+    int *hitstopp)		/* record whether hit v->stop, if non-NULL */
+{
+    chr *cp;			/* next chr to be consumed */
+    chr *realstop = (stop == v->stop) ? stop : stop + 1;
+    color co;
+    struct sset *css;		/* current state set */
+    struct sset *ss;		/* state set reached from css */
+    chr *post;			/* latest lastseen of a POSTSTATE set */
+    int i;
+    struct colormap *cm = d->cm;
+
+    /*
+     * Initialize: fetch the starting state set and clear the stop report.
+     */
+
+    css = initialize(v, d, start);
+    cp = start;
+    if (hitstopp != NULL) {
+	*hitstopp = 0;
+    }
+
+    /*
+     * Startup: step on a bos[] color at v->start, else on the previous chr's.
+     */
+
+    FDEBUG(("+++ startup +++\n"));
+    if (cp == v->start) {
+	co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
+	FDEBUG(("color %ld\n", (long)co));
+    } else {
+	co = GETCOLOR(cm, *(cp - 1));
+	FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
+    }
+    css = miss(v, d, css, co, cp, start);
+    if (css == NULL) {
+	return NULL;
+    }
+    css->lastseen = cp;
+
+    /*
+     * Main loop: one chr per step; the two arms differ only in FDEBUG calls.
+     */
+
+    if (v->eflags&REG_FTRACE) {
+	while (cp < realstop) {
+	    FDEBUG(("+++ at c%d +++\n", css - d->ssets));
+	    co = GETCOLOR(cm, *cp);
+	    FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
+	    ss = css->outs[co];
+	    if (ss == NULL) {
+		ss = miss(v, d, css, co, cp+1, start);
+		if (ss == NULL) {
+		    break;	/* NOTE BREAK OUT */
+		}
+	    }
+	    cp++;
+	    ss->lastseen = cp;
+	    css = ss;
+	}
+    } else {
+	while (cp < realstop) {
+	    co = GETCOLOR(cm, *cp);
+	    ss = css->outs[co];
+	    if (ss == NULL) {
+		ss = miss(v, d, css, co, cp+1, start);
+		if (ss == NULL) {
+		    break;	/* NOTE BREAK OUT */
+		}
+	    }
+	    cp++;
+	    ss->lastseen = cp;
+	    css = ss;
+	}
+    }
+
+    /*
+     * Shutdown: if the scan ran up to v->stop, also step on an eos[] color.
+     */
+
+    FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
+    if (cp == v->stop && stop == v->stop) {
+	if (hitstopp != NULL) {
+	    *hitstopp = 1;
+	}
+	co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
+	FDEBUG(("color %ld\n", (long)co));
+	ss = miss(v, d, css, co, cp, start);
+
+	/*
+	 * Special case: match ended at eol?
+	 */
+
+	if (ss != NULL && (ss->flags&POSTSTATE)) {
+	    return cp;
+	} else if (ss != NULL) {
+	    ss->lastseen = cp;	/* to be tidy */
+	}
+    }
+
+    /*
+     * Find last match, if any: latest of d->lastpost and POSTSTATE lastseens.
+     */
+
+    post = d->lastpost;
+    for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) {
+	if ((ss->flags&POSTSTATE) && (post != ss->lastseen) &&
+		(post == NULL || post < ss->lastseen)) {
+	    post = ss->lastseen;
+	}
+    }
+    if (post != NULL) {		/* found one */
+	return post - 1;	/* presumably seen one chr past the match end */
+    }
+
+    return NULL;
+}
+
+/*
+ - shortest - shortest-preferred matching engine
+ ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *,
+ ^ 	chr **, int *);
+ */
+static chr *			/* endpoint, or NULL */
+shortest(
+    struct vars *v,
+    struct dfa *d,
+    chr *start,			/* where the match should start */
+    chr *min,			/* match must end at or after here */
+    chr *max,			/* match must end at or before here */
+    chr **coldp,		/* store coldstart pointer here, if nonNULL */
+    int *hitstopp)		/* record whether hit v->stop, if non-NULL */
+{
+    chr *cp;			/* next chr to be consumed */
+    chr *realmin = (min == v->stop) ? min : min + 1;
+    chr *realmax = (max == v->stop) ? max : max + 1;
+    color co;
+    struct sset *css;		/* current state set */
+    struct sset *ss;		/* state set reached from css */
+    struct colormap *cm = d->cm;
+
+    /*
+     * Initialize: fetch the starting state set and clear the stop report.
+     */
+
+    css = initialize(v, d, start);
+    cp = start;
+    if (hitstopp != NULL) {
+	*hitstopp = 0;
+    }
+
+    /*
+     * Startup: step on a bos[] color at v->start, else on the previous chr's.
+     */
+
+    FDEBUG(("--- startup ---\n"));
+    if (cp == v->start) {
+	co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
+	FDEBUG(("color %ld\n", (long)co));
+    } else {
+	co = GETCOLOR(cm, *(cp - 1));
+	FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
+    }
+    css = miss(v, d, css, co, cp, start);
+    if (css == NULL) {
+	return NULL;
+    }
+    css->lastseen = cp;
+    ss = css;
+
+    /*
+     * Main loop: one chr per step, leaving at the first POSTSTATE >= realmin.
+     */
+
+    if (v->eflags&REG_FTRACE) {
+	while (cp < realmax) {
+	    FDEBUG(("--- at c%d ---\n", css - d->ssets));
+	    co = GETCOLOR(cm, *cp);
+	    FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
+	    ss = css->outs[co];
+	    if (ss == NULL) {
+		ss = miss(v, d, css, co, cp+1, start);
+		if (ss == NULL) {
+		    break;	/* NOTE BREAK OUT */
+		}
+	    }
+	    cp++;
+	    ss->lastseen = cp;
+	    css = ss;
+	    if ((ss->flags&POSTSTATE) && cp >= realmin) {
+		break;		/* NOTE BREAK OUT */
+	    }
+	}
+    } else {
+	while (cp < realmax) {
+	    co = GETCOLOR(cm, *cp);
+	    ss = css->outs[co];
+	    if (ss == NULL) {
+		ss = miss(v, d, css, co, cp+1, start);
+		if (ss == NULL) {
+		    break;	/* NOTE BREAK OUT */
+		}
+	    }
+	    cp++;
+	    ss->lastseen = cp;
+	    css = ss;
+	    if ((ss->flags&POSTSTATE) && cp >= realmin) {
+		break;		/* NOTE BREAK OUT */
+	    }
+	}
+    }
+
+    if (ss == NULL) {		/* the loop ended on a miss() dead end */
+	return NULL;
+    }
+
+    if (coldp != NULL) {	/* report last no-progress state set, if any */
+	*coldp = lastcold(v, d);
+    }
+
+    if ((ss->flags&POSTSTATE) && cp > min) {
+	assert(cp >= realmin);
+	cp--;			/* presumably seen one chr past the match end */
+    } else if (cp == v->stop && max == v->stop) {
+	co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
+	FDEBUG(("color %ld\n", (long)co));
+	ss = miss(v, d, css, co, cp, start);
+
+	/*
+	 * Match might have ended at eol.
+	 */
+
+	if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) {
+	    *hitstopp = 1;
+	}
+    }
+
+    if (ss == NULL || !(ss->flags&POSTSTATE)) {
+	return NULL;
+    }
+
+    return cp;
+}
+
+/*
+ - lastcold - determine last point at which no progress had been made
+ ^ static chr *lastcold(struct vars *, struct dfa *);
+ */
+static chr *			/* latest no-progress point found */
+lastcold(
+    struct vars *v,
+    struct dfa *d)
+{
+    chr *latest = (d->lastnopr != NULL) ? d->lastnopr : v->start;
+    int k;
+
+    for (k = 0; k < d->nssused; k++) {
+	struct sset *cur = &d->ssets[k];
+
+	if (!(cur->flags&NOPROGRESS)) {
+	    continue;		/* only no-progress sets are of interest */
+	}
+	if (latest < cur->lastseen) {
+	    latest = cur->lastseen;
+	}
+    }
+    return latest;
+}
+
+/*
+ - newdfa - set up a fresh DFA
+ ^ static struct dfa *newdfa(struct vars *, struct cnfa *,
+ ^ 	struct colormap *, struct smalldfa *);
+ */
+static struct dfa *		/* fresh DFA, or NULL once ERR() has been raised */
+newdfa(
+    struct vars *v,
+    struct cnfa *cnfa,
+    struct colormap *cm,
+    struct smalldfa *small)	/* preallocated space, may be NULL */
+{
+    struct dfa *d;
+    size_t nss;			/* state sets to provide: two per NFA state */
+    int wordsper;		/* unsigneds needed for one state bitvector */
+    struct smalldfa *smallwas = small;
+
+    /* Check cnfa before anything reads through it. */
+    assert(cnfa != NULL && cnfa->nstates != 0);
+    nss = cnfa->nstates * 2;
+    wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
+
+    if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) {
+	assert(wordsper == 1);
+	if (small == NULL) {
+	    small = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
+	    if (small == NULL) {
+		ERR(REG_ESPACE);
+		return NULL;
+	    }
+	}
+	d = &small->dfa;
+	d->ssets = small->ssets;
+	d->statesarea = small->statesarea;
+	d->work = &d->statesarea[nss];
+	d->outsarea = small->outsarea;
+	d->incarea = small->incarea;
+	d->cptsmalloced = 0;
+	d->mallocarea = (smallwas == NULL) ? (char *)small : NULL;
+    } else {
+	d = (struct dfa *)MALLOC(sizeof(struct dfa));
+	if (d == NULL) {
+	    ERR(REG_ESPACE);
+	    return NULL;
+	}
+	d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset));
+	d->statesarea = (unsigned *)
+		MALLOC((nss+WORK) * wordsper * sizeof(unsigned));
+	d->outsarea = (struct sset **)
+		MALLOC(nss * cnfa->ncolors * sizeof(struct sset *));
+	d->incarea = (struct arcp *)
+		MALLOC(nss * cnfa->ncolors * sizeof(struct arcp));
+	d->cptsmalloced = 1;
+	d->mallocarea = (char *)d;
+	if (d->ssets == NULL || d->statesarea == NULL ||
+		d->outsarea == NULL || d->incarea == NULL) {
+	    freedfa(d);
+	    ERR(REG_ESPACE);
+	    return NULL;
+	}
+	d->work = &d->statesarea[nss * wordsper]; /* statesarea known good */
+    }
+
+    d->nssets = (v->eflags&REG_SMALL) ? 7 : nss;
+    d->nssused = 0;
+    d->nstates = cnfa->nstates;
+    d->ncolors = cnfa->ncolors;
+    d->wordsper = wordsper;
+    d->cnfa = cnfa;
+    d->cm = cm;
+    d->lastpost = NULL;
+    d->lastnopr = NULL;
+    d->search = d->ssets;
+
+    /* Initialization of sset fields is done as needed. */
+    return d;
+}
+
+/*
+ - freedfa - free a DFA
+ ^ static void freedfa(struct dfa *);
+ */
+static void
+freedfa(
+    struct dfa *d)
+{
+    void *parts[4];
+    int k;
+
+    if (d->cptsmalloced) {	/* the four work arrays are separate blocks */
+	parts[0] = d->ssets;
+	parts[1] = d->statesarea;
+	parts[2] = d->outsarea;
+	parts[3] = d->incarea;
+	for (k = 0; k < 4; k++) {
+	    if (parts[k] != NULL) {
+		FREE(parts[k]);
+	    }
+	}
+    }
+    /* Released last: newdfa() points mallocarea at the block holding d. */
+    if (d->mallocarea != NULL) {
+	FREE(d->mallocarea);
+    }
+}
+
+/*
+ - hash - construct a hash code for a bitvector
+ * There are probably better ways, but they're more expensive.
+ ^ static unsigned hash(unsigned *, int);
+ */
+static unsigned			/* XOR of the first n words of uv */
+hash(
+    unsigned *uv,
+    int n)
+{
+    unsigned acc = 0;
+    unsigned *word = uv;
+
+    /* Fold the vector in one word at a time; n <= 0 gives 0. */
+    while (n-- > 0) {
+	acc ^= *word++;
+    }
+    return acc;
+}
+
+/*
+ - initialize - hand-craft a cache entry for startup, otherwise get ready
+ ^ static struct sset *initialize(struct vars *, struct dfa *, chr *);
+ */
+static struct sset *		/* the starting state set */
+initialize(
+    struct vars *v,		/* used only for debug flags */
+    struct dfa *d,
+    chr *start)
+{
+    struct sset *first;
+    struct sset *each;
+    int w;
+
+    if (d->nssused <= 0 || !(d->ssets[0].flags&STARTER)) {
+	/*
+	 * No starter cached in slot 0: take a vacant set and make its
+	 * bitvector hold exactly the NFA's pre state.
+	 */
+
+	first = getvacant(v, d, start, start);
+	w = 0;
+	while (w < d->wordsper) {
+	    first->states[w++] = 0;
+	}
+	BSET(first->states, d->cnfa->pre);
+	first->hash = HASH(first->states, d->wordsper);
+	assert(d->cnfa->pre != d->cnfa->post);
+	first->flags = STARTER|LOCKED|NOPROGRESS;
+    } else {
+	first = &d->ssets[0];	/* previous starter is still there */
+    }
+
+    /* Drop every position recorded so far, then stamp the starter. */
+    for (each = d->ssets; each < d->ssets + d->nssused; each++) {
+	each->lastseen = NULL;
+    }
+    d->lastpost = NULL;
+    d->lastnopr = NULL;
+    first->lastseen = start;	/* maybe untrue, but harmless */
+    return first;
+}
+
+/*
+ - miss - handle a cache miss
+ ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *,
+ ^ 	pcolor, chr *, chr *);
+ */
+static struct sset *		/* NULL if goes to empty set */
+miss(
+    struct vars *v,		/* used only for debug flags */
+    struct dfa *d,
+    struct sset *css,		/* state set being left */
+    pcolor co,			/* color being consumed */
+    chr *cp,			/* next chr */
+    chr *start)			/* where the attempt got started */
+{
+    struct cnfa *cnfa = d->cnfa;
+    int i;
+    unsigned h;			/* hash of d->work */
+    struct carc *ca;
+    struct sset *p;
+    int ispost;			/* d->work contains cnfa->post */
+    int noprogress;		/* no reached state has states[to]->co == 0 */
+    int gotstate;		/* some arc of color co was followed */
+    int dolacons;		/* another closure pass is needed */
+    int sawlacons;		/* a lacon arc was met; outarc not cached */
+
+    /*
+     * For convenience, we can be called even if it might not be a miss.
+     */
+
+    if (css->outs[co] != NULL) {
+	FDEBUG(("hit\n"));
+	return css->outs[co];
+    }
+    FDEBUG(("miss\n"));
+
+    /*
+     * First, what set of states would we end up in?
+     */
+
+    for (i = 0; i < d->wordsper; i++) {
+	d->work[i] = 0;
+    }
+    ispost = 0;
+    noprogress = 1;
+    gotstate = 0;
+    for (i = 0; i < d->nstates; i++) {
+	if (ISBSET(css->states, i)) {
+	    for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) {
+		if (ca->co == co) {
+		    BSET(d->work, ca->to);
+		    gotstate = 1;
+		    if (ca->to == cnfa->post) {
+			ispost = 1;
+		    }
+		    if (!cnfa->states[ca->to]->co) {
+			noprogress = 0;
+		    }
+		    FDEBUG(("%d -> %d\n", i, ca->to));
+		}
+	    }
+	}
+    }
+    dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
+    sawlacons = 0;
+    while (dolacons) {		/* transitive closure */
+	dolacons = 0;
+	for (i = 0; i < d->nstates; i++) {
+	    if (ISBSET(d->work, i)) {
+		for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) {
+		    if (ca->co <= cnfa->ncolors) {	/* not a lacon arc */
+			continue; /* NOTE CONTINUE */
+		    }
+		    sawlacons = 1;
+		    if (ISBSET(d->work, ca->to)) {
+			continue; /* NOTE CONTINUE */
+		    }
+		    if (!lacon(v, cnfa, cp, ca->co)) {
+			continue; /* NOTE CONTINUE */
+		    }
+		    BSET(d->work, ca->to);
+		    dolacons = 1;
+		    if (ca->to == cnfa->post) {
+			ispost = 1;
+		    }
+		    if (!cnfa->states[ca->to]->co) {
+			noprogress = 0;
+		    }
+		    FDEBUG(("%d :> %d\n", i, ca->to));
+		}
+	    }
+	}
+    }
+    if (!gotstate) {		/* nothing reachable on co: the empty set */
+	return NULL;
+    }
+    h = HASH(d->work, d->wordsper);
+
+    /*
+     * Next, is that in the cache?
+     */
+
+    for (p = d->ssets, i = d->nssused; i > 0; p++, i--) {
+	 if (HIT(h, d->work, p, d->wordsper)) {
+	     FDEBUG(("cached c%d\n", p - d->ssets));
+	     break;			/* NOTE BREAK OUT */
+	 }
+    }
+    if (i == 0) {		/* nope, need a new cache entry */
+	p = getvacant(v, d, cp, start);
+	assert(p != css);
+	for (i = 0; i < d->wordsper; i++) {
+	    p->states[i] = d->work[i];
+	}
+	p->hash = h;
+	p->flags = (ispost) ? POSTSTATE : 0;
+	if (noprogress) {
+	    p->flags |= NOPROGRESS;
+	}
+
+	/*
+	 * lastseen to be dealt with by caller
+	 */
+    }
+
+    if (!sawlacons) {		/* lookahead conds. always cache miss */
+	FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets));
+	css->outs[co] = p;
+	css->inchain[co] = p->ins;	/* old head of p's inarc chain */
+	p->ins.ss = css;		/* (css, co) becomes the new head */
+	p->ins.co = (color)co;
+    }
+    return p;
+}
+
+/*
+ - lacon - lookahead-constraint checker for miss()
+ ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
+ */
+static int			/* predicate:  constraint satisfied? */
+lacon(
+    struct vars *v,
+    struct cnfa *pcnfa,		/* parent cnfa */
+    chr *cp,
+    pcolor co)			/* "color" of the lookahead constraint */
+{
+    int n = co - pcnfa->ncolors;	/* index into v->g->lacons */
+    struct smalldfa sd;		/* scratch space offered to newdfa() */
+    struct subre *sub;
+    struct dfa *d;
+    int matched;
+
+    assert(n < v->g->nlacons && v->g->lacons != NULL);
+    FDEBUG(("=== testing lacon %d\n", n));
+    sub = &v->g->lacons[n];
+    if ((d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd)) == NULL) {
+	ERR(REG_ESPACE);
+	return 0;
+    }
+
+    /* Run the constraint's own DFA from cp up to v->stop. */
+    matched = (longest(v, d, cp, v->stop, (int *)NULL) != NULL);
+    freedfa(d);
+    FDEBUG(("=== lacon %d match %d\n", n, matched));
+    return (sub->subno) ? matched : !matched;
+}
+
+/*
+ - getvacant - get a vacant state set
+ * This routine clears out the inarcs and outarcs, but does not otherwise
+ * clear the innards of the state set -- that's up to the caller.
+ ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
+ */
+static struct sset *		/* a state set with no inarcs or outarcs */
+getvacant(
+    struct vars *v,		/* used only for debug flags */
+    struct dfa *d,
+    chr *cp,
+    chr *start)
+{
+    int i;
+    struct sset *ss;		/* the set being vacated */
+    struct sset *p;
+    struct arcp ap;		/* cursor along an inarc chain */
+    struct arcp lastap = {NULL, 0}; /* silence gcc 4 warning */
+    color co;
+
+    ss = pickss(v, d, cp, start);
+    assert(!(ss->flags&LOCKED));
+
+    /*
+     * Clear out its inarcs, including self-referential ones.
+     */
+
+    ap = ss->ins;
+    while ((p = ap.ss) != NULL) {
+	co = ap.co;
+	FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co));
+	p->outs[co] = NULL;
+	ap = p->inchain[co];	/* fetch the next link before clearing it */
+	p->inchain[co].ss = NULL; /* paranoia */
+    }
+    ss->ins.ss = NULL;
+
+    /*
+     * Take it off the inarc chains of the ssets reached by its outarcs.
+     */
+
+    for (i = 0; i < d->ncolors; i++) {
+	p = ss->outs[i];
+	assert(p != ss);	/* not self-referential */
+	if (p == NULL) {
+	    continue;		/* NOTE CONTINUE */
+	}
+	FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
+	if (p->ins.ss == ss && p->ins.co == i) {
+	    p->ins = ss->inchain[i];	/* (ss, i) was the chain head */
+	} else {
+	    assert(p->ins.ss != NULL);
+	    for (ap = p->ins; ap.ss != NULL &&
+		    !(ap.ss == ss && ap.co == i);
+		    ap = ap.ss->inchain[ap.co]) {
+		lastap = ap;	/* link just before the one examined next */
+	    }
+	    assert(ap.ss != NULL);
+	    lastap.ss->inchain[lastap.co] = ss->inchain[i];	/* unlink */
+	}
+	ss->outs[i] = NULL;
+	ss->inchain[i].ss = NULL;
+    }
+
+    /*
+     * If ss was a success state, may need to remember location.
+     */
+
+    if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost &&
+	    (d->lastpost == NULL || d->lastpost < ss->lastseen)) {
+	d->lastpost = ss->lastseen;
+    }
+
+    /*
+     * Likewise for a no-progress state.
+     */
+
+    if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr &&
+	    (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) {
+	d->lastnopr = ss->lastseen;
+    }
+
+    return ss;
+}
+
+/*
+ - pickss - pick the next stateset to be used
+ ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
+ */
+static struct sset *		/* fresh slot, or an existing set to recycle */
+pickss(
+    struct vars *v,		/* used only for debug flags */
+    struct dfa *d,
+    chr *cp,
+    chr *start)
+{
+    int i;
+    struct sset *ss;
+    struct sset *end;
+    chr *ancient;		/* sets last seen before this may be reused */
+
+    /*
+     * Shortcut for cases where cache isn't full: take the next unused slot.
+     */
+
+    if (d->nssused < d->nssets) {
+	i = d->nssused;
+	d->nssused++;
+	ss = &d->ssets[i];
+	FDEBUG(("new c%d\n", i));
+
+	/*
+	 * Set up innards: point the set at its slices of the DFA's arrays.
+	 */
+
+	ss->states = &d->statesarea[i * d->wordsper];
+	ss->flags = 0;
+	ss->ins.ss = NULL;
+	ss->ins.co = WHITE;	/* give it some value */
+	ss->outs = &d->outsarea[i * d->ncolors];
+	ss->inchain = &d->incarea[i * d->ncolors];
+	for (i = 0; i < d->ncolors; i++) {
+	    ss->outs[i] = NULL;
+	    ss->inchain[i].ss = NULL;
+	}
+	return ss;
+    }
+
+    /*
+     * Look for oldest, or old enough anyway.
+     */
+
+    if (cp - start > d->nssets*2/3) {	/* oldest 33% are expendable */
+	ancient = cp - d->nssets*2/3;
+    } else {
+	ancient = start;
+    }
+    for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) {
+	if ((ss->lastseen == NULL || ss->lastseen < ancient)
+		&& !(ss->flags&LOCKED)) {
+	    d->search = ss + 1;	/* next search resumes after this one */
+	    FDEBUG(("replacing c%d\n", ss - d->ssets));
+	    return ss;
+	}
+    }
+    for (ss = d->ssets, end = d->search; ss < end; ss++) {
+	if ((ss->lastseen == NULL || ss->lastseen < ancient)
+		&& !(ss->flags&LOCKED)) {
+	    d->search = ss + 1;	/* next search resumes after this one */
+	    FDEBUG(("replacing c%d\n", ss - d->ssets));
+	    return ss;
+	}
+    }
+
+    /*
+     * Nobody's old enough?!? -- something's really wrong.
+     */
+
+    FDEBUG(("can't find victim to replace!\n"));
+    assert(NOTREACHED);
+    ERR(REG_ASSERT);
+    return d->ssets;		/* presumably reached only with asserts off */
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regerror.c b/contrib/hsrex/regerror.c
new file mode 100644
index 0000000..49b6f3e
--- /dev/null
+++ b/contrib/hsrex/regerror.c
@@ -0,0 +1,129 @@
+/*
+ * regerror - error-code expansion
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "regguts.h"
+
+/*
+ * Unknown-error explanation; regerror() uses it as a sprintf format.
+ */
+
+static char unk[] = "*** unknown regex error code 0x%x ***";
+
+/*
+ * Struct to map among codes, code names, and explanations.
+ */
+
+static struct rerr {
+    int code;			/* error number; negative in the last row */
+    const char *name;		/* symbolic name of the code, as a string */
+    const char *explain;	/* human-readable explanation */
+} rerrs[] = {
+    /* The actual rows are pulled in from regerrs.h */
+#include "regerrs.h"
+    { -1, "", "oops" },		/* explanation special-cased in code */
+};
+
+/*
+ - regerror - the interface to error numbers
+ */
+/* ARGSUSED */
+size_t				/* Actual space needed (including NUL) */
+regerror(
+    int code,			/* Error code, or REG_ATOI or REG_ITOA */
+    const regex_t *preg,	/* Associated regex_t (unused at present) */
+    char *errbuf,		/* Result buffer (unless errbuf_size==0) */
+    size_t errbuf_size)		/* Available space in errbuf, can be 0 */
+{
+    char convbuf[sizeof(unk)+50]; /* 50 = plenty for int */
+    struct rerr *r = rerrs;
+    const char *msg;
+    size_t len;
+    int icode;
+
+    /*
+     * Pick the message.  Each lookup walks rerrs[] until it reaches the
+     * wanted row or a row with a negative code (the -1 terminator), so a
+     * failed search leaves r on the terminator.  REG_ATOI and REG_ITOA
+     * take their input from errbuf itself.
+     */
+
+    if (code == REG_ATOI) {	/* Convert name to number */
+	while (r->code >= 0 && strcmp(r->name, errbuf) != 0) {
+	    r++;
+	}
+	sprintf(convbuf, "%d", r->code); /* -1 for unknown */
+	msg = convbuf;
+    } else if (code == REG_ITOA) { /* Convert number to name */
+	icode = atoi(errbuf);	/* Not our problem if this fails */
+	while (r->code >= 0 && r->code != icode) {
+	    r++;
+	}
+	if (r->code < 0) {	/* Unknown; tell him the number */
+	    sprintf(convbuf, "REG_%u", (unsigned)icode);
+	    msg = convbuf;
+	} else {
+	    msg = r->name;
+	}
+    } else {			/* A real, normal error code */
+	while (r->code >= 0 && r->code != code) {
+	    r++;
+	}
+	if (r->code < 0) {	/* Unknown; say so */
+	    sprintf(convbuf, unk, code);
+	    msg = convbuf;
+	} else {
+	    msg = r->explain;
+	}
+    }
+
+    /*
+     * Hand back as much of the message as the caller's buffer can take.
+     */
+    len = strlen(msg) + 1;	/* Space needed, including NUL */
+    if (errbuf_size == 0) {
+	return len;
+    }
+    if (errbuf_size > len) {
+	strcpy(errbuf, msg);
+    } else {			/* Truncate to fit */
+	strncpy(errbuf, msg, errbuf_size-1);
+	errbuf[errbuf_size-1] = '\0';
+    }
+    return len;
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regerrs.h b/contrib/hsrex/regerrs.h
new file mode 100644
index 0000000..259c0cb
--- /dev/null
+++ b/contrib/hsrex/regerrs.h
@@ -0,0 +1,19 @@
+{ REG_OKAY,	"REG_OKAY",	"no errors detected" }, /* code, name, explanation */
+{ REG_NOMATCH,	"REG_NOMATCH",	"failed to match" },
+{ REG_BADPAT,	"REG_BADPAT",	"invalid regexp (reg version 0.8)" },
+{ REG_ECOLLATE,	"REG_ECOLLATE",	"invalid collating element" },
+{ REG_ECTYPE,	"REG_ECTYPE",	"invalid character class" },
+{ REG_EESCAPE,	"REG_EESCAPE",	"invalid escape \\ sequence" },
+{ REG_ESUBREG,	"REG_ESUBREG",	"invalid backreference number" },
+{ REG_EBRACK,	"REG_EBRACK",	"brackets [] not balanced" },
+{ REG_EPAREN,	"REG_EPAREN",	"parentheses () not balanced" },
+{ REG_EBRACE,	"REG_EBRACE",	"braces {} not balanced" },
+{ REG_BADBR,	"REG_BADBR",	"invalid repetition count(s)" },
+{ REG_ERANGE,	"REG_ERANGE",	"invalid character range" },
+{ REG_ESPACE,	"REG_ESPACE",	"out of memory" },
+{ REG_BADRPT,	"REG_BADRPT",	"quantifier operand invalid" },
+{ REG_ASSERT,	"REG_ASSERT",	"\"can't happen\" -- you found a bug" },
+{ REG_INVARG,	"REG_INVARG",	"invalid argument to regex function" },
+{ REG_MIXED,	"REG_MIXED",	"character widths of regex and string differ" },
+{ REG_BADOPT,	"REG_BADOPT",	"invalid embedded option" },
+{ REG_ETOOBIG,	"REG_ETOOBIG",	"nfa has too many states" },
diff --git a/contrib/hsrex/regex.h b/contrib/hsrex/regex.h
new file mode 100644
index 0000000..2ef538a
--- /dev/null
+++ b/contrib/hsrex/regex.h
@@ -0,0 +1,336 @@
+#ifndef _REGEX_H_
+#define	_REGEX_H_	/* never again */
+/*
+ * regular expressions
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Prototypes etc. marked with "^" within comments get gathered up (and
+ * possibly edited) by the regfwd program and inserted near the bottom of this
+ * file.
+ *
+ * We offer the option of declaring one wide-character version of the RE
+ * functions as well as the char versions. To do that, define __REG_WIDE_T to
+ * the type of wide characters (unfortunately, there is no consensus that
+ * wchar_t is suitable) and __REG_WIDE_COMPILE and __REG_WIDE_EXEC to the
+ * names to be used for the compile and execute functions (suggestion:
+ * re_Xcomp and re_Xexec, where X is a letter suggestive of the wide type,
+ * e.g. re_ucomp and re_uexec for Unicode). For cranky old compilers, it may
+ * be necessary to do something like:
+ * #define	__REG_WIDE_COMPILE(a,b,c,d)	re_Xcomp(a,b,c,d)
+ * #define	__REG_WIDE_EXEC(a,b,c,d,e,f,g)	re_Xexec(a,b,c,d,e,f,g)
+ * rather than just #defining the names as parameterless macros.
+ *
+ * For some specialized purposes, it may be desirable to suppress the
+ * declarations of the "front end" functions, regcomp() and regexec(), or of
+ * the char versions of the compile and execute functions. To suppress the
+ * front-end functions, define __REG_NOFRONT. To suppress the char versions,
+ * define __REG_NOCHAR.
+ *
+ * The right place to do those defines (and some others you may want, see
+ * below) would be <sys/types.h>. If you don't have control of that file, the
+ * right place to add your own defines to this file is marked below. This is
+ * normally done automatically, by the makefile and regmkhdr, based on the
+ * contents of regcustom.h.
+ */
+
+/*
+ * voodoo for C++
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Add your own defines, if needed, here.
+ */
+
+/*
+ * Location where a chunk of regcustom.h is automatically spliced into this
+ * file (working from its prototype, regproto.h).
+ */
+
+/* --- begin --- */
+/* ensure certain things don't sneak in from system headers */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+#ifdef __REG_NOFRONT
+#undef __REG_NOFRONT
+#endif
+#ifdef __REG_NOCHAR
+#undef __REG_NOCHAR
+#endif
+/* interface types */
+#define	__REG_WIDE_T	Tcl_UniChar
+#define	__REG_REGOFF_T	long	/* not really right, but good enough... */
+#define	__REG_VOID_T	void
+#define	__REG_CONST	const
+/* names and declarations */
+#define	__REG_WIDE_COMPILE	TclReComp
+#define	__REG_WIDE_EXEC		TclReExec
+#define	__REG_NOFRONT		/* don't want regcomp() and regexec() */
+#define	__REG_NOCHAR		/* or the char versions */
+#define	regfree		TclReFree
+#define	regerror	TclReError
+/* --- end --- */
+#ifdef REGEX_STANDALONE	/* swap the TclRe* names set above for re_* ones */
+#	undef		regfree
+#	undef		regerror
+#	define regfree	re_free
+#	define regerror	re_error
+#	undef __REG_WIDE_T
+#	define __REG_WIDE_T		wchar_t
+#	undef __REG_WIDE_COMPILE
+#	define __REG_WIDE_COMPILE	re_wcomp
+#	undef __REG_WIDE_EXEC
+#	define __REG_WIDE_EXEC		re_wexec
+#	ifndef REGEX_WCHAR	/* char build: stop suppressing char versions */
+#		undef __REG_NOCHAR
+#	endif			/* !REGEX_WCHAR */
+#endif				/* REGEX_STANDALONE */
+
+/*
+ * interface types etc.
+ */
+
+/*
+ * regoff_t has to be large enough to hold either off_t or ssize_t, and must
+ * be signed; it's only a guess that long is suitable, so we offer
+ * <sys/types.h> an override.
+ */
+#ifdef __REG_REGOFF_T		/* always defined by the block above */
+typedef __REG_REGOFF_T regoff_t;
+#else
+typedef long regoff_t;
+#endif				/* __REG_REGOFF_T */
+
+/*
+ * For benefit of old compilers, we offer <sys/types.h> the option of
+ * overriding the `void' type used to declare nonexistent return types.
+ */
+#ifdef __REG_VOID_T		/* always defined by the block above */
+typedef __REG_VOID_T re_void;
+#else
+typedef void re_void;
+#endif				/* __REG_VOID_T */
+
+/*
+ * Also for benefit of old compilers, <sys/types.h> can supply a macro which
+ * expands to a substitute for `const'.
+ */
+#ifndef __REG_CONST		/* fallback; the block above defines it */
+#define	__REG_CONST	const
+#endif				/* !__REG_CONST */
+
+
+
+/*
+ * other interface types
+ */
+
+/* the biggie, a compiled RE (or rather, a front end to same) */
+typedef struct {
+    int re_magic;		/* magic number */
+    size_t re_nsub;		/* number of subexpressions */
+    long re_info;		/* information about RE */
+#define	REG_UBACKREF		000001	/* REG_U*: presumably re_info bits */
+#define	REG_ULOOKAHEAD		000002
+#define	REG_UBOUNDS		000004
+#define	REG_UBRACES		000010
+#define	REG_UBSALNUM		000020
+#define	REG_UPBOTCH		000040
+#define	REG_UBBS		000100
+#define	REG_UNONPOSIX		000200
+#define	REG_UUNSPEC		000400
+#define	REG_UUNPORT		001000
+#define	REG_ULOCALE		002000
+#define	REG_UEMPTYMATCH		004000
+#define	REG_UIMPOSSIBLE		010000
+#define	REG_USHORTEST		020000
+    int re_csize;		/* sizeof(character) */
+    char *re_endp;		/* backward compatibility kludge */
+    /* the rest is opaque pointers to hidden innards */
+    char *re_guts;		/* `char *' is more portable than `void *' */
+    char *re_fns;		/* second opaque pointer (see note above) */
+} regex_t;
+
+/* result reporting (may acquire more fields later) */
+typedef struct {
+    regoff_t rm_so;		/* start of substring, counted in characters from the start of the string; -1 while unset */
+    regoff_t rm_eo;		/* end of substring, same units; -1 while unset */
+} regmatch_t;
+
+/* supplementary control and reporting */
+typedef struct {
+    regmatch_t rm_extend;	/* see REG_EXPECT; written by the matcher only when the RE was compiled with that flag */
+} rm_detail_t;
+
+/*
+ * compilation
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regcomp(regex_t *, __REG_CONST char *, int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
+ ^ #endif
+ */
+#define	REG_BASIC	000000	/* BREs (convenience) */
+#define	REG_EXTENDED	000001	/* EREs */
+#define	REG_ADVF	000002	/* advanced features in EREs */
+#define	REG_ADVANCED	000003	/* AREs (which are also EREs); equals REG_EXTENDED|REG_ADVF */
+#define	REG_QUOTE	000004	/* no special characters, none */
+#define	REG_NOSPEC	REG_QUOTE	/* historical synonym */
+#define	REG_ICASE	000010	/* ignore case */
+#define	REG_NOSUB	000020	/* don't care about subexpressions; the matcher then treats nmatch as 0 */
+#define	REG_EXPANDED	000040	/* expanded format, white space & comments */
+#define	REG_NLSTOP	000100	/* \n doesn't match . or [^ ] */
+#define	REG_NLANCH	000200	/* ^ matches after \n, $ before */
+#define	REG_NEWLINE	000300	/* newlines are line terminators; equals REG_NLSTOP|REG_NLANCH */
+#define	REG_PEND	000400	/* ugh -- backward-compatibility hack */
+#define	REG_EXPECT	001000	/* report details on partial/limited matches; the matcher then insists on a non-NULL rm_detail_t */
+#define	REG_BOSONLY	002000	/* temporary kludge for BOS-only matches */
+#define	REG_DUMP	004000	/* none of your business :-) */
+#define	REG_FAKE	010000	/* none of your business :-) */
+#define	REG_PROGRESS	020000	/* none of your business :-) */
+
+/*
+ * execution
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
+ ^				rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
+ ^				rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ */
+#define	REG_NOTBOL	0001	/* BOS is not BOL */
+#define	REG_NOTEOL	0002	/* EOS is not EOL */
+#define	REG_STARTEND	0004	/* backward compatibility kludge */
+#define	REG_FTRACE	0010	/* none of your business */
+#define	REG_MTRACE	0020	/* none of your business */
+#define	REG_SMALL	0040	/* none of your business */
+
+/*
+ * misc generics (may be more functions here eventually)
+ ^ re_void regfree(regex_t *);
+ */
+
+/*
+ * error reporting
+ * Be careful if modifying the list of error codes -- the table used by
+ * regerror() is generated automatically from this file!
+ *
+ * Note that there is no wide-char variant of regerror at this time; what kind
+ * of character is used for error reports is independent of what kind is used
+ * in matching.
+ *
+ ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
+ */
+#define	REG_OKAY	 0	/* no errors detected */
+#define	REG_NOMATCH	 1	/* failed to match */
+#define	REG_BADPAT	 2	/* invalid regexp */
+#define	REG_ECOLLATE	 3	/* invalid collating element */
+#define	REG_ECTYPE	 4	/* invalid character class */
+#define	REG_EESCAPE	 5	/* invalid escape \ sequence */
+#define	REG_ESUBREG	 6	/* invalid backreference number */
+#define	REG_EBRACK	 7	/* brackets [] not balanced */
+#define	REG_EPAREN	 8	/* parentheses () not balanced */
+#define	REG_EBRACE	 9	/* braces {} not balanced */
+#define	REG_BADBR	10	/* invalid repetition count(s) */
+#define	REG_ERANGE	11	/* invalid character range */
+#define	REG_ESPACE	12	/* out of memory */
+#define	REG_BADRPT	13	/* quantifier operand invalid */
+#define	REG_ASSERT	15	/* "can't happen" -- you found a bug */
+#define	REG_INVARG	16	/* invalid argument to regex function */
+#define	REG_MIXED	17	/* character widths of regex and string differ */
+#define	REG_BADOPT	18	/* invalid embedded option */
+#define	REG_ETOOBIG	19	/* nfa has too many states */
+/* two specials for debugging and testing */
+#define	REG_ATOI	101	/* convert error-code name to number */
+#define	REG_ITOA	102	/* convert error-code number to name */
+
+/*
+ * the prototypes, as possibly munched by regfwd
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regproto.h === */
+#ifndef __REG_NOCHAR
+int re_comp(regex_t *, __REG_CONST unsigned char *, size_t, int);
+#endif
+#ifndef __REG_NOFRONT
+int regcomp(regex_t *, __REG_CONST char *, int);
+#endif
+#ifdef __REG_WIDE_T
+MODULE_SCOPE int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
+#endif
+#ifndef __REG_NOCHAR
+int re_exec(regex_t *, __REG_CONST unsigned char *, size_t, rm_detail_t *, size_t, regmatch_t [], int);
+#endif
+#ifndef __REG_NOFRONT
+int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
+#endif
+#ifdef __REG_WIDE_T
+MODULE_SCOPE int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int);
+#endif
+MODULE_SCOPE re_void regfree(regex_t *);
+MODULE_SCOPE size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+/*
+ * more C++ voodoo
+ */
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/hsrex/regexec.c b/contrib/hsrex/regexec.c
new file mode 100644
index 0000000..24edb41
--- /dev/null
+++ b/contrib/hsrex/regexec.c
@@ -0,0 +1,1215 @@
+/*
+ * re_*exec and friends - match REs
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "regguts.h"
+
+/*
+ * Lazy-DFA representation.
+ */
+
+struct arcp {			/* "pointer" to an outarc */
+    struct sset *ss;		/* presumably the state set that owns the outarc -- confirm in rege_dfa.c */
+    color co;			/* presumably the color selecting the entry in ss->outs[] -- confirm */
+};
+
+struct sset {			/* state set */
+    unsigned *states;		/* pointer to bitvector */
+    unsigned hash;		/* hash of bitvector */
+#define	HASH(bv, nw)	(((nw) == 1) ? *(bv) : hash(bv, nw))	/* a one-word bitvector serves as its own hash */
+#define	HIT(h,bv,ss,nw)	((ss)->hash == (h) && ((nw) == 1 || \
+	memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))	/* hash test first; memcmp only for multi-word vectors */
+    int flags;			/* OR of the bits defined just below */
+#define	STARTER		01	/* the initial state set */
+#define	POSTSTATE	02	/* includes the goal state */
+#define	LOCKED		04	/* locked in cache */
+#define	NOPROGRESS	010	/* zero-progress state set */
+    struct arcp ins;		/* chain of inarcs pointing here */
+    chr *lastseen;		/* last entered on arrival here */
+    struct sset **outs;		/* outarc vector indexed by color */
+    struct arcp *inchain;	/* chain-pointer vector for outarcs */
+};
+
+struct dfa {
+    int nssets;			/* size of cache */
+    int nssused;		/* how many entries occupied yet */
+    int nstates;		/* number of states */
+    int ncolors;		/* length of outarc and inchain vectors */
+    int wordsper;		/* length of state-set bitvectors */
+    struct sset *ssets;		/* state-set cache */
+    unsigned *statesarea;	/* bitvector storage */
+    unsigned *work;		/* pointer to work area within statesarea */
+    struct sset **outsarea;	/* outarc-vector storage */
+    struct arcp *incarea;	/* inchain storage */
+    struct cnfa *cnfa;		/* presumably the compact NFA handed to newdfa() -- confirm */
+    struct colormap *cm;	/* presumably the color map handed to newdfa() -- confirm */
+    chr *lastpost;		/* location of last cache-flushed success */
+    chr *lastnopr;		/* location of last cache-flushed NOPROGRESS */
+    struct sset *search;	/* replacement-search-pointer memory */
+    int cptsmalloced;		/* were the areas individually malloced? */
+    char *mallocarea;		/* self, or master malloced area, or NULL */
+};
+
+#define	WORK	1		/* number of work bitvectors needed */
+
+/*
+ * Setup for non-malloc allocation for small cases.
+ */
+
+#define	FEWSTATES	20	/* must be less than UBITS */
+#define	FEWCOLORS	15	/* color limit the arrays below are sized for */
+struct smalldfa {
+    struct dfa dfa;		/* the DFA proper; the arrays below are presumably what its pointers get aimed at -- confirm in newdfa() */
+    struct sset ssets[FEWSTATES*2];
+    unsigned statesarea[FEWSTATES*2 + WORK];	/* one word per state set plus WORK work vectors */
+    struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
+    struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
+};
+#define	DOMALLOC	((struct smalldfa *)NULL)	/* force malloc */
+
+/*
+ * Internal variables, bundled for easy passing around.
+ */
+
+struct vars {
+    regex_t *re;		/* the compiled RE being executed */
+    struct guts *g;		/* re->re_guts, cast by exec() */
+    int eflags;			/* copies of arguments */
+    size_t nmatch;		/* entries usable in pmatch; exec() may override the caller's value */
+    regmatch_t *pmatch;		/* caller's vector, or a private work vector when back references are in play */
+    rm_detail_t *details;
+    chr *start;			/* start of string */
+    chr *stop;			/* just past end of string */
+    int err;			/* error code if any (0 none) */
+    regoff_t *mem;		/* memory vector for backtracking */
+    struct smalldfa dfa1;	/* preallocated storage offered to newdfa() */
+    struct smalldfa dfa2;	/* second such area, for when two DFAs are live at once */
+};
+#define	VISERR(vv) ((vv)->err != 0)	/* have we seen an error yet? */
+#define	ISERR()	VISERR(v)	/* needs a struct vars *v in scope */
+#define	VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))	/* the first error recorded wins */
+#define	ERR(e)	VERR(v, e)	/* record an error */
+#define	NOERR()	{if (ISERR()) return v->err;}	/* if error seen, return it */
+#define	OFF(p)	((p) - v->start)	/* offset of p from start of string, in chrs */
+#define	LOFF(p)	((long)OFF(p))	/* same, as long, for %ld in MDEBUG output */
+
+/*
+ * forward declarations
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regexec.c === */
+int exec(regex_t *, const chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int);
+static int find(struct vars *, struct cnfa *, struct colormap *);
+static int cfind(struct vars *, struct cnfa *, struct colormap *);
+static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
+static void zapsubs(regmatch_t *, size_t);
+static void zapmem(struct vars *, struct subre *);
+static void subset(struct vars *, struct subre *, chr *, chr *);
+static int dissect(struct vars *, struct subre *, chr *, chr *);
+static int condissect(struct vars *, struct subre *, chr *, chr *);
+static int altdissect(struct vars *, struct subre *, chr *, chr *);
+static int cdissect(struct vars *, struct subre *, chr *, chr *);
+static int ccondissect(struct vars *, struct subre *, chr *, chr *);
+static int crevdissect(struct vars *, struct subre *, chr *, chr *);
+static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
+static int caltdissect(struct vars *, struct subre *, chr *, chr *);
+/* === rege_dfa.c === */
+static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
+static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
+static chr *lastcold(struct vars *, struct dfa *);
+static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
+static void freedfa(struct dfa *);
+static unsigned hash(unsigned *, int);
+static struct sset *initialize(struct vars *, struct dfa *, chr *);
+static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *);
+static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
+static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
+static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+/*
+ - exec - match regular expression
+ ^ int exec(regex_t *, const chr *, size_t, rm_detail_t *,
+ ^					size_t, regmatch_t [], int);
+ */
+int				/* REG_OKAY, REG_NOMATCH, or another REG_* error code */
+exec(
+    regex_t *re,		/* compiled RE; must carry REMAGIC */
+    const chr *string,		/* text to search */
+    size_t len,			/* length of string, in chrs */
+    rm_detail_t *details,	/* must be non-NULL if the RE was compiled with REG_EXPECT */
+    size_t nmatch,		/* number of entries in pmatch[] */
+    regmatch_t pmatch[],	/* receives the match offsets */
+    int flags)			/* execution flags, kept in v->eflags */
+{
+    AllocVars(v);		/* macro defined elsewhere; presumably provides struct vars *v -- confirm */
+    int st;
+    size_t n;
+    int backref;
+#define	LOCALMAT	20	/* work-vector entries available without malloc */
+    regmatch_t mat[LOCALMAT];
+#define	LOCALMEM	40	/* retry-memory slots available without malloc */
+    regoff_t mem[LOCALMEM];
+
+    /*
+     * Sanity checks: NULL arguments, wrong magic, mismatched character width.
+     */
+
+    if (re == NULL || string == NULL || re->re_magic != REMAGIC) {
+	FreeVars(v);
+	return REG_INVARG;
+    }
+    if (re->re_csize != sizeof(chr)) {
+	FreeVars(v);
+	return REG_MIXED;
+    }
+
+    /*
+     * Setup.
+     */
+
+    v->re = re;
+    v->g = (struct guts *)re->re_guts;
+    if ((v->g->cflags&REG_EXPECT) && details == NULL) {
+	FreeVars(v);
+	return REG_INVARG;
+    }
+    if (v->g->info&REG_UIMPOSSIBLE) {
+	FreeVars(v);
+	return REG_NOMATCH;
+    }
+    backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
+    v->eflags = flags;
+    if (v->g->cflags&REG_NOSUB) {
+	nmatch = 0;		/* override client */
+    }
+    v->nmatch = nmatch;
+    if (backref) {
+	/*
+	 * Need work area holding all nsub+1 entries, whatever nmatch is.
+	 */
+
+	if (v->g->nsub + 1 <= LOCALMAT) {
+	    v->pmatch = mat;
+	} else {
+	    v->pmatch = (regmatch_t *)
+		    MALLOC((v->g->nsub + 1) * sizeof(regmatch_t));
+	}
+	if (v->pmatch == NULL) {
+	    FreeVars(v);
+	    return REG_ESPACE;
+	}
+	v->nmatch = v->g->nsub + 1;
+    } else {
+	v->pmatch = pmatch;	/* results go straight into the caller's vector */
+    }
+    v->details = details;
+    v->start = (chr *)string;
+    v->stop = (chr *)string + len;
+    v->err = 0;
+    if (backref) {
+	/*
+	 * Need retry memory: g->ntree slots, indexed by subre retry number.
+	 */
+
+	assert(v->g->ntree >= 0);
+	n = (size_t)v->g->ntree;
+	if (n <= LOCALMEM) {
+	    v->mem = mem;
+	} else {
+	    v->mem = (regoff_t *) MALLOC(n*sizeof(regoff_t));
+	}
+	if (v->mem == NULL) {
+	    if (v->pmatch != pmatch && v->pmatch != mat) {
+		FREE(v->pmatch);	/* undo the work-area malloc made above */
+	    }
+	    FreeVars(v);
+	    return REG_ESPACE;
+	}
+    } else {
+	v->mem = NULL;
+    }
+
+    /*
+     * Do it: cfind() when back references are present, find() otherwise.
+     */
+
+    assert(v->g->tree != NULL);
+    if (backref) {
+	st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
+    } else {
+	st = find(v, &v->g->tree->cnfa, &v->g->cmap);
+    }
+
+    /*
+     * Copy (portion of) match vector over if necessary.
+     */
+
+    if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
+	zapsubs(pmatch, nmatch);
+	n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
+	memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
+    }
+
+    /*
+     * Clean up whatever was malloced; the stack buffers need no release.
+     */
+
+    if (v->pmatch != pmatch && v->pmatch != mat) {
+	FREE(v->pmatch);
+    }
+    if (v->mem != NULL && v->mem != mem) {
+	FREE(v->mem);
+    }
+    FreeVars(v);
+    return st;
+}
+
+/*
+ - find - find a match for the main NFA (no-complications case)
+ ^ static int find(struct vars *, struct cnfa *, struct colormap *);
+ */
+static int			/* regexec return code */
+find(
+    struct vars *v,		/* match state; v->err carries DFA failures */
+    struct cnfa *cnfa,		/* compact NFA of the main RE */
+    struct colormap *cm)	/* color map used for both DFAs */
+{
+    struct dfa *s, *d;		/* DFAs for the search RE and the main RE */
+    chr *begin;
+    chr *end = NULL;
+    chr *cold;
+    chr *open, *close;		/* bounds of the range of possible starts */
+    int hitend;
+    int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
+
+    /*
+     * First, a shot with the search RE, to learn whether and where to look.
+     */
+
+    s = newdfa(v, &v->g->search, cm, &v->dfa1);
+    assert(!(ISERR() && s != NULL));
+    NOERR();
+    MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
+    cold = NULL;
+    close = shortest(v, s, v->start, v->start, v->stop, &cold, NULL);
+    freedfa(s);
+    NOERR();
+    if (v->g->cflags&REG_EXPECT) {
+	assert(v->details != NULL);
+	if (cold != NULL) {
+	    v->details->rm_extend.rm_so = OFF(cold);
+	} else {
+	    v->details->rm_extend.rm_so = OFF(v->stop);
+	}
+	v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
+    }
+    if (close == NULL) {	/* not found */
+	return REG_NOMATCH;
+    }
+    if (v->nmatch == 0) {	/* found, don't need exact location */
+	return REG_OKAY;
+    }
+
+    /*
+     * Find starting point and match, trying each start in open..close.
+     */
+
+    assert(cold != NULL);
+    open = cold;
+    cold = NULL;
+    MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
+    d = newdfa(v, cnfa, cm, &v->dfa1);
+    assert(!(ISERR() && d != NULL));
+    NOERR();
+    for (begin = open; begin <= close; begin++) {
+	MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
+	if (shorter) {
+	    end = shortest(v, d, begin, begin, v->stop, NULL, &hitend);
+	} else {
+	    end = longest(v, d, begin, v->stop, &hitend);
+	}
+	if (ISERR()) {
+	    freedfa(d);		/* a bare NOERR() here would return with d still allocated */
+	    return v->err;
+	}
+	if (hitend && cold == NULL) {
+	    cold = begin;
+	}
+	if (end != NULL) {
+	    break;		/* NOTE BREAK OUT */
+	}
+    }
+    assert(end != NULL);	/* search RE succeeded so loop should */
+    freedfa(d);
+
+    /*
+     * And pin down details: entry 0 is the overall match.
+     */
+
+    assert(v->nmatch > 0);
+    v->pmatch[0].rm_so = OFF(begin);
+    v->pmatch[0].rm_eo = OFF(end);
+    if (v->g->cflags&REG_EXPECT) {
+	if (cold != NULL) {
+	    v->details->rm_extend.rm_so = OFF(cold);
+	} else {
+	    v->details->rm_extend.rm_so = OFF(v->stop);
+	}
+	v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
+    }
+    if (v->nmatch == 1) {	/* no need for submatches */
+	return REG_OKAY;
+    }
+
+    /*
+     * Submatches: reset entries 1.. and let dissect() fill them in.
+     */
+
+    zapsubs(v->pmatch, v->nmatch);
+    return dissect(v, v->g->tree, begin, end);
+}
+
+/*
+ - cfind - find a match for the main NFA (with complications)
+ ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
+ */
+static int			/* regexec return code */
+cfind(
+    struct vars *v,
+    struct cnfa *cnfa,		/* compact NFA of the main RE */
+    struct colormap *cm)
+{
+    struct dfa *s;		/* DFA for the search RE, built in v->dfa1 */
+    struct dfa *d;		/* DFA for the main RE, built in v->dfa2 */
+    chr *cold = NULL; /* silence gcc 4 warning; cfindloop() stores into it on its REG_OKAY and REG_NOMATCH returns */
+    int ret;
+
+    s = newdfa(v, &v->g->search, cm, &v->dfa1);
+    NOERR();
+    d = newdfa(v, cnfa, cm, &v->dfa2);
+    if (ISERR()) {
+	assert(d == NULL);
+	freedfa(s);		/* s was built successfully, so release it */
+	return v->err;
+    }
+
+    ret = cfindloop(v, cnfa, cm, d, s, &cold);
+
+    freedfa(d);
+    freedfa(s);
+    NOERR();			/* an error recorded during the loop takes precedence over ret */
+    if (v->g->cflags&REG_EXPECT) {
+	assert(v->details != NULL);
+	if (cold != NULL) {
+	    v->details->rm_extend.rm_so = OFF(cold);
+	} else {
+	    v->details->rm_extend.rm_so = OFF(v->stop);
+	}
+	v->details->rm_extend.rm_eo = OFF(v->stop);	/* unknown */
+    }
+    return ret;
+}
+
+/*
+ - cfindloop - the heart of cfind
+ ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
+ ^	struct dfa *, struct dfa *, chr **);
+ */
+static int			/* regexec return code */
+cfindloop(
+    struct vars *v,
+    struct cnfa *cnfa,		/* not referenced in this function */
+    struct colormap *cm,	/* not referenced in this function */
+    struct dfa *d,		/* DFA for the main RE */
+    struct dfa *s,		/* DFA for the search RE */
+    chr **coldp)		/* where to put coldstart pointer */
+{
+    chr *begin;
+    chr *end;
+    chr *cold;
+    chr *open;			/* Open and close of range of possible
+				 * starts */
+    chr *close;
+    chr *estart;		/* earliest end still to be tried (shortest-first case) */
+    chr *estop;			/* latest end still to be tried (longest-first case) */
+    int er;
+    int shorter = v->g->tree->flags&SHORTER;
+    int hitend;
+
+    assert(d != NULL && s != NULL);
+    cold = NULL;
+    close = v->start;
+    do {
+	MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
+	close = shortest(v, s, close, close, v->stop, &cold, NULL);
+	if (close == NULL) {
+	    break;		/* NOTE BREAK */
+	}
+	assert(cold != NULL);
+	open = cold;
+	cold = NULL;
+	MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
+	for (begin = open; begin <= close; begin++) {
+	    MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
+	    estart = begin;
+	    estop = v->stop;
+	    for (;;) {
+		if (shorter) {
+		    end = shortest(v, d, begin, estart, estop, NULL, &hitend);
+		} else {
+		    end = longest(v, d, begin, estop, &hitend);
+		}
+		if (hitend && cold == NULL) {
+		    cold = begin;
+		}
+		if (end == NULL) {
+		    break;	/* NOTE BREAK OUT */
+		}
+
+		MDEBUG(("tentative end %ld\n", LOFF(end)));
+		zapsubs(v->pmatch, v->nmatch);
+		zapmem(v, v->g->tree);
+		er = cdissect(v, v->g->tree, begin, end);
+		if (er == REG_OKAY) {
+		    if (v->nmatch > 0) {
+			v->pmatch[0].rm_so = OFF(begin);
+			v->pmatch[0].rm_eo = OFF(end);
+		    }
+		    *coldp = cold;
+		    return REG_OKAY;
+		}
+		if (er != REG_NOMATCH) {
+		    ERR(er);	/* *coldp is left untouched on this path */
+		    return er;
+		}
+		if ((shorter) ? end == estop : end == begin) {
+		    /*
+		     * No point in trying again.
+		     */
+
+		    *coldp = cold;
+		    return REG_NOMATCH;
+		}
+
+		/*
+		 * Go around and try again with the end range narrowed by one.
+		 */
+
+		if (shorter) {
+		    estart = end + 1;
+		} else {
+		    estop = end - 1;
+		}
+	    }
+	}
+    } while (close < v->stop);
+
+    *coldp = cold;
+    return REG_NOMATCH;
+}
+
+/*
+ - zapsubs - initialize the subexpression matches to "no match"
+ ^ static void zapsubs(regmatch_t *, size_t);
+ */
+static void
+zapsubs(
+    regmatch_t *p,		/* match vector to reset */
+    size_t n)			/* number of entries in p; 0 and 1 reset nothing */
+{
+    size_t i;			/* entry 0, the overall match, is never touched */
+
+    for (i = 1; i < n; i++) {	/* counting up cannot wrap when n == 0 */
+	p[i].rm_so = -1;
+	p[i].rm_eo = -1;
+    }
+}
+
+/*
+ - zapmem - initialize the retry memory of a subtree to zeros
+ ^ static void zapmem(struct vars *, struct subre *);
+ */
+static void
+zapmem(
+    struct vars *v,
+    struct subre *t)
+{
+    regmatch_t *slot;
+
+    /*
+     * Pre-order walk: the loop follows right links, the recursive call
+     * takes care of each left subtree.
+     */
+
+    for (; t != NULL; t = t->right) {
+	assert(v->mem != NULL);
+	v->mem[t->retry] = 0;
+	if (t->op == '(') {
+	    assert(t->subno > 0);
+	    slot = &v->pmatch[t->subno];
+	    slot->rm_so = -1;
+	    slot->rm_eo = -1;
+	}
+	zapmem(v, t->left);
+    }
+}
+
+/*
+ - subset - set any subexpression relevant to a successful subre
+ ^ static void subset(struct vars *, struct subre *, chr *, chr *);
+ */
+static void
+subset(
+    struct vars *v,
+    struct subre *sub,
+    chr *begin,
+    chr *end)
+{
+    int idx = sub->subno;	/* which pmatch entry belongs to sub */
+
+    assert(idx > 0);
+
+    /* Entries the match vector has no room for are silently skipped. */
+    if ((size_t)idx < v->nmatch) {
+	MDEBUG(("setting %d\n", idx));
+	v->pmatch[idx].rm_so = OFF(begin);
+	v->pmatch[idx].rm_eo = OFF(end);
+    }
+}
+
+/*
+ - dissect - determine subexpression matches (uncomplicated case)
+ ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+dissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    int status = REG_ASSERT;	/* result for back refs and unknown ops */
+
+    assert(t != NULL);
+    MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
+
+    /*
+     * Dispatch on node type.  A back reference ('b') should never reach
+     * this uncomplicated path, so it keeps the REG_ASSERT default.
+     */
+    if (t->op == '=') {
+	/* Terminal node: no action, the parent did the work. */
+	assert(t->left == NULL && t->right == NULL);
+	status = REG_OKAY;
+    } else if (t->op == '|') {
+	/* Alternation. */
+	assert(t->left != NULL);
+	status = altdissect(v, t, begin, end);
+    } else if (t->op == '.') {
+	/* Concatenation. */
+	assert(t->left != NULL && t->right != NULL);
+	status = condissect(v, t, begin, end);
+    } else if (t->op == '(') {
+	/* Capturing: record the span, then descend into the operand. */
+	assert(t->left != NULL && t->right == NULL);
+	assert(t->subno > 0);
+	subset(v, t, begin, end);
+	status = dissect(v, t->left, begin, end);
+    }
+    return status;
+}
+
+/*
+ - condissect - determine concatenation subexpression matches (uncomplicated)
+ ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+condissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    struct dfa *d;		/* DFA for the left operand */
+    struct dfa *d2;		/* DFA for the right operand */
+    chr *mid;			/* candidate split point between the operands */
+    int i;
+    int shorter = (t->left->flags&SHORTER) ? 1 : 0;
+    chr *stop = (shorter) ? end : begin;	/* last midpoint worth trying in the scan direction */
+
+    assert(t->op == '.');
+    assert(t->left != NULL && t->left->cnfa.nstates > 0);
+    assert(t->right != NULL && t->right->cnfa.nstates > 0);
+
+    d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
+    NOERR();
+    d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
+    if (ISERR()) {
+	assert(d2 == NULL);
+	freedfa(d);
+	return v->err;
+    }
+
+    /*
+     * Pick a tentative midpoint: where the left operand's match ends.
+     */
+
+    if (shorter) {
+	mid = shortest(v, d, begin, begin, end, NULL, NULL);
+    } else {
+	mid = longest(v, d, begin, end, NULL);
+    }
+    if (mid == NULL) {
+	freedfa(d);
+	freedfa(d2);
+	return REG_ASSERT;
+    }
+    MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+
+    /*
+     * Iterate until the right operand matches mid..end exactly, or failure.
+     */
+
+    while (longest(v, d2, mid, end, NULL) != end) {
+	/*
+	 * That midpoint didn't work, find a new one.
+	 */
+
+	if (mid == stop) {
+	    /*
+	     * All possibilities exhausted!
+	     */
+
+	    MDEBUG(("no midpoint!\n"));
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_ASSERT;
+	}
+	if (shorter) {
+	    mid = shortest(v, d, begin, mid+1, end, NULL, NULL);
+	} else {
+	    mid = longest(v, d, begin, mid-1, NULL);
+	}
+	if (mid == NULL) {
+	    /*
+	     * Failed to find a new one!
+	     */
+
+	    MDEBUG(("failed midpoint!\n"));
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_ASSERT;
+	}
+	MDEBUG(("new midpoint %ld\n", LOFF(mid)));
+    }
+
+    /*
+     * Satisfaction: both DFAs go before recursing into the operands.
+     */
+
+    MDEBUG(("successful\n"));
+    freedfa(d);
+    freedfa(d2);
+    i = dissect(v, t->left, begin, mid);
+    if (i != REG_OKAY) {
+	return i;
+    }
+    return dissect(v, t->right, mid, end);
+}
+
+/*
+ - altdissect - determine alternative subexpression matches (uncomplicated)
+ ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+altdissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    struct dfa *d;
+    chr *reach;			/* how far the current branch matches */
+    int nth;			/* ordinal of the branch, for tracing */
+
+    assert(t != NULL);
+    assert(t->op == '|');
+    for (nth = 0; t != NULL; t = t->right, nth++) {
+	MDEBUG(("trying %dth\n", nth));
+	assert(t->left != NULL && t->left->cnfa.nstates > 0);
+	d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
+	if (ISERR()) {
+	    return v->err;
+	}
+	reach = longest(v, d, begin, end, NULL);
+	freedfa(d);
+	if (reach == end) {
+	    MDEBUG(("success\n"));
+	    return dissect(v, t->left, begin, end);
+	}
+    }
+    return REG_ASSERT;		/* none of them matched?!? */
+}
+
+/*
+ - cdissect - determine subexpression matches (with complications)
+ * The retry memory stores the offset of the trial midpoint from begin, plus 1
+ * so that 0 uniquely means "clean slate".
+ ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+cdissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    int er = REG_ASSERT;	/* result for an unrecognized node type */
+
+    assert(t != NULL);
+    MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
+
+    /*
+     * Dispatch on node type.  Terminals and captures are handled right
+     * here; every other kind is handed to its own helper.
+     */
+    if (t->op == '=') {
+	/* Terminal node: no action, the parent did the work. */
+	assert(t->left == NULL && t->right == NULL);
+	er = REG_OKAY;
+    } else if (t->op == '|') {
+	/* Alternation. */
+	assert(t->left != NULL);
+	er = caltdissect(v, t, begin, end);
+    } else if (t->op == 'b') {
+	/* Back reference. */
+	assert(t->left == NULL && t->right == NULL);
+	er = cbrdissect(v, t, begin, end);
+    } else if (t->op == '.') {
+	/* Concatenation. */
+	assert(t->left != NULL && t->right != NULL);
+	er = ccondissect(v, t, begin, end);
+    } else if (t->op == '(') {
+	/* Capturing: the span is recorded only once the operand matched. */
+	assert(t->left != NULL && t->right == NULL);
+	assert(t->subno > 0);
+	er = cdissect(v, t->left, begin, end);
+	if (er == REG_OKAY) {
+	    subset(v, t, begin, end);
+	}
+    }
+    return er;
+}
+
+/*
+ - ccondissect - concatenation subexpression matches (with complications)
+ * The retry memory stores the offset of the trial midpoint from begin, plus 1
+ * so that 0 uniquely means "clean slate".
+ ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+ccondissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    struct dfa *d;		/* DFA for the left operand */
+    struct dfa *d2;		/* DFA for the right operand */
+    chr *mid;			/* candidate split point between the operands */
+    int er;
+
+    assert(t->op == '.');
+    assert(t->left != NULL && t->left->cnfa.nstates > 0);
+    assert(t->right != NULL && t->right->cnfa.nstates > 0);
+
+    if (t->left->flags&SHORTER) { /* reverse scan */
+	return crevdissect(v, t, begin, end);
+    }
+
+    d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);	/* presumably malloced because the recursive cdissect() calls below may reuse v->dfa1/dfa2 -- confirm */
+    if (ISERR()) {
+	return v->err;
+    }
+    d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
+    if (ISERR()) {
+	freedfa(d);
+	return v->err;
+    }
+    MDEBUG(("cconcat %d\n", t->retry));
+
+    /*
+     * Pick a tentative midpoint, or resume from the one in retry memory.
+     */
+
+    if (v->mem[t->retry] == 0) {
+	mid = longest(v, d, begin, end, NULL);
+	if (mid == NULL) {
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_NOMATCH;
+	}
+	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+	v->mem[t->retry] = (mid - begin) + 1;	/* offset plus 1, so 0 stays "clean slate" */
+    } else {
+	mid = begin + (v->mem[t->retry] - 1);
+	MDEBUG(("working midpoint %ld\n", LOFF(mid)));
+    }
+
+    /*
+     * Iterate until satisfaction or failure.
+     */
+
+    for (;;) {
+	/*
+	 * Try this midpoint on for size.
+	 */
+
+	er = cdissect(v, t->left, begin, mid);
+	if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
+		&& (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
+	    break;		/* NOTE BREAK OUT */
+	}
+	if ((er != REG_OKAY) && (er != REG_NOMATCH)) {	/* a real error; REG_OKAY here means only the d2 check failed */
+	    freedfa(d);
+	    freedfa(d2);
+	    return er;
+	}
+
+	/*
+	 * That midpoint didn't work, find a new one.
+	 */
+
+	if (mid == begin) {
+	    /*
+	     * All possibilities exhausted.
+	     */
+
+	    MDEBUG(("%d no midpoint\n", t->retry));
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_NOMATCH;
+	}
+	mid = longest(v, d, begin, mid-1, NULL);	/* longest left match ending before the old midpoint */
+	if (mid == NULL) {
+	    /*
+	     * Failed to find a new one.
+	     */
+
+	    MDEBUG(("%d failed midpoint\n", t->retry));
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_NOMATCH;
+	}
+	MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
+	v->mem[t->retry] = (mid - begin) + 1;
+	zapmem(v, t->left);	/* both subtrees start over for the new midpoint */
+	zapmem(v, t->right);
+    }
+
+    /*
+     * Satisfaction.
+     */
+
+    MDEBUG(("successful\n"));
+    freedfa(d);
+    freedfa(d2);
+    return REG_OKAY;
+}
+
+/*
+ - crevdissect - determine backref shortest-first subexpression matches
+ * The retry memory stores the offset of the trial midpoint from begin, plus 1
+ * so that 0 uniquely means "clean slate".
+ ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+crevdissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    struct dfa *d;		/* DFA for the left operand */
+    struct dfa *d2;		/* DFA for the right operand */
+    chr *mid;			/* candidate split point between the operands */
+    int er;
+
+    assert(t->op == '.');
+    assert(t->left != NULL && t->left->cnfa.nstates > 0);
+    assert(t->right != NULL && t->right->cnfa.nstates > 0);
+    assert(t->left->flags&SHORTER);
+
+    /*
+     * Concatenation -- need to split the substring between parts.
+     */
+
+    d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+    if (ISERR()) {
+	return v->err;
+    }
+    d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
+    if (ISERR()) {
+	freedfa(d);
+	return v->err;
+    }
+    MDEBUG(("crev %d\n", t->retry));
+
+    /*
+     * Pick a tentative midpoint, or resume from the one in retry memory.
+     */
+
+    if (v->mem[t->retry] == 0) {
+	mid = shortest(v, d, begin, begin, end, NULL, NULL);
+	if (mid == NULL) {
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_NOMATCH;
+	}
+	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+	v->mem[t->retry] = (mid - begin) + 1;	/* offset plus 1, so 0 stays "clean slate" */
+    } else {
+	mid = begin + (v->mem[t->retry] - 1);
+	MDEBUG(("working midpoint %ld\n", LOFF(mid)));
+    }
+
+    /*
+     * Iterate until satisfaction or failure.
+     */
+
+    for (;;) {
+	/*
+	 * Try this midpoint on for size.
+	 */
+
+	er = cdissect(v, t->left, begin, mid);
+	if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
+		&& (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
+	    break;		/* NOTE BREAK OUT */
+	}
+	if (er != REG_OKAY && er != REG_NOMATCH) {	/* a real error; REG_OKAY here means only the d2 check failed */
+	    freedfa(d);
+	    freedfa(d2);
+	    return er;
+	}
+
+	/*
+	 * That midpoint didn't work, find a new one.
+	 */
+
+	if (mid == end) {
+	    /*
+	     * All possibilities exhausted.
+	     */
+
+	    MDEBUG(("%d no midpoint\n", t->retry));
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_NOMATCH;
+	}
+	mid = shortest(v, d, begin, mid+1, end, NULL, NULL);	/* shortest left match ending past the old midpoint */
+	if (mid == NULL) {
+	    /*
+	     * Failed to find a new one.
+	     */
+
+	    MDEBUG(("%d failed midpoint\n", t->retry));
+	    freedfa(d);
+	    freedfa(d2);
+	    return REG_NOMATCH;
+	}
+	MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
+	v->mem[t->retry] = (mid - begin) + 1;
+	zapmem(v, t->left);	/* both subtrees start over for the new midpoint */
+	zapmem(v, t->right);
+    }
+
+    /*
+     * Satisfaction.
+     */
+
+    MDEBUG(("successful\n"));
+    freedfa(d);
+    freedfa(d2);
+    return REG_OKAY;
+}
+
+/*
+ - cbrdissect - determine backref subexpression matches
+ ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+cbrdissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    int i;			/* number of copies found so far */
+    int n;			/* number of the referenced subexpression */
+    size_t len;			/* length of the referenced text, in chrs */
+    chr *paren;			/* start of the referenced text */
+    chr *p;			/* scan position within [begin, end] */
+    chr *stop;			/* last position where a copy can start */
+    int min;			/* fewest copies allowed */
+    int max;			/* most copies allowed, or INFINITY */
+
+    assert(t != NULL && t->op == 'b');	/* check t before reading it */
+    n = t->subno;
+    min = t->min;
+    max = t->max;
+    assert(n >= 0 && (size_t)n < v->nmatch);
+    MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
+
+    if (v->pmatch[n].rm_so == -1) {	/* referenced group has no match */
+	return REG_NOMATCH;
+    }
+    paren = v->start + v->pmatch[n].rm_so;
+    len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
+
+    /*
+     * No room to maneuver -- retries are pointless, so fail on re-entry.
+     */
+
+    if (v->mem[t->retry]) {
+	return REG_NOMATCH;
+    }
+    v->mem[t->retry] = 1;
+
+    /*
+     * Special-case zero-length string: it only matches an empty substring.
+     */
+
+    if (len == 0) {
+	if (begin == end) {
+	    return REG_OKAY;
+	}
+	return REG_NOMATCH;
+    }
+
+    /*
+     * And too-short string: not even one copy fits.
+     */
+
+    assert(end >= begin);
+    if ((size_t)(end - begin) < len) {
+	return REG_NOMATCH;
+    }
+    stop = end - len;
+
+    /*
+     * Count occurrences: consecutive copies, at most max unless INFINITY.
+     */
+
+    i = 0;
+    for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
+	if ((*v->g->compare)(paren, p, len) != 0) {
+	    break;		/* mismatch ends the run of copies */
+	}
+	i++;
+    }
+    MDEBUG(("cbackref found %d\n", i));
+
+    /*
+     * And sort it out: all of it must be consumed, with min..max copies.
+     */
+
+    if (p != end) {		/* didn't consume all of it */
+	return REG_NOMATCH;
+    }
+    if (min <= i && (i <= max || max == INFINITY)) {
+	return REG_OKAY;
+    }
+    return REG_NOMATCH;		/* out of range */
+}
+
+/*
+ - caltdissect - determine alternative subexpression matches (w. complications)
+ ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int			/* regexec return code */
+caltdissect(
+    struct vars *v,
+    struct subre *t,
+    chr *begin,			/* beginning of relevant substring */
+    chr *end)			/* end of same */
+{
+    struct dfa *probe;
+    int rc;
+#define	UNTRIED	0		/* not yet tried at all */
+#define	TRYING	1		/* top matched, trying submatches */
+#define	TRIED	2		/* top didn't match or submatches exhausted */
+
+    /* Iterate over the alternatives, which are chained through ->right. */
+    for (; t != NULL; t = t->right) {
+	int whole;		/* did the left branch span [begin,end)? */
+
+	assert(t->op == '|');
+	if (v->mem[t->retry] == TRIED) {
+	    continue;		/* this branch is used up, on to the next */
+	}
+	MDEBUG(("calt n%d\n", t->retry));
+	assert(t->left != NULL);
+
+	if (v->mem[t->retry] == UNTRIED) {
+	    probe = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+	    if (ISERR()) {
+		return v->err;
+	    }
+	    whole = (longest(v, probe, begin, end, NULL) == end);
+	    freedfa(probe);
+	    if (!whole) {
+		v->mem[t->retry] = TRIED;
+		continue;
+	    }
+	    MDEBUG(("calt matched\n"));
+	    v->mem[t->retry] = TRYING;
+	}
+
+	rc = cdissect(v, t->left, begin, end);
+	if (rc != REG_NOMATCH) {
+	    return rc;		/* success, or a real error */
+	}
+	v->mem[t->retry] = TRIED;
+    }
+    return REG_NOMATCH;		/* ran out of alternatives */
+}
+
+#include "rege_dfa.c"
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regfree.c b/contrib/hsrex/regfree.c
new file mode 100644
index 0000000..b0aaa70
--- /dev/null
+++ b/contrib/hsrex/regfree.c
@@ -0,0 +1,60 @@
+/*
+ * regfree - free an RE
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You might think that this could be incorporated into regcomp.c, and that
+ * would be a reasonable idea... except that this is a generic function (with
+ * a generic name), applicable to all compiled REs regardless of the size of
+ * their characters, whereas the stuff in regcomp.c gets compiled once per
+ * character size.
+ */
+
+#include "regguts.h"
+
+/*
+ - regfree - free an RE (generic function, punts to RE-specific function)
+ *
+ * Ignoring NULL, and REs that have no function table, is a convenience.
+ */
+void
+regfree(
+    regex_t *re)
+{
+    /* A zero-initialized, never-compiled regex_t has re_fns == NULL. */
+    if (re != NULL && re->re_fns != NULL) {
+	(*((struct fns *)re->re_fns)->free)(re);
+    }
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regguts.h b/contrib/hsrex/regguts.h
new file mode 100644
index 0000000..67e3d03
--- /dev/null
+++ b/contrib/hsrex/regguts.h
@@ -0,0 +1,428 @@
+/*
+ * Internal interface definitions, etc., for the reg package
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results.  The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Environmental customization. It should not (I hope) be necessary to alter
+ * the file you are now reading -- regcustom.h should handle it all, given
+ * care here and elsewhere.
+ */
+#include "regcustom.h"
+
+/*
+ * Things that regcustom.h might override.
+ */
+
+/* standard header files (NULL is a reasonable indicator for them) */
+#ifndef NULL
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#endif
+
+/* assertions */
+#ifndef assert
+#ifndef REG_DEBUG
+#ifndef NDEBUG
+#define	NDEBUG		/* no assertions */
+#endif
+#endif /* !REG_DEBUG */
+#include <assert.h>
+#endif
+
+/* voids */
+#ifndef VOID
+#define	VOID	void		/* for function return values */
+#endif
+#ifndef DISCARD
+#define	DISCARD	void		/* for throwing values away */
+#endif
+#ifndef PVOID
+#define	PVOID	void *		/* generic pointer */
+#endif
+#ifndef VS
+#define	VS(x)	((void*)(x))	/* cast something to generic ptr */
+#endif
+#ifndef NOPARMS
+#define	NOPARMS	void		/* for empty parm lists */
+#endif
+
+/* const */
+#ifndef CONST
+#define	CONST	const		/* for old compilers, might be empty */
+#endif
+
+/* function-pointer declarator */
+#ifndef FUNCPTR
+#if __STDC__ >= 1
+#define	FUNCPTR(name, args)	(*name)args
+#else
+#define	FUNCPTR(name, args)	(*name)()
+#endif
+#endif
+
+/* memory allocation */
+#ifndef MALLOC
+#define	MALLOC(n)	malloc(n)
+#endif
+#ifndef REALLOC
+#define	REALLOC(p, n)	realloc(VS(p), n)
+#endif
+#ifndef FREE
+#define	FREE(p)		free(VS(p))
+#endif
+
+/* want size of a char in bits, and max value in bounded quantifiers */
+#ifndef CHAR_BIT
+#include <limits.h>
+#endif
+#ifndef _POSIX2_RE_DUP_MAX
+#define	_POSIX2_RE_DUP_MAX 255	/* normally from <limits.h> */
+#endif
+
+/*
+ * misc
+ */
+
+#define	NOTREACHED	0
+#define	xxx		1
+
+#define	DUPMAX	_POSIX2_RE_DUP_MAX
+#define	INFINITY	(DUPMAX+1)
+
+#define	REMAGIC	0xfed7		/* magic number for main struct */
+
+/*
+ * debugging facilities
+ */
+#ifdef REG_DEBUG
+/* FDEBUG does finite-state tracing */
+#define	FDEBUG(arglist)	{ if (v->eflags&REG_FTRACE) printf arglist; }
+/* MDEBUG does higher-level tracing */
+#define	MDEBUG(arglist)	{ if (v->eflags&REG_MTRACE) printf arglist; }
+#else
+#define	FDEBUG(arglist)	{}
+#define	MDEBUG(arglist)	{}
+#endif
+
+/*
+ * bitmap manipulation
+ */
+#define	UBITS	(CHAR_BIT * sizeof(unsigned))
+#define	BSET(uv, sn)	((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
+#define	ISBSET(uv, sn)	((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
+
+/*
+ * We dissect a chr into byts for colormap table indexing. Here we define a
+ * byt, which will be the same as a byte on most machines... The exact size of
+ * a byt is not critical, but about 8 bits is good, and extraction of 8-bit
+ * chunks is sometimes especially fast.
+ */
+
+#ifndef BYTBITS
+#define	BYTBITS	8		/* bits in a byt */
+#endif
+#define	BYTTAB	(1<<BYTBITS)	/* size of table with one entry per byt value */
+#define	BYTMASK	(BYTTAB-1)	/* bit mask for byt */
+#define	NBYTS	((CHRBITS+BYTBITS-1)/BYTBITS)
+/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
+
+/*
+ * As soon as possible, we map chrs into equivalence classes -- "colors" --
+ * which are of much more manageable number.
+ */
+
+typedef short color;		/* colors of characters */
+typedef int pcolor;		/* what color promotes to */
+#define	COLORLESS	(-1)	/* impossible color */
+#define	WHITE		0	/* default color, parent of all others */
+
+/*
+ * A colormap is a tree -- more precisely, a DAG -- indexed at each level by a
+ * byt of the chr, to map the chr to a color efficiently. Because lower
+ * sections of the tree can be shared, it can exploit the usual sparseness of
+ * such a mapping table. The tree is always NBYTS levels deep (in the past it
+ * was shallower during construction but was "filled" to full depth at the end
+ * of that); areas that are unaltered as yet point to "fill blocks" which are
+ * entirely WHITE in color.
+ */
+
+/* the tree itself: every block is indexed by one byt of the chr */
+struct colors {
+    color ccolor[BYTTAB];	/* last level: the color for each B0 value */
+};
+struct ptrs {
+    union tree *pptr[BYTTAB];	/* other levels: next block per byt value */
+};
+union tree {
+    struct colors colors;	/* the block seen as a last-level block */
+    struct ptrs ptrs;		/* the block seen as a pointer block */
+};
+#define	tcolor	colors.ccolor	/* shorthand used by GETCOLOR() */
+#define	tptr	ptrs.pptr	/* shorthand used by GETCOLOR() */
+
+/* Internal per-color descriptor structure for the color machinery */
+struct colordesc {
+    uchr nchrs;			/* number of chars of this color */
+    color sub;			/* open subcolor (if any); free chain ptr */
+#define	NOSUB	COLORLESS	/* presumably: sub when none is open */
+    struct arc *arcs;		/* color chain (see arc.colorchain) */
+    int flags;			/* bitwise OR of FREECOL and PSEUDO */
+#define	FREECOL	01		/* currently free */
+#define	PSEUDO	02		/* pseudocolor, no real chars */
+#define	UNUSEDCOLOR(cd)	((cd)->flags&FREECOL)	/* nonzero iff FREECOL set */
+    union tree *block;		/* block of solid color, if any */
+};
+
+/* the color map itself */
+struct colormap {
+    int magic;			/* presumably CMMAGIC while valid -- confirm */
+#define	CMMAGIC	0x876
+    struct vars *v;		/* for compile error reporting */
+    size_t ncds;		/* number of colordescs */
+    size_t max;			/* highest in use */
+    color free;			/* beginning of free chain (if non-0) */
+    struct colordesc *cd;	/* vector of colordescs, see CDEND() */
+#define	CDEND(cm)	(&(cm)->cd[(cm)->max + 1])	/* just past cd[max] */
+#define	NINLINECDS	((size_t)10)	/* number of entries in cdspace */
+    struct colordesc cdspace[NINLINECDS];	/* colordescs stored inline */
+    union tree tree[NBYTS];	/* tree top, plus fill blocks */
+};
+
+/* optimization magic to do fast chr->color mapping */
+#define	B0(c)	((c) & BYTMASK)
+#define	B1(c)	(((c)>>BYTBITS) & BYTMASK)
+#define	B2(c)	(((c)>>(2*BYTBITS)) & BYTMASK)
+#define	B3(c)	(((c)>>(3*BYTBITS)) & BYTMASK)
+#if NBYTS == 1
+#define	GETCOLOR(cm, c)	((cm)->tree->tcolor[B0(c)])
+#endif
+/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
+#if NBYTS == 2
+#define	GETCOLOR(cm, c)	((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+#if NBYTS == 4
+#define	GETCOLOR(cm, c)	((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+
+/*
+ * Interface definitions for locale-interface functions in regc_locale.c.
+ */
+
+/* Representation of a set of characters: single chrs plus chr ranges. */
+struct cvec {
+    int nchrs;			/* number of chrs */
+    int chrspace;		/* number of chrs possible */
+    chr *chrs;			/* pointer to vector of chrs */
+    int nranges;		/* number of ranges (chr pairs) */
+    int rangespace;		/* number of ranges (chr pairs) possible */
+    chr *ranges;		/* pointer to vector of chr pairs */
+};
+
+/*
+ * definitions for non-deterministic finite automaton (NFA) internal
+ * representation
+ *
+ * Having a "from" pointer within each arc may seem redundant, but it saves a
+ * lot of hassle.
+ */
+
+struct state;
+
+struct arc {
+    int type;
+#define	ARCFREE	'\0'
+    color co;
+    struct state *from;		/* where it's from (and contained within) */
+    struct state *to;		/* where it's to */
+    struct arc *outchain;	/* *from's outs chain or free chain */
+#define	freechain	outchain
+    struct arc *inchain;	/* *to's ins chain */
+    struct arc *colorchain;	/* color's arc chain */
+    struct arc *colorchainRev;	/* back-link in color's arc chain */
+};
+
+struct arcbatch {		/* for bulk allocation of arcs */
+    struct arcbatch *next;
+#define	ABSIZE	10
+    struct arc a[ABSIZE];
+};
+
+struct state {
+    int no;
+#define	FREESTATE	(-1)
+    char flag;			/* marks special states */
+    int nins;			/* number of inarcs */
+    struct arc *ins;		/* chain of inarcs */
+    int nouts;			/* number of outarcs */
+    struct arc *outs;		/* chain of outarcs */
+    struct arc *free;		/* chain of free arcs */
+    struct state *tmp;		/* temporary for traversal algorithms */
+    struct state *next;		/* chain for traversing all */
+    struct state *prev;		/* back chain */
+    struct arcbatch oas;	/* first arcbatch, avoid malloc in easy case */
+    int noas;			/* number of arcs used in first arcbatch */
+};
+
+struct nfa {
+    struct state *pre;		/* pre-initial state */
+    struct state *init;		/* initial state */
+    struct state *final;	/* final state */
+    struct state *post;		/* post-final state */
+    int nstates;		/* for numbering states */
+    struct state *states;	/* state-chain header */
+    struct state *slast;	/* tail of the chain */
+    struct state *free;		/* free list */
+    struct colormap *cm;	/* the color map */
+    color bos[2];		/* colors, if any, assigned to BOS and BOL */
+    color eos[2];		/* colors, if any, assigned to EOS and EOL */
+    size_t size;		/* Current NFA size; differs from nstates as
+				 * it also counts the number of states created
+				 * by children of this state. */
+    struct vars *v;		/* simplifies compile error reporting */
+    struct nfa *parent;		/* parent NFA, if any */
+};
+
+/*
+ * definitions for compacted NFA
+ */
+
+struct carc {
+    color co;			/* COLORLESS is list terminator */
+    int to;			/* state number */
+};
+
+struct cnfa {
+    int nstates;		/* number of states; 0 means no cnfa here */
+    int ncolors;		/* number of colors */
+    int flags;			/* flag bits, see below */
+#define	HASLACONS	01	/* uses lookahead constraints */
+    int pre;			/* setup state number */
+    int post;			/* teardown state number */
+    color bos[2];		/* colors, if any, assigned to BOS and BOL */
+    color eos[2];		/* colors, if any, assigned to EOS and EOL */
+    struct carc **states;	/* vector of pointers to outarc lists */
+    struct carc *arcs;		/* the area for the lists */
+};
+#define	ZAPCNFA(cnfa)	((cnfa).nstates = 0)	/* mark it as empty */
+#define	NULLCNFA(cnfa)	((cnfa).nstates == 0)	/* is it empty? */
+
+/*
+ * Used to limit the maximum NFA size to something sane. [Bug 1810264]
+ */
+
+#ifndef REG_MAX_STATES
+#   define REG_MAX_STATES	100000
+#endif
+
+/*
+ * subexpression tree
+ */
+
+struct subre {
+    char op;			/* '|', '.' (concat), 'b' (backref), '(',
+				 * '=' */
+    char flags;			/* bitwise OR of the bits defined below */
+#define	LONGER	01		/* prefers longer match */
+#define	SHORTER	02		/* prefers shorter match */
+#define	MIXED	04		/* mixed preference below */
+#define	CAP	010		/* capturing parens below */
+#define	BACKR	020		/* back reference below */
+#define	INUSE	0100		/* in use in final tree */
+#define	LOCAL	03		/* bits which may not propagate up */
+#define	LMIX(f)	((f)<<2)	/* LONGER -> MIXED */
+#define	SMIX(f)	((f)<<1)	/* SHORTER -> MIXED */
+#define	UP(f)	(((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED)) /* no LOCAL */
+#define	MESSY(f)	((f)&(MIXED|CAP|BACKR))	/* any of these set? */
+#define	PREF(f)	((f)&LOCAL)	/* just the LONGER/SHORTER bits */
+#define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2)) /* f1 first */
+#define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2)) /* merge two sets */
+    short retry;		/* index into retry memory */
+    int subno;			/* subexpression number (for 'b' and '(') */
+    short min;			/* min repetitions, for backref only */
+    short max;			/* max repetitions, for backref only */
+    struct subre *left;		/* left child, if any (also freelist chain) */
+    struct subre *right;	/* right child, if any */
+    struct state *begin;	/* outarcs from here... */
+    struct state *end;		/* ...ending in inarcs here */
+    struct cnfa cnfa;		/* compacted NFA, if any */
+    struct subre *chain;	/* for bookkeeping and error cleanup */
+};
+
+/*
+ * table of function pointers for generic manipulation functions. A regex_t's
+ * re_fns points to one of these.
+ */
+
+struct fns {
+    VOID FUNCPTR(free, (regex_t *));	/* what regfree() calls to free an RE */
+};
+
+/*
+ * the insides of a regex_t, hidden behind a void *
+ */
+
+struct guts {
+    int magic;			/* presumably GUTSMAGIC while valid -- confirm */
+#define	GUTSMAGIC	0xfed9
+    int cflags;			/* copy of compile flags */
+    long info;			/* copy of re_info */
+    size_t nsub;		/* copy of re_nsub */
+    struct subre *tree;		/* presumably root of the subre tree -- confirm */
+    struct cnfa search;		/* for fast preliminary search */
+    int ntree;			/* NOTE(review): looks like a count for tree */
+    struct colormap cmap;	/* color map handed to newdfa() by the matcher */
+    int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t)); /* 0 = equal */
+    struct subre *lacons;	/* lookahead-constraint vector */
+    int nlacons;		/* size of lacons */
+};
+
+/*
+ * Magic for allocating a variable workspace. This default version is
+ * stack-hungry.
+ */
+
+#ifndef AllocVars
+#define AllocVars(vPtr) \
+    struct vars var; \
+    register struct vars *vPtr = &var
+#endif
+#ifndef FreeVars
+#define FreeVars(vPtr) ((void) 0)
+#endif
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/src/Makefile.am b/src/Makefile.am
index 5b2572e..b850905 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -13,7 +13,7 @@ include $(top_srcdir)/contrib/scintilla.am
 
 # FIXME: Common flags should be in configure.ac
 AM_CFLAGS = -std=gnu11 -Wall -Wno-initializer-overrides -Wno-unused-value
-AM_CPPFLAGS += -I$(top_srcdir)/contrib/rb3ptr
+AM_CPPFLAGS += -I$(top_srcdir)/contrib/rb3ptr -I$(top_srcdir)/contrib/hsrex
 AM_LDFLAGS =
 
 if STATIC_EXECUTABLES
@@ -57,7 +57,8 @@ libsciteco_base_la_SOURCES = main.c sciteco.h list.h \
 # NOTE: We cannot link in Scintilla (static library) into
 # a libtool convenience library
 libsciteco_base_la_LIBADD = $(LIBSCITECO_INTERFACE) \
-                            $(top_builddir)/contrib/rb3ptr/librb3ptr.la
+                            $(top_builddir)/contrib/rb3ptr/librb3ptr.la \
+                            $(top_builddir)/contrib/hsrex/libhswrex.la
 if REPLACE_MALLOC
 libsciteco_base_la_LIBADD += $(top_builddir)/contrib/dlmalloc/libdlmalloc.la
 endif
diff --git a/src/search.c b/src/search.c
index 01c598e..81d2074 100644
--- a/src/search.c
+++ b/src/search.c
@@ -24,6 +24,13 @@
 #include <glib.h>
 #include <glib/gprintf.h>
 
+/* should always be Henry Spencer's version from contrib/hsrex */
+#define REGEX_STANDALONE
+//#define REGEX_WCHAR
+#include <regalone.h>
+#include <regex.h>
+G_DEFINE_AUTO_CLEANUP_CLEAR_FUNC(regex_t, regfree);
+
 #include "sciteco.h"
 #include "string-utils.h"
 #include "expressions.h"
@@ -463,53 +470,38 @@ teco_pattern2regexp(teco_string_t *pattern, guint codepage, gboolean single_expr
 }
 
 static gboolean
-teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
+teco_do_search(regex_t *re, gsize from, gsize to, gint *count, GError **error)
 {
-	g_autoptr(GMatchInfo) info = NULL;
-	const gchar *buffer = (const gchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0);
-	GError *tmp_error = NULL;
-
-	/*
-	 * NOTE: The return boolean does NOT signal whether an error was generated.
-	 */
-	g_regex_match_full(re, buffer, (gssize)to, from, 0, &info, &tmp_error);
-	if (tmp_error) {
-		g_propagate_error(error, tmp_error);
-		return FALSE;
-	}
+	regmatch_t info = {.rm_so = from, .rm_eo = to};
+	/* FIXME: avoid moving the gap here */
+	const guchar *buffer = (const guchar *)teco_interface_ssm(SCI_GETCHARACTERPOINTER, 0, 0);
 
 	gint matched_from = -1, matched_to = -1;
 
 	if (*count >= 0) {
-		while (g_match_info_matches(info) && --(*count)) {
-			/*
-			 * NOTE: The return boolean does NOT signal whether an error was generated.
-			 */
-			g_match_info_next(info, &tmp_error);
-			if (tmp_error) {
-				g_propagate_error(error, tmp_error);
-				return FALSE;
-			}
-		}
-
-		if (!*count)
+		gint rc;
+		while ((rc = re_exec(re, buffer+from, to-from, NULL, 1, &info, REG_NOTEOL | REG_NOTBOL)) == REG_OKAY && --(*count))
+			from += info.rm_eo;
+		if (rc == REG_OKAY) {
 			/* successful */
-			g_match_info_fetch_pos(info, 0,
-					       &matched_from, &matched_to);
+			matched_from = from+info.rm_so;
+			matched_to = from+info.rm_eo;
+		} else if (rc != REG_NOMATCH) {
+			// FIXME: Use regerror()
+			g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+			                    "Error executing regular expression");
+			return FALSE;
+		}
 	} else {
 		/* only keep the last `count' matches, in a circular stack */
-		typedef struct {
-			gint from, to;
-		} teco_range_t;
-
-		gsize matched_size = sizeof(teco_range_t) * -*count;
+		gsize matched_size = sizeof(regmatch_t) * -*count;
 
 		/*
 		 * matched_size could overflow.
 		 * NOTE: Glib 2.48 has g_size_checked_mul() which uses
 		 * compiler intrinsics.
 		 */
-		if (matched_size / sizeof(teco_range_t) != -*count)
+		if (matched_size / sizeof(regmatch_t) != -*count)
 			/* guaranteed to fail either teco_memory_check() or g_malloc() */
 			matched_size = G_MAXSIZE;
 
@@ -522,32 +514,29 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
 		if (!teco_memory_check(matched_size, error))
 			return FALSE;
 
-		g_autofree teco_range_t *matched = g_malloc(matched_size);
+		g_autofree regmatch_t *matched = g_malloc(matched_size);
 
 		gint matched_total = 0, i = 0;
 
-		while (g_match_info_matches(info)) {
-			g_match_info_fetch_pos(info, 0,
-					       &matched[i].from, &matched[i].to);
-
-			/*
-			 * NOTE: The return boolean does NOT signal whether an error was generated.
-			 */
-			g_match_info_next(info, &tmp_error);
-			if (tmp_error) {
-				g_propagate_error(error, tmp_error);
-				return FALSE;
-			}
-
+		gint rc;
+		while ((rc = re_exec(re, buffer+from, to-from, NULL, 1, &matched[i], REG_NOTEOL | REG_NOTBOL | REG_STARTEND)) == REG_OKAY) {
+			matched[i].rm_so += from;
+			matched[i].rm_eo += from;
+			from = matched[i].rm_eo;
 			i = ++matched_total % -(*count);
 		}
 
 		*count = MIN(*count + matched_total, 0);
-		if (!*count) {
-			/* successful -> i points to stack bottom */
-			matched_from = matched[i].from;
-			matched_to = matched[i].to;
+		if (rc != REG_NOMATCH) {
+			// FIXME: Use regerror()
+			g_set_error_literal(error, TECO_ERROR, TECO_ERROR_FAILED,
+			                    "Error executing regular expression");
+			return FALSE;
 		}
+
+		/* successful -> i points to stack bottom */
+		matched_from = matched[i].rm_so;
+		matched_to = matched[i].rm_eo;
 	}
 
 	if (matched_from >= 0 && matched_to >= 0)
@@ -560,14 +549,11 @@ teco_do_search(GRegex *re, gint from, gint to, gint *count, GError **error)
 static gboolean
 teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gsize new_chars, GError **error)
 {
-	/* FIXME: Should G_REGEX_OPTIMIZE be added under certain circumstances? */
-	GRegexCompileFlags flags = G_REGEX_CASELESS | G_REGEX_MULTILINE | G_REGEX_DOTALL;
+	gint flags = REG_EXTENDED | REG_ICASE;
 
 	/* this is set in teco_state_search_initial() */
-	if (ctx->expectstring.machine.codepage != SC_CP_UTF8) {
-		/* single byte encoding */
-		flags |= G_REGEX_RAW;
-	} else if (!teco_string_validate_utf8(str)) {
+	if (ctx->expectstring.machine.codepage == SC_CP_UTF8 &&
+	    !teco_string_validate_utf8(str)) {
 		/*
 		 * While SciTECO code is always guaranteed to be in valid UTF-8,
 		 * the result of string building may not (eg. if ^EQq inserts garbage).
@@ -588,7 +574,7 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
 	    !search_reg->vtable->set_integer(search_reg, TECO_FAILURE, error))
 		return FALSE;
 
-	g_autoptr(GRegex) re = NULL;
+	g_auto(regex_t) re = {0};
 	teco_string_t pattern = *str;
 	g_autofree gchar *re_pattern;
 	/* NOTE: teco_pattern2regexp() modifies str pointer */
@@ -602,10 +588,18 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
 	if (!*re_pattern)
 		goto failure;
 	/*
-	 * FIXME: Should we propagate at least some of the errors?
+	 * FIXME: We don't have to escape null characters in re_pattern.
 	 */
-	re = g_regex_new(re_pattern, flags, 0, NULL);
-	if (!re)
+#if 0
+	gint rc = ctx->expectstring.machine.codepage == SC_CP_UTF8
+			? re_wcomp(&re, re_pattern, strlen(re_pattern), flags)
+			: re_comp(&re, re_pattern, strlen(re_pattern), flags);
+#endif
+	// FIXME: Apparently this is the ASCII-only version, while re_wcomp() is the widechar version
+	// which expects UTF-32.
+	// This means that teco_pattern2regexp() would have to return an UTF-32 version.
+	gint rc = re_comp(&re, re_pattern, strlen(re_pattern), flags);
+	if (rc)
 		goto failure;
 
 	if (!teco_qreg_current &&
@@ -616,7 +610,7 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
 
 	gint count = teco_search_parameters.count;
 
-	if (!teco_do_search(re, teco_search_parameters.from, teco_search_parameters.to, &count, error))
+	if (!teco_do_search(&re, teco_search_parameters.from, teco_search_parameters.to, &count, error))
 		return FALSE;
 
 	if (teco_search_parameters.to_buffer && count) {
@@ -631,12 +625,12 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
 				teco_buffer_edit(buffer);
 
 				if (buffer == teco_search_parameters.to_buffer) {
-					if (!teco_do_search(re, 0, teco_search_parameters.dot, &count, error))
+					if (!teco_do_search(&re, 0, teco_search_parameters.dot, &count, error))
 						return FALSE;
 					break;
 				}
 
-				if (!teco_do_search(re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
+				if (!teco_do_search(&re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
 				                    &count, error))
 					return FALSE;
 			} while (count);
@@ -646,14 +640,14 @@ teco_state_search_process(teco_machine_main_t *ctx, const teco_string_t *str, gs
 				teco_buffer_edit(buffer);
 
 				if (buffer == teco_search_parameters.to_buffer) {
-					if (!teco_do_search(re, teco_search_parameters.dot,
+					if (!teco_do_search(&re, teco_search_parameters.dot,
 					                    teco_interface_ssm(SCI_GETLENGTH, 0, 0),
 					                    &count, error))
 						return FALSE;
 					break;
 				}
 
-				if (!teco_do_search(re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
+				if (!teco_do_search(&re, 0, teco_interface_ssm(SCI_GETLENGTH, 0, 0),
 				                    &count, error))
 					return FALSE;
 			} while (count);