aboutsummaryrefslogtreecommitdiffhomepage
path: root/contrib
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2024-09-14 19:00:01 +0200
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2024-09-14 19:00:01 +0200
commitab0d97147d8c19eabc41b11698dff13cd04d67ae (patch)
treeb9f7cb00405b41fce77c5df3ac9a7ea0e2ebfc1c /contrib
parent07d9cdfd3d1462f5f19cfa1422d9b5710c9e139d (diff)
downloadsciteco-ab0d97147d8c19eabc41b11698dff13cd04d67ae.tar.gz
imported Henry Spencer's regex implementation from Tcl
Source: github.com/garyhouston/hsrex * This version should be a Thompson NFA, using backtracking only for backreferences, so it should be much safer than PCRE (GRegex). Search times should be linear and there should be no way to cause stack overflows (unless we would generate backreferences). * Importing the lib makes sure we don't add another compile-time dependency. Also, we could implement our own regcomp() which translates directly from TECO patterns. * This is still WIP and currently only works with the ASCII version. The widechar version does not define re_comp() and re_exec(). * Apparently we can't have an ASCII and widechar version at the same time, so we must build two libtool libraries and somehow mangle the names. * Ideally the widechar version will also work with UTF-8 strings. * An alternative might be to import the Gnulib regex module. How does it choose the encoding anyway? * Or we could just use Oniguruma - but this would have to be a new external library dependency.
Diffstat (limited to 'contrib')
-rw-r--r--contrib/hsrex/Makefile.am10
-rw-r--r--contrib/hsrex/regalone.c267
-rw-r--r--contrib/hsrex/regalone.h250
-rw-r--r--contrib/hsrex/regc_color.c848
-rw-r--r--contrib/hsrex/regc_cvec.c146
-rw-r--r--contrib/hsrex/regc_lex.c1185
-rw-r--r--contrib/hsrex/regc_locale.c1163
-rw-r--r--contrib/hsrex/regc_nfa.c1873
-rw-r--r--contrib/hsrex/regcomp.c2169
-rw-r--r--contrib/hsrex/regcustom.h185
-rw-r--r--contrib/hsrex/rege_dfa.c816
-rw-r--r--contrib/hsrex/regerror.c129
-rw-r--r--contrib/hsrex/regerrs.h19
-rw-r--r--contrib/hsrex/regex.h336
-rw-r--r--contrib/hsrex/regexec.c1215
-rw-r--r--contrib/hsrex/regfree.c60
-rw-r--r--contrib/hsrex/regguts.h428
17 files changed, 11099 insertions, 0 deletions
diff --git a/contrib/hsrex/Makefile.am b/contrib/hsrex/Makefile.am
new file mode 100644
index 0000000..11b979a
--- /dev/null
+++ b/contrib/hsrex/Makefile.am
@@ -0,0 +1,10 @@
+# FIXME: We probably need both ASCII and widechar versions
+# as separate libraries.
+AM_CPPFLAGS = -DREGEX_STANDALONE
+# -DREGEX_WCHAR
+
+noinst_LTLIBRARIES = libhswrex.la
+libhswrex_la_SOURCES = regcomp.c regexec.c regerror.c regfree.c regalone.c \
+ regalone.h regcustom.h regerrs.h regex.h regguts.h
+# included from regcomp.c and regexec.c
+EXTRA_libhswrex_la_SOURCES = regc_color.c regc_cvec.c regc_lex.c regc_locale.c regc_nfa.c
diff --git a/contrib/hsrex/regalone.c b/contrib/hsrex/regalone.c
new file mode 100644
index 0000000..e0a5fcc
--- /dev/null
+++ b/contrib/hsrex/regalone.c
@@ -0,0 +1,267 @@
+#ifdef REGEX_WCHAR
+
+#include "regcustom.h"
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_DStringInit --
+ *
+ * Initializes a dynamic string, discarding any previous contents of the
+ * string (Tcl_DStringFree should have been called already if the dynamic
+ * string was previously in use).
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * The dynamic string is initialized to be empty.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_DStringInit(
+ Tcl_DString *dsPtr) /* Pointer to structure for dynamic string. */
+{
+ dsPtr->string = dsPtr->staticSpace;
+ dsPtr->length = 0;
+ dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE;
+ dsPtr->staticSpace[0] = '\0';
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_DStringSetLength --
+ *
+ * Change the length of a dynamic string. This can cause the string to
+ * either grow or shrink, depending on the value of length.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * The length of dsPtr is changed to length and a null byte is stored at
+ * that position in the string. If length is larger than the space
+ * allocated for dsPtr, then a panic occurs.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_DStringSetLength(
+ Tcl_DString *dsPtr, /* Structure describing dynamic string. */
+ int length) /* New length for dynamic string. */
+{
+ int newsize;
+
+ if (length < 0) {
+ length = 0;
+ }
+ if (length >= dsPtr->spaceAvl) {
+ /*
+ * There are two interesting cases here. In the first case, the user
+ * may be trying to allocate a large buffer of a specific size. It
+ * would be wasteful to overallocate that buffer, so we just allocate
+ * enough for the requested size plus the trailing null byte. In the
+ * second case, we are growing the buffer incrementally, so we need
+ * behavior similar to Tcl_DStringAppend. The requested length will
+ * usually be a small delta above the current spaceAvl, so we'll end
+ * up doubling the old size. This won't grow the buffer quite as
+ * quickly, but it should be close enough.
+ */
+
+ newsize = dsPtr->spaceAvl * 2;
+ if (length < newsize) {
+ dsPtr->spaceAvl = newsize;
+ } else {
+ dsPtr->spaceAvl = length + 1;
+ }
+ if (dsPtr->string == dsPtr->staticSpace) {
+ char *newString = ckalloc((unsigned) dsPtr->spaceAvl);
+
+ memcpy(newString, dsPtr->string, (size_t) dsPtr->length);
+ dsPtr->string = newString;
+ } else {
+ dsPtr->string = (char *) ckrealloc((void *) dsPtr->string,
+ (size_t) dsPtr->spaceAvl);
+ }
+ }
+ dsPtr->length = length;
+ dsPtr->string[length] = 0;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_DStringFree --
+ *
+ * Frees up any memory allocated for the dynamic string and reinitializes
+ * the string to an empty state.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * The previous contents of the dynamic string are lost, and the new
+ * value is an empty string.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void
+Tcl_DStringFree(
+ Tcl_DString *dsPtr) /* Structure describing dynamic string. */
+{
+ if (dsPtr->string != dsPtr->staticSpace) {
+ ckfree(dsPtr->string);
+ }
+ dsPtr->string = dsPtr->staticSpace;
+ dsPtr->length = 0;
+ dsPtr->spaceAvl = TCL_DSTRING_STATIC_SIZE;
+ dsPtr->staticSpace[0] = '\0';
+}
+
+
+
+/*
+ * Unicode characters less than this value are represented by themselves in
+ * UTF-8 strings.
+ */
+
+#define UNICODE_SELF 0x80
+
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * Tcl_UniCharToUtf --
+ *
+ * Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the
+ * provided buffer. Equivalent to Plan 9 runetochar().
+ *
+ * Results:
+ * The return values is the number of bytes in the buffer that were
+ * consumed.
+ *
+ * Side effects:
+ * None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+INLINE int
+Tcl_UniCharToUtf(
+ int ch, /* The Tcl_UniChar to be stored in the
+ * buffer. */
+ char *buf) /* Buffer in which the UTF-8 representation of
+ * the Tcl_UniChar is stored. Buffer must be
+ * large enough to hold the UTF-8 character
+ * (at most TCL_UTF_MAX bytes). */
+{
+ if ((ch > 0) && (ch < UNICODE_SELF)) {
+ buf[0] = (char) ch;
+ return 1;
+ }
+ if (ch >= 0) {
+ if (ch <= 0x7FF) {
+ buf[1] = (char) ((ch | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 6) | 0xC0);
+ return 2;
+ }
+ if (ch <= 0xFFFF) {
+ three:
+ buf[2] = (char) ((ch | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 12) | 0xE0);
+ return 3;
+ }
+
+#if TCL_UTF_MAX > 3
+ if (ch <= 0x1FFFFF) {
+ buf[3] = (char) ((ch | 0x80) & 0xBF);
+ buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 18) | 0xF0);
+ return 4;
+ }
+ if (ch <= 0x3FFFFFF) {
+ buf[4] = (char) ((ch | 0x80) & 0xBF);
+ buf[3] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[2] = (char) (((ch >> 12) | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 18) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 24) | 0xF8);
+ return 5;
+ }
+ if (ch <= 0x7FFFFFFF) {
+ buf[5] = (char) ((ch | 0x80) & 0xBF);
+ buf[4] = (char) (((ch >> 6) | 0x80) & 0xBF);
+ buf[3] = (char) (((ch >> 12) | 0x80) & 0xBF);
+ buf[2] = (char) (((ch >> 18) | 0x80) & 0xBF);
+ buf[1] = (char) (((ch >> 24) | 0x80) & 0xBF);
+ buf[0] = (char) ((ch >> 30) | 0xFC);
+ return 6;
+ }
+#endif
+ }
+
+ ch = 0xFFFD;
+ goto three;
+}
+
+/*
+ *---------------------------------------------------------------------------
+ *
+ * Tcl_UniCharToUtfDString --
+ *
+ * Convert the given Unicode string to UTF-8.
+ *
+ * Results:
+ * The return value is a pointer to the UTF-8 representation of the
+ * Unicode string. Storage for the return value is appended to the end of
+ * dsPtr.
+ *
+ * Side effects:
+ * None.
+ *
+ *---------------------------------------------------------------------------
+ */
+
+char *
+Tcl_UniCharToUtfDString(
+ const Tcl_UniChar *uniStr, /* Unicode string to convert to UTF-8. */
+ int uniLength, /* Length of Unicode string in Tcl_UniChars
+ * (must be >= 0). */
+ Tcl_DString *dsPtr) /* UTF-8 representation of string is appended
+ * to this previously initialized DString. */
+{
+ const Tcl_UniChar *w, *wEnd;
+ char *p, *string;
+ int oldLength;
+
+ /*
+ * UTF-8 string length in bytes will be <= Unicode string length *
+ * TCL_UTF_MAX.
+ */
+
+ oldLength = Tcl_DStringLength(dsPtr);
+ Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * TCL_UTF_MAX);
+ string = Tcl_DStringValue(dsPtr) + oldLength;
+
+ p = string;
+ wEnd = uniStr + uniLength;
+ for (w = uniStr; w < wEnd; ) {
+ p += Tcl_UniCharToUtf(*w, p);
+ w++;
+ }
+ Tcl_DStringSetLength(dsPtr, oldLength + (p - string));
+
+ return string;
+}
+
+#endif /* REGEX_WCHAR */
diff --git a/contrib/hsrex/regalone.h b/contrib/hsrex/regalone.h
new file mode 100644
index 0000000..940c11d
--- /dev/null
+++ b/contrib/hsrex/regalone.h
@@ -0,0 +1,250 @@
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef REGEX_STANDALONE
+# define REGEX_STANDALONE
+#endif
+
+#ifdef REGEX_WCHAR
+# include <wctype.h>
+# include <wchar.h>
+ typedef wchar_t chr;
+ typedef chr Tcl_UniChar;
+#else
+# include <ctype.h>
+ typedef unsigned char chr;
+ typedef wchar_t Tcl_UniChar;
+#endif
+
+/*
+ * In The standalone version we are more concerned with performance,
+ * so an automatic var is our best choice.
+ */
+#define AllocVars(vPtr) \
+ struct vars regex_autovar; \
+ register struct vars *vPtr = &regex_autovar;
+
+#define MALLOC(n) calloc(1,n)
+#define FREE(p) free(VS(p))
+#define REALLOC(p,n) realloc(VS(p),n)
+#define ckalloc(n) calloc(1,n)
+#define ckrealloc(p,n) realloc(p,n)
+#define ckfree(p) free(p)
+
+#ifdef REGEX_WCHAR
+# define Tcl_UniCharToLower(c) towlower(c)
+# define Tcl_UniCharToUpper(c) towupper(c)
+# define Tcl_UniCharToTitle(c) towupper(c)
+# define Tcl_UniCharIsAlpha(c) iswalpha(c)
+# define Tcl_UniCharIsAlnum(c) iswalnum(c)
+# define Tcl_UniCharIsDigit(c) iswdigit(c)
+# define Tcl_UniCharIsSpace(c) iswspace(c)
+#else
+# define Tcl_DStringInit(ds)
+# define Tcl_UniCharToUtfDString(s,l,ds) (s)
+# define Tcl_DStringFree(ds)
+# define Tcl_UniCharToLower(c) tolower(c)
+# define Tcl_UniCharToUpper(c) toupper(c)
+# define Tcl_UniCharToTitle(c) toupper(c)
+# define Tcl_UniCharIsAlpha(c) isalpha(c)
+# define Tcl_UniCharIsAlnum(c) isalnum(c)
+# define Tcl_UniCharIsDigit(c) isdigit(c)
+# define Tcl_UniCharIsSpace(c) isspace(c)
+#endif
+
+
+/*
+ * The maximum number of bytes that are necessary to represent a single
+ * Unicode character in UTF-8. The valid values should be 3 or 6 (or perhaps 1
+ * if we want to support a non-unicode enabled core). If 3, then Tcl_UniChar
+ * must be 2-bytes in size (UCS-2) (the default). If 6, then Tcl_UniChar must
+ * be 4-bytes in size (UCS-4). At this time UCS-2 mode is the default and
+ * recommended mode. UCS-4 is experimental and not recommended. It works for
+ * the core, but most extensions expect UCS-2.
+ */
+
+#ifndef TCL_UTF_MAX
+#define TCL_UTF_MAX 3
+#endif
+
+
+/*
+ * The structure defined below is used to hold dynamic strings. The only
+ * fields that clients should use are string and length, accessible via the
+ * macros Tcl_DStringValue and Tcl_DStringLength.
+ */
+
+#define TCL_DSTRING_STATIC_SIZE 200
+typedef struct Tcl_DString {
+ char *string; /* Points to beginning of string: either
+ * staticSpace below or a malloced array. */
+ int length; /* Number of non-NULL characters in the
+ * string. */
+ int spaceAvl; /* Total number of bytes available for the
+ * string and its terminating NULL char. */
+ char staticSpace[TCL_DSTRING_STATIC_SIZE];
+ /* Space to use in common case where string is
+ * small. */
+} Tcl_DString;
+
+#define Tcl_DStringLength(dsPtr) ((dsPtr)->length)
+#define Tcl_DStringValue(dsPtr) ((dsPtr)->string)
+
+
+/*
+ * The macro below is used to modify a "char" value (e.g. by casting it to an
+ * unsigned character) so that it can be used safely with macros such as
+ * isspace.
+ */
+
+#define UCHAR(c) ((unsigned char) (c))
+
+
+/*
+ * Used to tag functions that are only to be visible within the module being
+ * built and not outside it (where this is supported by the linker).
+ */
+
+#ifndef MODULE_SCOPE
+# ifdef __cplusplus
+# define MODULE_SCOPE extern "C"
+# else
+# define MODULE_SCOPE extern
+# endif
+#endif
+
+
+/*
+ * Macros used to declare a function to be exported by a DLL. Used by Windows,
+ * maps to no-op declarations on non-Windows systems. The default build on
+ * windows is for a DLL, which causes the DLLIMPORT and DLLEXPORT macros to be
+ * nonempty. To build a static library, the macro STATIC_BUILD should be
+ * defined.
+ *
+ * Note: when building static but linking dynamically to MSVCRT we must still
+ * correctly decorate the C library imported function. Use CRTIMPORT
+ * for this purpose. _DLL is defined by the compiler when linking to
+ * MSVCRT.
+ */
+
+#if (defined(__WIN32__) && (defined(_MSC_VER) || (__BORLANDC__ >= 0x0550) || defined(__LCC__) || defined(__WATCOMC__) || (defined(__GNUC__) && defined(__declspec))))
+# define HAVE_DECLSPEC 1
+# ifdef STATIC_BUILD
+# define DLLIMPORT
+# define DLLEXPORT
+# ifdef _DLL
+# define CRTIMPORT __declspec(dllimport)
+# else
+# define CRTIMPORT
+# endif
+# else
+# define DLLIMPORT __declspec(dllimport)
+# define DLLEXPORT __declspec(dllexport)
+# define CRTIMPORT __declspec(dllimport)
+# endif
+#else
+# define DLLIMPORT
+# if defined(__GNUC__) && __GNUC__ > 3
+# define DLLEXPORT __attribute__ ((visibility("default")))
+# else
+# define DLLEXPORT
+# endif
+# define CRTIMPORT
+#endif
+
+/*
+ * These macros are used to control whether functions are being declared for
+ * import or export. If a function is being declared while it is being built
+ * to be included in a shared library, then it should have the DLLEXPORT
+ * storage class. If is being declared for use by a module that is going to
+ * link against the shared library, then it should have the DLLIMPORT storage
+ * class. If the symbol is beind declared for a static build or for use from a
+ * stub library, then the storage class should be empty.
+ *
+ * The convention is that a macro called BUILD_xxxx, where xxxx is the name of
+ * a library we are building, is set on the compile line for sources that are
+ * to be placed in the library. When this macro is set, the storage class will
+ * be set to DLLEXPORT. At the end of the header file, the storage class will
+ * be reset to DLLIMPORT.
+ */
+
+#undef TCL_STORAGE_CLASS
+#ifdef BUILD_tcl
+# define TCL_STORAGE_CLASS DLLEXPORT
+#else
+# ifdef USE_TCL_STUBS
+# define TCL_STORAGE_CLASS
+# else
+# define TCL_STORAGE_CLASS DLLIMPORT
+# endif
+#endif
+
+/*
+ * Definitions that allow this header file to be used either with or without
+ * ANSI C features like function prototypes.
+ */
+
+#undef _ANSI_ARGS_
+#undef CONST
+#ifndef INLINE
+# define INLINE
+#endif
+
+#ifndef NO_CONST
+# define CONST const
+#else
+# define CONST
+#endif
+
+#ifndef NO_PROTOTYPES
+# define _ANSI_ARGS_(x) x
+#else
+# define _ANSI_ARGS_(x) ()
+#endif
+
+#ifdef USE_NON_CONST
+# ifdef USE_COMPAT_CONST
+# error define at most one of USE_NON_CONST and USE_COMPAT_CONST
+# endif
+# define CONST84
+# define CONST84_RETURN
+#else
+# ifdef USE_COMPAT_CONST
+# define CONST84
+# define CONST84_RETURN CONST
+# else
+# define CONST84 CONST
+# define CONST84_RETURN CONST
+# endif
+#endif
+
+#ifndef CONST86
+# define CONST86 CONST
+#endif
+
+/*
+ * Make sure EXTERN isn't defined elsewhere
+ */
+
+#ifdef EXTERN
+# undef EXTERN
+#endif /* EXTERN */
+
+#ifdef __cplusplus
+# define EXTERN extern "C" TCL_STORAGE_CLASS
+#else
+# define EXTERN extern TCL_STORAGE_CLASS
+#endif
+
+
+#ifdef REGEX_WCHAR
+EXTERN void Tcl_DStringFree (Tcl_DString * dsPtr);
+EXTERN void Tcl_DStringInit (Tcl_DString * dsPtr);
+EXTERN char * Tcl_UniCharToUtfDString (CONST Tcl_UniChar * uniStr,
+ int uniLength, Tcl_DString * dsPtr);
+EXTERN void Tcl_DStringSetLength (Tcl_DString * dsPtr,
+ int length);
+#endif /* REGEX_WCHAR */
diff --git a/contrib/hsrex/regc_color.c b/contrib/hsrex/regc_color.c
new file mode 100644
index 0000000..7a98dcb
--- /dev/null
+++ b/contrib/hsrex/regc_color.c
@@ -0,0 +1,848 @@
+/*
+ * colorings of characters
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Note that there are some incestuous relationships between this code and NFA
+ * arc maintenance, which perhaps ought to be cleaned up sometime.
+ */
+
+#define CISERR() VISERR(cm->v)
+#define CERR(e) VERR(cm->v, (e))
+
+/*
+ - initcm - set up new colormap
+ ^ static void initcm(struct vars *, struct colormap *);
+ */
+static void
+initcm(
+ struct vars *v,
+ struct colormap *cm)
+{
+ int i;
+ int j;
+ union tree *t;
+ union tree *nextt;
+ struct colordesc *cd;
+
+ cm->magic = CMMAGIC;
+ cm->v = v;
+
+ cm->ncds = NINLINECDS;
+ cm->cd = cm->cdspace;
+ cm->max = 0;
+ cm->free = 0;
+
+ cd = cm->cd; /* cm->cd[WHITE] */
+ cd->sub = NOSUB;
+ cd->arcs = NULL;
+ cd->flags = 0;
+ cd->nchrs = CHR_MAX - CHR_MIN + 1;
+
+ /*
+ * Upper levels of tree.
+ */
+
+ for (t=&cm->tree[0], j=NBYTS-1 ; j>0 ; t=nextt, j--) {
+ nextt = t + 1;
+ for (i=BYTTAB-1 ; i>=0 ; i--) {
+ t->tptr[i] = nextt;
+ }
+ }
+
+ /*
+ * Bottom level is solid white.
+ */
+
+ t = &cm->tree[NBYTS-1];
+ for (i=BYTTAB-1 ; i>=0 ; i--) {
+ t->tcolor[i] = WHITE;
+ }
+ cd->block = t;
+}
+
+/*
+ - freecm - free dynamically-allocated things in a colormap
+ ^ static void freecm(struct colormap *);
+ */
+static void
+freecm(
+ struct colormap *cm)
+{
+ size_t i;
+ union tree *cb;
+
+ cm->magic = 0;
+ if (NBYTS > 1) {
+ cmtreefree(cm, cm->tree, 0);
+ }
+ for (i=1 ; i<=cm->max ; i++) { /* skip WHITE */
+ if (!UNUSEDCOLOR(&cm->cd[i])) {
+ cb = cm->cd[i].block;
+ if (cb != NULL) {
+ FREE(cb);
+ }
+ }
+ }
+ if (cm->cd != cm->cdspace) {
+ FREE(cm->cd);
+ }
+}
+
+/*
+ - cmtreefree - free a non-terminal part of a colormap tree
+ ^ static void cmtreefree(struct colormap *, union tree *, int);
+ */
+static void
+cmtreefree(
+ struct colormap *cm,
+ union tree *tree,
+ int level) /* level number (top == 0) of this block */
+{
+ int i;
+ union tree *t;
+ union tree *fillt = &cm->tree[level+1];
+ union tree *cb;
+
+ assert(level < NBYTS-1); /* this level has pointers */
+ for (i=BYTTAB-1 ; i>=0 ; i--) {
+ t = tree->tptr[i];
+ assert(t != NULL);
+ if (t != fillt) {
+ if (level < NBYTS-2) { /* more pointer blocks below */
+ cmtreefree(cm, t, level+1);
+ FREE(t);
+ } else { /* color block below */
+ cb = cm->cd[t->tcolor[0]].block;
+ if (t != cb) { /* not a solid block */
+ FREE(t);
+ }
+ }
+ }
+ }
+}
+
+/*
+ - setcolor - set the color of a character in a colormap
+ ^ static color setcolor(struct colormap *, pchr, pcolor);
+ */
+static color /* previous color */
+setcolor(
+ struct colormap *cm,
+ pchr c,
+ pcolor co)
+{
+ uchr uc = c;
+ int shift;
+ int level;
+ int b;
+ int bottom;
+ union tree *t;
+ union tree *newt;
+ union tree *fillt;
+ union tree *lastt;
+ union tree *cb;
+ color prev;
+
+ assert(cm->magic == CMMAGIC);
+ if (CISERR() || co == COLORLESS) {
+ return COLORLESS;
+ }
+
+ t = cm->tree;
+ for (level=0, shift=BYTBITS*(NBYTS-1) ; shift>0; level++, shift-=BYTBITS){
+ b = (uc >> shift) & BYTMASK;
+ lastt = t;
+ t = lastt->tptr[b];
+ assert(t != NULL);
+ fillt = &cm->tree[level+1];
+ bottom = (shift <= BYTBITS) ? 1 : 0;
+ cb = (bottom) ? cm->cd[t->tcolor[0]].block : fillt;
+ if (t == fillt || t == cb) { /* must allocate a new block */
+ newt = (union tree *) MALLOC((bottom) ?
+ sizeof(struct colors) : sizeof(struct ptrs));
+ if (newt == NULL) {
+ CERR(REG_ESPACE);
+ return COLORLESS;
+ }
+ if (bottom) {
+ memcpy(newt->tcolor, t->tcolor, BYTTAB*sizeof(color));
+ } else {
+ memcpy(newt->tptr, t->tptr, BYTTAB*sizeof(union tree *));
+ }
+ t = newt;
+ lastt->tptr[b] = t;
+ }
+ }
+
+ b = uc & BYTMASK;
+ prev = t->tcolor[b];
+ t->tcolor[b] = (color) co;
+ return prev;
+}
+
+/*
+ - maxcolor - report largest color number in use
+ ^ static color maxcolor(struct colormap *);
+ */
+static color
+maxcolor(
+ struct colormap *cm)
+{
+ if (CISERR()) {
+ return COLORLESS;
+ }
+
+ return (color) cm->max;
+}
+
+/*
+ - newcolor - find a new color (must be subject of setcolor at once)
+ * Beware: may relocate the colordescs.
+ ^ static color newcolor(struct colormap *);
+ */
+static color /* COLORLESS for error */
+newcolor(
+ struct colormap *cm)
+{
+ struct colordesc *cd;
+ size_t n;
+
+ if (CISERR()) {
+ return COLORLESS;
+ }
+
+ if (cm->free != 0) {
+ assert(cm->free > 0);
+ assert((size_t) cm->free < cm->ncds);
+ cd = &cm->cd[cm->free];
+ assert(UNUSEDCOLOR(cd));
+ assert(cd->arcs == NULL);
+ cm->free = cd->sub;
+ } else if (cm->max < cm->ncds - 1) {
+ cm->max++;
+ cd = &cm->cd[cm->max];
+ } else {
+ struct colordesc *newCd;
+
+ /*
+ * Oops, must allocate more.
+ */
+
+ n = cm->ncds * 2;
+ if (cm->cd == cm->cdspace) {
+ newCd = (struct colordesc *) MALLOC(n * sizeof(struct colordesc));
+ if (newCd != NULL) {
+ memcpy(newCd, cm->cdspace,
+ cm->ncds * sizeof(struct colordesc));
+ }
+ } else {
+ newCd = (struct colordesc *)
+ REALLOC(cm->cd, n * sizeof(struct colordesc));
+ }
+ if (newCd == NULL) {
+ CERR(REG_ESPACE);
+ return COLORLESS;
+ }
+ cm->cd = newCd;
+ cm->ncds = n;
+ assert(cm->max < cm->ncds - 1);
+ cm->max++;
+ cd = &cm->cd[cm->max];
+ }
+
+ cd->nchrs = 0;
+ cd->sub = NOSUB;
+ cd->arcs = NULL;
+ cd->flags = 0;
+ cd->block = NULL;
+
+ return (color) (cd - cm->cd);
+}
+
+/*
+ - freecolor - free a color (must have no arcs or subcolor)
+ ^ static void freecolor(struct colormap *, pcolor);
+ */
+static void
+freecolor(
+ struct colormap *cm,
+ pcolor co)
+{
+ struct colordesc *cd = &cm->cd[co];
+ color pco, nco; /* for freelist scan */
+
+ assert(co >= 0);
+ if (co == WHITE) {
+ return;
+ }
+
+ assert(cd->arcs == NULL);
+ assert(cd->sub == NOSUB);
+ assert(cd->nchrs == 0);
+ cd->flags = FREECOL;
+ if (cd->block != NULL) {
+ FREE(cd->block);
+ cd->block = NULL; /* just paranoia */
+ }
+
+ if ((size_t) co == cm->max) {
+ while (cm->max > WHITE && UNUSEDCOLOR(&cm->cd[cm->max])) {
+ cm->max--;
+ }
+ assert(cm->free >= 0);
+ while ((size_t) cm->free > cm->max) {
+ cm->free = cm->cd[cm->free].sub;
+ }
+ if (cm->free > 0) {
+ assert(cm->free < cm->max);
+ pco = cm->free;
+ nco = cm->cd[pco].sub;
+ while (nco > 0) {
+ if ((size_t) nco > cm->max) {
+ /*
+ * Take this one out of freelist.
+ */
+
+ nco = cm->cd[nco].sub;
+ cm->cd[pco].sub = nco;
+ } else {
+ assert(nco < cm->max);
+ pco = nco;
+ nco = cm->cd[pco].sub;
+ }
+ }
+ }
+ } else {
+ cd->sub = cm->free;
+ cm->free = (color) (cd - cm->cd);
+ }
+}
+
+/*
+ - pseudocolor - allocate a false color, to be managed by other means
+ ^ static color pseudocolor(struct colormap *);
+ */
+static color
+pseudocolor(
+ struct colormap *cm)
+{
+ color co;
+
+ co = newcolor(cm);
+ if (CISERR()) {
+ return COLORLESS;
+ }
+ cm->cd[co].nchrs = 1;
+ cm->cd[co].flags = PSEUDO;
+ return co;
+}
+
+/*
+ - subcolor - allocate a new subcolor (if necessary) to this chr
+ ^ static color subcolor(struct colormap *, pchr c);
+ */
+static color
+subcolor(
+ struct colormap *cm,
+ pchr c)
+{
+ color co; /* current color of c */
+ color sco; /* new subcolor */
+
+ co = GETCOLOR(cm, c);
+ sco = newsub(cm, co);
+ if (CISERR()) {
+ return COLORLESS;
+ }
+ assert(sco != COLORLESS);
+
+ if (co == sco) { /* already in an open subcolor */
+ return co; /* rest is redundant */
+ }
+ cm->cd[co].nchrs--;
+ cm->cd[sco].nchrs++;
+ setcolor(cm, c, sco);
+ return sco;
+}
+
+/*
+ - newsub - allocate a new subcolor (if necessary) for a color
+ ^ static color newsub(struct colormap *, pcolor);
+ */
+static color
+newsub(
+ struct colormap *cm,
+ pcolor co)
+{
+ color sco; /* new subcolor */
+
+ sco = cm->cd[co].sub;
+ if (sco == NOSUB) { /* color has no open subcolor */
+ if (cm->cd[co].nchrs == 1) { /* optimization */
+ return co;
+ }
+ sco = newcolor(cm); /* must create subcolor */
+ if (sco == COLORLESS) {
+ assert(CISERR());
+ return COLORLESS;
+ }
+ cm->cd[co].sub = sco;
+ cm->cd[sco].sub = sco; /* open subcolor points to self */
+ }
+ assert(sco != NOSUB);
+
+ return sco;
+}
+
+/*
+ - subrange - allocate new subcolors to this range of chrs, fill in arcs
+ ^ static void subrange(struct vars *, pchr, pchr, struct state *,
+ ^ struct state *);
+ */
+static void
+subrange(
+ struct vars *v,
+ pchr from,
+ pchr to,
+ struct state *lp,
+ struct state *rp)
+{
+ uchr uf;
+ int i;
+
+ assert(from <= to);
+
+ /*
+ * First, align "from" on a tree-block boundary
+ */
+
+ uf = (uchr) from;
+ i = (int) (((uf + BYTTAB - 1) & (uchr) ~BYTMASK) - uf);
+ for (; from<=to && i>0; i--, from++) {
+ newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
+ }
+ if (from > to) { /* didn't reach a boundary */
+ return;
+ }
+
+ /*
+ * Deal with whole blocks.
+ */
+
+ for (; to-from>=BYTTAB ; from+=BYTTAB) {
+ subblock(v, from, lp, rp);
+ }
+
+ /*
+ * Clean up any remaining partial table.
+ */
+
+ for (; from<=to ; from++) {
+ newarc(v->nfa, PLAIN, subcolor(v->cm, from), lp, rp);
+ }
+}
+
+/*
+ - subblock - allocate new subcolors for one tree block of chrs, fill in arcs
+ ^ static void subblock(struct vars *, pchr, struct state *, struct state *);
+ */
+static void
+subblock(
+ struct vars *v,
+ pchr start, /* first of BYTTAB chrs */
+ struct state *lp,
+ struct state *rp)
+{
+ uchr uc = start;
+ struct colormap *cm = v->cm;
+ int shift;
+ int level;
+ int i;
+ int b;
+ union tree *t;
+ union tree *cb;
+ union tree *fillt;
+ union tree *lastt;
+ int previ;
+ int ndone;
+ color co;
+ color sco;
+
+ assert((uc % BYTTAB) == 0);
+
+ /*
+ * Find its color block, making new pointer blocks as needed.
+ */
+
+ t = cm->tree;
+ fillt = NULL;
+ for (level=0, shift=BYTBITS*(NBYTS-1); shift>0; level++, shift-=BYTBITS) {
+ b = (uc >> shift) & BYTMASK;
+ lastt = t;
+ t = lastt->tptr[b];
+ assert(t != NULL);
+ fillt = &cm->tree[level+1];
+ if (t == fillt && shift > BYTBITS) { /* need new ptr block */
+ t = (union tree *) MALLOC(sizeof(struct ptrs));
+ if (t == NULL) {
+ CERR(REG_ESPACE);
+ return;
+ }
+ memcpy(t->tptr, fillt->tptr, BYTTAB*sizeof(union tree *));
+ lastt->tptr[b] = t;
+ }
+ }
+
+ /*
+ * Special cases: fill block or solid block.
+ */
+ co = t->tcolor[0];
+ cb = cm->cd[co].block;
+ if (t == fillt || t == cb) {
+ /*
+ * Either way, we want a subcolor solid block.
+ */
+
+ sco = newsub(cm, co);
+ t = cm->cd[sco].block;
+ if (t == NULL) { /* must set it up */
+ t = (union tree *) MALLOC(sizeof(struct colors));
+ if (t == NULL) {
+ CERR(REG_ESPACE);
+ return;
+ }
+ for (i=0 ; i<BYTTAB ; i++) {
+ t->tcolor[i] = sco;
+ }
+ cm->cd[sco].block = t;
+ }
+
+ /*
+ * Find loop must have run at least once.
+ */
+
+ lastt->tptr[b] = t;
+ newarc(v->nfa, PLAIN, sco, lp, rp);
+ cm->cd[co].nchrs -= BYTTAB;
+ cm->cd[sco].nchrs += BYTTAB;
+ return;
+ }
+
+ /*
+ * General case, a mixed block to be altered.
+ */
+
+ i = 0;
+ while (i < BYTTAB) {
+ co = t->tcolor[i];
+ sco = newsub(cm, co);
+ newarc(v->nfa, PLAIN, sco, lp, rp);
+ previ = i;
+ do {
+ t->tcolor[i++] = sco;
+ } while (i < BYTTAB && t->tcolor[i] == co);
+ ndone = i - previ;
+ cm->cd[co].nchrs -= ndone;
+ cm->cd[sco].nchrs += ndone;
+ }
+}
+
+/*
+ - okcolors - promote subcolors to full colors
+ ^ static void okcolors(struct nfa *, struct colormap *);
+ */
+static void
+okcolors(
+ struct nfa *nfa,
+ struct colormap *cm)
+{
+ struct colordesc *cd;
+ struct colordesc *end = CDEND(cm);
+ struct colordesc *scd;
+ struct arc *a;
+ color co;
+ color sco;
+
+ for (cd=cm->cd, co=0 ; cd<end ; cd++, co++) {
+ sco = cd->sub;
+ if (UNUSEDCOLOR(cd) || sco == NOSUB) {
+ /*
+ * Has no subcolor, no further action.
+ */
+ } else if (sco == co) {
+ /*
+ * Is subcolor, let parent deal with it.
+ */
+ } else if (cd->nchrs == 0) {
+ /*
+ * Parent empty, its arcs change color to subcolor.
+ */
+
+ cd->sub = NOSUB;
+ scd = &cm->cd[sco];
+ assert(scd->nchrs > 0);
+ assert(scd->sub == sco);
+ scd->sub = NOSUB;
+ while ((a = cd->arcs) != NULL) {
+ assert(a->co == co);
+ uncolorchain(cm, a);
+ a->co = sco;
+ colorchain(cm, a);
+ }
+ freecolor(cm, co);
+ } else {
+ /*
+ * Parent's arcs must gain parallel subcolor arcs.
+ */
+
+ cd->sub = NOSUB;
+ scd = &cm->cd[sco];
+ assert(scd->nchrs > 0);
+ assert(scd->sub == sco);
+ scd->sub = NOSUB;
+ for (a=cd->arcs ; a!=NULL ; a=a->colorchain) {
+ assert(a->co == co);
+ newarc(nfa, a->type, sco, a->from, a->to);
+ }
+ }
+ }
+}
+
+/*
+ - colorchain - add this arc to the color chain of its color
+ ^ static void colorchain(struct colormap *, struct arc *);
+ */
+static void
+colorchain(
+ struct colormap *cm,
+ struct arc *a)
+{
+ struct colordesc *cd = &cm->cd[a->co];
+
+ if (cd->arcs != NULL) {
+ cd->arcs->colorchainRev = a;
+ }
+ a->colorchain = cd->arcs;
+ a->colorchainRev = NULL;
+ cd->arcs = a;
+}
+
+/*
+ - uncolorchain - delete this arc from the color chain of its color
+ ^ static void uncolorchain(struct colormap *, struct arc *);
+ */
+static void
+uncolorchain(
+ struct colormap *cm,
+ struct arc *a)
+{
+ struct colordesc *cd = &cm->cd[a->co];
+ struct arc *aa = a->colorchainRev;
+
+ if (aa == NULL) {
+ assert(cd->arcs == a);
+ cd->arcs = a->colorchain;
+ } else {
+ assert(aa->colorchain == a);
+ aa->colorchain = a->colorchain;
+ }
+ if (a->colorchain != NULL) {
+ a->colorchain->colorchainRev = aa;
+ }
+ a->colorchain = NULL; /* paranoia */
+ a->colorchainRev = NULL;
+}
+
+/*
+ - rainbow - add arcs of all full colors (but one) between specified states
+ ^ static void rainbow(struct nfa *, struct colormap *, int, pcolor,
+ ^ struct state *, struct state *);
+ */
+static void
+rainbow(
+ struct nfa *nfa,
+ struct colormap *cm,
+ int type,
+ pcolor but, /* COLORLESS if no exceptions */
+ struct state *from,
+ struct state *to)
+{
+ struct colordesc *cd;
+ struct colordesc *end = CDEND(cm);
+ color co;
+
+ for (cd=cm->cd, co=0 ; cd<end && !CISERR(); cd++, co++) {
+ if (!UNUSEDCOLOR(cd) && (cd->sub != co) && (co != but)
+ && !(cd->flags&PSEUDO)) {
+ newarc(nfa, type, co, from, to);
+ }
+ }
+}
+
+/*
+ - colorcomplement - add arcs of complementary colors
+ * The calling sequence ought to be reconciled with cloneouts().
+ ^ static void colorcomplement(struct nfa *, struct colormap *, int,
+ ^ struct state *, struct state *, struct state *);
+ */
+static void
+colorcomplement(
+ struct nfa *nfa,
+ struct colormap *cm,
+ int type,
+ struct state *of, /* complements of this guy's PLAIN outarcs */
+ struct state *from,
+ struct state *to)
+{
+ struct colordesc *cd;
+ struct colordesc *end = CDEND(cm);
+ color co;
+
+ assert(of != from);
+ for (cd=cm->cd, co=0 ; cd<end && !CISERR() ; cd++, co++) {
+ if (!UNUSEDCOLOR(cd) && !(cd->flags&PSEUDO)) {
+ if (findarc(of, PLAIN, co) == NULL) {
+ newarc(nfa, type, co, from, to);
+ }
+ }
+ }
+}
+
+#ifdef REG_DEBUG
+/*
+ ^ #ifdef REG_DEBUG
+ */
+
+/*
+ - dumpcolors - debugging output
+ ^ static void dumpcolors(struct colormap *, FILE *);
+ */
+static void
+dumpcolors(
+ struct colormap *cm,
+ FILE *f)
+{
+ struct colordesc *cd;
+ struct colordesc *end;
+ color co;
+ chr c;
+ char *has;
+
+ fprintf(f, "max %ld\n", (long) cm->max);
+ if (NBYTS > 1) {
+ fillcheck(cm, cm->tree, 0, f);
+ }
+ end = CDEND(cm);
+ for (cd=cm->cd+1, co=1 ; cd<end ; cd++, co++) { /* skip 0 */
+ if (!UNUSEDCOLOR(cd)) {
+ assert(cd->nchrs > 0);
+ has = (cd->block != NULL) ? "#" : "";
+ if (cd->flags&PSEUDO) {
+ fprintf(f, "#%2ld%s(ps): ", (long) co, has);
+ } else {
+ fprintf(f, "#%2ld%s(%2d): ", (long) co, has, cd->nchrs);
+ }
+
+ /*
+ * It's hard to do this more efficiently.
+ */
+
+ for (c=CHR_MIN ; c<CHR_MAX ; c++) {
+ if (GETCOLOR(cm, c) == co) {
+ dumpchr(c, f);
+ }
+ }
+ assert(c == CHR_MAX);
+ if (GETCOLOR(cm, c) == co) {
+ dumpchr(c, f);
+ }
+ fprintf(f, "\n");
+ }
+ }
+}
+
+/*
+ - fillcheck - check proper filling of a tree
+ ^ static void fillcheck(struct colormap *, union tree *, int, FILE *);
+ */
+static void
+fillcheck(
+ struct colormap *cm,
+ union tree *tree,
+ int level, /* level number (top == 0) of this block */
+ FILE *f)
+{
+ int i;
+ union tree *t;
+ union tree *fillt = &cm->tree[level+1];
+
+ assert(level < NBYTS-1); /* this level has pointers */
+ for (i=BYTTAB-1 ; i>=0 ; i--) {
+ t = tree->tptr[i];
+ if (t == NULL) {
+ fprintf(f, "NULL found in filled tree!\n");
+ } else if (t == fillt) {
+ /* empty body */
+ } else if (level < NBYTS-2) { /* more pointer blocks below */
+ fillcheck(cm, t, level+1, f);
+ }
+ }
+}
+
+/*
+ - dumpchr - print a chr
+ * Kind of char-centric but works well enough for debug use.
+ ^ static void dumpchr(pchr, FILE *);
+ */
+static void
+dumpchr(
+ pchr c,
+ FILE *f)
+{
+ if (c == '\\') {
+ fprintf(f, "\\\\");
+ } else if (c > ' ' && c <= '~') {
+ putc((char) c, f);
+ } else {
+ fprintf(f, "\\u%04lx", (long) c);
+ }
+}
+
+/*
+ ^ #endif
+ */
+#endif /* ifdef REG_DEBUG */
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_cvec.c b/contrib/hsrex/regc_cvec.c
new file mode 100644
index 0000000..0247521
--- /dev/null
+++ b/contrib/hsrex/regc_cvec.c
@@ -0,0 +1,146 @@
+/*
+ * Utility functions for handling cvecs
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Notes:
+ * Only (selected) functions in _this_ file should treat chr* as non-constant.
+ */
+
+/*
+ - newcvec - allocate a new cvec
+ ^ static struct cvec *newcvec(int, int);
+ */
+static struct cvec *
+newcvec(
+ int nchrs, /* to hold this many chrs... */
+ int nranges) /* ... and this many ranges... */
+{
+ size_t nc = (size_t)nchrs + (size_t)nranges*2;
+ size_t n = sizeof(struct cvec) + nc*sizeof(chr);
+ struct cvec *cv = (struct cvec *) MALLOC(n);
+
+ if (cv == NULL) {
+ return NULL;
+ }
+ cv->chrspace = nchrs;
+ cv->chrs = (chr *)(((char *)cv)+sizeof(struct cvec));
+ cv->ranges = cv->chrs + nchrs;
+ cv->rangespace = nranges;
+ return clearcvec(cv);
+}
+
+/*
+ - clearcvec - clear a possibly-new cvec
+ * Returns pointer as convenience.
+ ^ static struct cvec *clearcvec(struct cvec *);
+ */
+static struct cvec *
+clearcvec(
+ struct cvec *cv) /* character vector */
+{
+ assert(cv != NULL);
+ cv->nchrs = 0;
+ cv->nranges = 0;
+ return cv;
+}
+
+/*
+ - addchr - add a chr to a cvec
+ ^ static void addchr(struct cvec *, pchr);
+ */
+static void
+addchr(
+ struct cvec *cv, /* character vector */
+ pchr c) /* character to add */
+{
+ cv->chrs[cv->nchrs++] = (chr)c;
+}
+
+/*
+ - addrange - add a range to a cvec
+ ^ static void addrange(struct cvec *, pchr, pchr);
+ */
+static void
+addrange(
+ struct cvec *cv, /* character vector */
+ pchr from, /* first character of range */
+ pchr to) /* last character of range */
+{
+ assert(cv->nranges < cv->rangespace);
+ cv->ranges[cv->nranges*2] = (chr)from;
+ cv->ranges[cv->nranges*2 + 1] = (chr)to;
+ cv->nranges++;
+}
+
+/*
+ - getcvec - get a cvec, remembering it as v->cv
+ ^ static struct cvec *getcvec(struct vars *, int, int);
+ */
+static struct cvec *
+getcvec(
+ struct vars *v, /* context */
+ int nchrs, /* to hold this many chrs... */
+ int nranges) /* ... and this many ranges... */
+{
+ if ((v->cv != NULL) && (nchrs <= v->cv->chrspace) &&
+ (nranges <= v->cv->rangespace)) {
+ return clearcvec(v->cv);
+ }
+
+ if (v->cv != NULL) {
+ freecvec(v->cv);
+ }
+ v->cv = newcvec(nchrs, nranges);
+ if (v->cv == NULL) {
+ ERR(REG_ESPACE);
+ }
+
+ return v->cv;
+}
+
+/*
+ - freecvec - free a cvec
+ ^ static void freecvec(struct cvec *);
+ */
+static void
+freecvec(
+ struct cvec *cv) /* character vector */
+{
+ FREE(cv);
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_lex.c b/contrib/hsrex/regc_lex.c
new file mode 100644
index 0000000..4be02c6
--- /dev/null
+++ b/contrib/hsrex/regc_lex.c
@@ -0,0 +1,1185 @@
+/*
+ * lexical analyzer
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* scanning macros (know about v) */
+#define ATEOS() (v->now >= v->stop)
+#define HAVE(n) (v->stop - v->now >= (n))
+#define NEXT1(c) (!ATEOS() && *v->now == CHR(c))
+#define NEXT2(a,b) (HAVE(2) && *v->now == CHR(a) && *(v->now+1) == CHR(b))
+#define NEXT3(a,b,c) \
+ (HAVE(3) && *v->now == CHR(a) && \
+ *(v->now+1) == CHR(b) && \
+ *(v->now+2) == CHR(c))
+#define SET(c) (v->nexttype = (c))
+#define SETV(c, n) (v->nexttype = (c), v->nextvalue = (n))
+#define RET(c) return (SET(c), 1)
+#define RETV(c, n) return (SETV(c, n), 1)
+#define FAILW(e) return (ERR(e), 0) /* ERR does SET(EOS) */
+#define LASTTYPE(t) (v->lasttype == (t))
+
+/* lexical contexts */
+#define L_ERE 1 /* mainline ERE/ARE */
+#define L_BRE 2 /* mainline BRE */
+#define L_Q 3 /* REG_QUOTE */
+#define L_EBND 4 /* ERE/ARE bound */
+#define L_BBND 5 /* BRE bound */
+#define L_BRACK 6 /* brackets */
+#define L_CEL 7 /* collating element */
+#define L_ECL 8 /* equivalence class */
+#define L_CCL 9 /* character class */
+#define INTOCON(c) (v->lexcon = (c))
+#define INCON(con) (v->lexcon == (con))
+
+/* construct pointer past end of chr array */
+#define ENDOF(array) ((array) + sizeof(array)/sizeof(chr))
+
+/*
+ - lexstart - set up lexical stuff, scan leading options
+ ^ static void lexstart(struct vars *);
+ */
+static void
+lexstart(
+ struct vars *v)
+{
+ prefixes(v); /* may turn on new type bits etc. */
+ NOERR();
+
+ if (v->cflags&REG_QUOTE) {
+ assert(!(v->cflags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE)));
+ INTOCON(L_Q);
+ } else if (v->cflags&REG_EXTENDED) {
+ assert(!(v->cflags&REG_QUOTE));
+ INTOCON(L_ERE);
+ } else {
+ assert(!(v->cflags&(REG_QUOTE|REG_ADVF)));
+ INTOCON(L_BRE);
+ }
+
+ v->nexttype = EMPTY; /* remember we were at the start */
+ next(v); /* set up the first token */
+}
+
+/*
+ - prefixes - implement various special prefixes
+ ^ static void prefixes(struct vars *);
+ */
+static void
+prefixes(
+ struct vars *v)
+{
+ /*
+ * Literal string doesn't get any of this stuff.
+ */
+
+ if (v->cflags&REG_QUOTE) {
+ return;
+ }
+
+ /*
+ * Initial "***" gets special things.
+ */
+
+ if (HAVE(4) && NEXT3('*', '*', '*')) {
+ switch (*(v->now + 3)) {
+ case CHR('?'): /* "***?" error, msg shows version */
+ ERR(REG_BADPAT);
+ return; /* proceed no further */
+ break;
+ case CHR('='): /* "***=" shifts to literal string */
+ NOTE(REG_UNONPOSIX);
+ v->cflags |= REG_QUOTE;
+ v->cflags &= ~(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE);
+ v->now += 4;
+ return; /* and there can be no more prefixes */
+ break;
+ case CHR(':'): /* "***:" shifts to AREs */
+ NOTE(REG_UNONPOSIX);
+ v->cflags |= REG_ADVANCED;
+ v->now += 4;
+ break;
+ default: /* otherwise *** is just an error */
+ ERR(REG_BADRPT);
+ return;
+ break;
+ }
+ }
+
+ /*
+ * BREs and EREs don't get embedded options.
+ */
+
+ if ((v->cflags&REG_ADVANCED) != REG_ADVANCED) {
+ return;
+ }
+
+ /*
+ * Embedded options (AREs only).
+ */
+
+ if (HAVE(3) && NEXT2('(', '?') && iscalpha(*(v->now + 2))) {
+ NOTE(REG_UNONPOSIX);
+ v->now += 2;
+ for (; !ATEOS() && iscalpha(*v->now); v->now++) {
+ switch (*v->now) {
+ case CHR('b'): /* BREs (but why???) */
+ v->cflags &= ~(REG_ADVANCED|REG_QUOTE);
+ break;
+ case CHR('c'): /* case sensitive */
+ v->cflags &= ~REG_ICASE;
+ break;
+ case CHR('e'): /* plain EREs */
+ v->cflags |= REG_EXTENDED;
+ v->cflags &= ~(REG_ADVF|REG_QUOTE);
+ break;
+ case CHR('i'): /* case insensitive */
+ v->cflags |= REG_ICASE;
+ break;
+ case CHR('m'): /* Perloid synonym for n */
+ case CHR('n'): /* \n affects ^ $ . [^ */
+ v->cflags |= REG_NEWLINE;
+ break;
+ case CHR('p'): /* ~Perl, \n affects . [^ */
+ v->cflags |= REG_NLSTOP;
+ v->cflags &= ~REG_NLANCH;
+ break;
+ case CHR('q'): /* literal string */
+ v->cflags |= REG_QUOTE;
+ v->cflags &= ~REG_ADVANCED;
+ break;
+ case CHR('s'): /* single line, \n ordinary */
+ v->cflags &= ~REG_NEWLINE;
+ break;
+ case CHR('t'): /* tight syntax */
+ v->cflags &= ~REG_EXPANDED;
+ break;
+ case CHR('w'): /* weird, \n affects ^ $ only */
+ v->cflags &= ~REG_NLSTOP;
+ v->cflags |= REG_NLANCH;
+ break;
+ case CHR('x'): /* expanded syntax */
+ v->cflags |= REG_EXPANDED;
+ break;
+ default:
+ ERR(REG_BADOPT);
+ return;
+ }
+ }
+ if (!NEXT1(')')) {
+ ERR(REG_BADOPT);
+ return;
+ }
+ v->now++;
+ if (v->cflags&REG_QUOTE) {
+ v->cflags &= ~(REG_EXPANDED|REG_NEWLINE);
+ }
+ }
+}
+
+/*
+ - lexnest - "call a subroutine", interpolating string at the lexical level
+ * Note, this is not a very general facility. There are a number of
+ * implicit assumptions about what sorts of strings can be subroutines.
+ ^ static void lexnest(struct vars *, const chr *, const chr *);
+ */
+static void
+lexnest(
+ struct vars *v,
+ const chr *beginp, /* start of interpolation */
+ const chr *endp) /* one past end of interpolation */
+{
+ assert(v->savenow == NULL); /* only one level of nesting */
+ v->savenow = v->now;
+ v->savestop = v->stop;
+ v->now = beginp;
+ v->stop = endp;
+}
+
+/*
+ * string constants to interpolate as expansions of things like \d
+ */
+
+static const chr backd[] = { /* \d */
+ CHR('['), CHR('['), CHR(':'),
+ CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+ CHR(':'), CHR(']'), CHR(']')
+};
+static const chr backD[] = { /* \D */
+ CHR('['), CHR('^'), CHR('['), CHR(':'),
+ CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+ CHR(':'), CHR(']'), CHR(']')
+};
+static const chr brbackd[] = { /* \d within brackets */
+ CHR('['), CHR(':'),
+ CHR('d'), CHR('i'), CHR('g'), CHR('i'), CHR('t'),
+ CHR(':'), CHR(']')
+};
+static const chr backs[] = { /* \s */
+ CHR('['), CHR('['), CHR(':'),
+ CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+ CHR(':'), CHR(']'), CHR(']')
+};
+static const chr backS[] = { /* \S */
+ CHR('['), CHR('^'), CHR('['), CHR(':'),
+ CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+ CHR(':'), CHR(']'), CHR(']')
+};
+static const chr brbacks[] = { /* \s within brackets */
+ CHR('['), CHR(':'),
+ CHR('s'), CHR('p'), CHR('a'), CHR('c'), CHR('e'),
+ CHR(':'), CHR(']')
+};
+static const chr backw[] = { /* \w */
+ CHR('['), CHR('['), CHR(':'),
+ CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+ CHR(':'), CHR(']'), CHR('_'), CHR(']')
+};
+static const chr backW[] = { /* \W */
+ CHR('['), CHR('^'), CHR('['), CHR(':'),
+ CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+ CHR(':'), CHR(']'), CHR('_'), CHR(']')
+};
+static const chr brbackw[] = { /* \w within brackets */
+ CHR('['), CHR(':'),
+ CHR('a'), CHR('l'), CHR('n'), CHR('u'), CHR('m'),
+ CHR(':'), CHR(']'), CHR('_')
+};
+
+/*
+ - lexword - interpolate a bracket expression for word characters
+ * Possibly ought to inquire whether there is a "word" character class.
+ ^ static void lexword(struct vars *);
+ */
+static void
+lexword(
+ struct vars *v)
+{
+ lexnest(v, backw, ENDOF(backw));
+}
+
+/*
+ - next - get next token
+ ^ static int next(struct vars *);
+ */
+static int /* 1 normal, 0 failure */
+next(
+ struct vars *v)
+{
+ chr c;
+
+ /*
+ * Errors yield an infinite sequence of failures.
+ */
+
+ if (ISERR()) {
+ return 0; /* the error has set nexttype to EOS */
+ }
+
+ /*
+ * Remember flavor of last token.
+ */
+
+ v->lasttype = v->nexttype;
+
+ /*
+ * REG_BOSONLY
+ */
+
+ if (v->nexttype == EMPTY && (v->cflags&REG_BOSONLY)) {
+ /* at start of a REG_BOSONLY RE */
+ RETV(SBEGIN, 0); /* same as \A */
+ }
+
+ /*
+ * If we're nested and we've hit end, return to outer level.
+ */
+
+ if (v->savenow != NULL && ATEOS()) {
+ v->now = v->savenow;
+ v->stop = v->savestop;
+ v->savenow = v->savestop = NULL;
+ }
+
+ /*
+ * Skip white space etc. if appropriate (not in literal or [])
+ */
+
+ if (v->cflags&REG_EXPANDED) {
+ switch (v->lexcon) {
+ case L_ERE:
+ case L_BRE:
+ case L_EBND:
+ case L_BBND:
+ skip(v);
+ break;
+ }
+ }
+
+ /*
+ * Handle EOS, depending on context.
+ */
+
+ if (ATEOS()) {
+ switch (v->lexcon) {
+ case L_ERE:
+ case L_BRE:
+ case L_Q:
+ RET(EOS);
+ break;
+ case L_EBND:
+ case L_BBND:
+ FAILW(REG_EBRACE);
+ break;
+ case L_BRACK:
+ case L_CEL:
+ case L_ECL:
+ case L_CCL:
+ FAILW(REG_EBRACK);
+ break;
+ }
+ assert(NOTREACHED);
+ }
+
+ /*
+ * Okay, time to actually get a character.
+ */
+
+ c = *v->now++;
+
+ /*
+ * Deal with the easy contexts, punt EREs to code below.
+ */
+
+ switch (v->lexcon) {
+ case L_BRE: /* punt BREs to separate function */
+ return brenext(v, c);
+ break;
+ case L_ERE: /* see below */
+ break;
+ case L_Q: /* literal strings are easy */
+ RETV(PLAIN, c);
+ break;
+ case L_BBND: /* bounds are fairly simple */
+ case L_EBND:
+ switch (c) {
+ case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+ case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+ case CHR('8'): case CHR('9'):
+ RETV(DIGIT, (chr)DIGITVAL(c));
+ break;
+ case CHR(','):
+ RET(',');
+ break;
+ case CHR('}'): /* ERE bound ends with } */
+ if (INCON(L_EBND)) {
+ INTOCON(L_ERE);
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('}', 0);
+ }
+ RETV('}', 1);
+ } else {
+ FAILW(REG_BADBR);
+ }
+ break;
+ case CHR('\\'): /* BRE bound ends with \} */
+ if (INCON(L_BBND) && NEXT1('}')) {
+ v->now++;
+ INTOCON(L_BRE);
+ RET('}');
+ } else {
+ FAILW(REG_BADBR);
+ }
+ break;
+ default:
+ FAILW(REG_BADBR);
+ break;
+ }
+ assert(NOTREACHED);
+ break;
+ case L_BRACK: /* brackets are not too hard */
+ switch (c) {
+ case CHR(']'):
+ if (LASTTYPE('[')) {
+ RETV(PLAIN, c);
+ } else {
+ INTOCON((v->cflags&REG_EXTENDED) ? L_ERE : L_BRE);
+ RET(']');
+ }
+ break;
+ case CHR('\\'):
+ NOTE(REG_UBBS);
+ if (!(v->cflags&REG_ADVF)) {
+ RETV(PLAIN, c);
+ }
+ NOTE(REG_UNONPOSIX);
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
+ }
+ (DISCARD)lexescape(v);
+ switch (v->nexttype) { /* not all escapes okay here */
+ case PLAIN:
+ return 1;
+ break;
+ case CCLASS:
+ switch (v->nextvalue) {
+ case 'd':
+ lexnest(v, brbackd, ENDOF(brbackd));
+ break;
+ case 's':
+ lexnest(v, brbacks, ENDOF(brbacks));
+ break;
+ case 'w':
+ lexnest(v, brbackw, ENDOF(brbackw));
+ break;
+ default:
+ FAILW(REG_EESCAPE);
+ break;
+ }
+
+ /*
+ * lexnest() done, back up and try again.
+ */
+
+ v->nexttype = v->lasttype;
+ return next(v);
+ break;
+ }
+
+ /*
+ * Not one of the acceptable escapes.
+ */
+
+ FAILW(REG_EESCAPE);
+ break;
+ case CHR('-'):
+ if (LASTTYPE('[') || NEXT1(']')) {
+ RETV(PLAIN, c);
+ } else {
+ RETV(RANGE, c);
+ }
+ break;
+ case CHR('['):
+ if (ATEOS()) {
+ FAILW(REG_EBRACK);
+ }
+ switch (*v->now++) {
+ case CHR('.'):
+ INTOCON(L_CEL);
+
+ /*
+ * Might or might not be locale-specific.
+ */
+
+ RET(COLLEL);
+ break;
+ case CHR('='):
+ INTOCON(L_ECL);
+ NOTE(REG_ULOCALE);
+ RET(ECLASS);
+ break;
+ case CHR(':'):
+ INTOCON(L_CCL);
+ NOTE(REG_ULOCALE);
+ RET(CCLASS);
+ break;
+ default: /* oops */
+ v->now--;
+ RETV(PLAIN, c);
+ break;
+ }
+ assert(NOTREACHED);
+ break;
+ default:
+ RETV(PLAIN, c);
+ break;
+ }
+ assert(NOTREACHED);
+ break;
+ case L_CEL: /* collating elements are easy */
+ if (c == CHR('.') && NEXT1(']')) {
+ v->now++;
+ INTOCON(L_BRACK);
+ RETV(END, '.');
+ } else {
+ RETV(PLAIN, c);
+ }
+ break;
+ case L_ECL: /* ditto equivalence classes */
+ if (c == CHR('=') && NEXT1(']')) {
+ v->now++;
+ INTOCON(L_BRACK);
+ RETV(END, '=');
+ } else {
+ RETV(PLAIN, c);
+ }
+ break;
+ case L_CCL: /* ditto character classes */
+ if (c == CHR(':') && NEXT1(']')) {
+ v->now++;
+ INTOCON(L_BRACK);
+ RETV(END, ':');
+ } else {
+ RETV(PLAIN, c);
+ }
+ break;
+ default:
+ assert(NOTREACHED);
+ break;
+ }
+
+ /*
+ * That got rid of everything except EREs and AREs.
+ */
+
+ assert(INCON(L_ERE));
+
+ /*
+ * Deal with EREs and AREs, except for backslashes.
+ */
+
+ switch (c) {
+ case CHR('|'):
+ RET('|');
+ break;
+ case CHR('*'):
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('*', 0);
+ }
+ RETV('*', 1);
+ break;
+ case CHR('+'):
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('+', 0);
+ }
+ RETV('+', 1);
+ break;
+ case CHR('?'):
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ v->now++;
+ NOTE(REG_UNONPOSIX);
+ RETV('?', 0);
+ }
+ RETV('?', 1);
+ break;
+ case CHR('{'): /* bounds start or plain character */
+ if (v->cflags&REG_EXPANDED) {
+ skip(v);
+ }
+ if (ATEOS() || !iscdigit(*v->now)) {
+ NOTE(REG_UBRACES);
+ NOTE(REG_UUNSPEC);
+ RETV(PLAIN, c);
+ } else {
+ NOTE(REG_UBOUNDS);
+ INTOCON(L_EBND);
+ RET('{');
+ }
+ assert(NOTREACHED);
+ break;
+ case CHR('('): /* parenthesis, or advanced extension */
+ if ((v->cflags&REG_ADVF) && NEXT1('?')) {
+ NOTE(REG_UNONPOSIX);
+ v->now++;
+ switch (*v->now++) {
+ case CHR(':'): /* non-capturing paren */
+ RETV('(', 0);
+ break;
+ case CHR('#'): /* comment */
+ while (!ATEOS() && *v->now != CHR(')')) {
+ v->now++;
+ }
+ if (!ATEOS()) {
+ v->now++;
+ }
+ assert(v->nexttype == v->lasttype);
+ return next(v);
+ break;
+ case CHR('='): /* positive lookahead */
+ NOTE(REG_ULOOKAHEAD);
+ RETV(LACON, 1);
+ break;
+ case CHR('!'): /* negative lookahead */
+ NOTE(REG_ULOOKAHEAD);
+ RETV(LACON, 0);
+ break;
+ default:
+ FAILW(REG_BADRPT);
+ break;
+ }
+ assert(NOTREACHED);
+ }
+ if (v->cflags&REG_NOSUB) {
+ RETV('(', 0); /* all parens non-capturing */
+ } else {
+ RETV('(', 1);
+ }
+ break;
+ case CHR(')'):
+ if (LASTTYPE('(')) {
+ NOTE(REG_UUNSPEC);
+ }
+ RETV(')', c);
+ break;
+ case CHR('['): /* easy except for [[:<:]] and [[:>:]] */
+ if (HAVE(6) && *(v->now+0) == CHR('[') &&
+ *(v->now+1) == CHR(':') &&
+ (*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) &&
+ *(v->now+3) == CHR(':') &&
+ *(v->now+4) == CHR(']') &&
+ *(v->now+5) == CHR(']')) {
+ c = *(v->now+2);
+ v->now += 6;
+ NOTE(REG_UNONPOSIX);
+ RET((c == CHR('<')) ? '<' : '>');
+ }
+ INTOCON(L_BRACK);
+ if (NEXT1('^')) {
+ v->now++;
+ RETV('[', 0);
+ }
+ RETV('[', 1);
+ break;
+ case CHR('.'):
+ RET('.');
+ break;
+ case CHR('^'):
+ RET('^');
+ break;
+ case CHR('$'):
+ RET('$');
+ break;
+ case CHR('\\'): /* mostly punt backslashes to code below */
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
+ }
+ break;
+ default: /* ordinary character */
+ RETV(PLAIN, c);
+ break;
+ }
+
+ /*
+ * ERE/ARE backslash handling; backslash already eaten.
+ */
+
+ assert(!ATEOS());
+ if (!(v->cflags&REG_ADVF)) {/* only AREs have non-trivial escapes */
+ if (iscalnum(*v->now)) {
+ NOTE(REG_UBSALNUM);
+ NOTE(REG_UUNSPEC);
+ }
+ RETV(PLAIN, *v->now++);
+ }
+ (DISCARD)lexescape(v);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ if (v->nexttype == CCLASS) {/* fudge at lexical level */
+ switch (v->nextvalue) {
+ case 'd': lexnest(v, backd, ENDOF(backd)); break;
+ case 'D': lexnest(v, backD, ENDOF(backD)); break;
+ case 's': lexnest(v, backs, ENDOF(backs)); break;
+ case 'S': lexnest(v, backS, ENDOF(backS)); break;
+ case 'w': lexnest(v, backw, ENDOF(backw)); break;
+ case 'W': lexnest(v, backW, ENDOF(backW)); break;
+ default:
+ assert(NOTREACHED);
+ FAILW(REG_ASSERT);
+ break;
+ }
+ /* lexnest done, back up and try again */
+ v->nexttype = v->lasttype;
+ return next(v);
+ }
+
+ /*
+ * Otherwise, lexescape has already done the work.
+ */
+
+ return !ISERR();
+}
+
+/*
+ - lexescape - parse an ARE backslash escape (backslash already eaten)
+ * Note slightly nonstandard use of the CCLASS type code.
+ ^ static int lexescape(struct vars *);
+ */
+static int /* not actually used, but convenient for RETV */
+lexescape(
+ struct vars *v)
+{
+ chr c;
+ static chr alert[] = {
+ CHR('a'), CHR('l'), CHR('e'), CHR('r'), CHR('t')
+ };
+ static chr esc[] = {
+ CHR('E'), CHR('S'), CHR('C')
+ };
+ const chr *save;
+
+ assert(v->cflags&REG_ADVF);
+
+ assert(!ATEOS());
+ c = *v->now++;
+ if (!iscalnum(c)) {
+ RETV(PLAIN, c);
+ }
+
+ NOTE(REG_UNONPOSIX);
+ switch (c) {
+ case CHR('a'):
+ RETV(PLAIN, chrnamed(v, alert, ENDOF(alert), CHR('\007')));
+ break;
+ case CHR('A'):
+ RETV(SBEGIN, 0);
+ break;
+ case CHR('b'):
+ RETV(PLAIN, CHR('\b'));
+ break;
+ case CHR('B'):
+ RETV(PLAIN, CHR('\\'));
+ break;
+ case CHR('c'):
+ NOTE(REG_UUNPORT);
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, (chr)(*v->now++ & 037));
+ break;
+ case CHR('d'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'd');
+ break;
+ case CHR('D'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'D');
+ break;
+ case CHR('e'):
+ NOTE(REG_UUNPORT);
+ RETV(PLAIN, chrnamed(v, esc, ENDOF(esc), CHR('\033')));
+ break;
+ case CHR('f'):
+ RETV(PLAIN, CHR('\f'));
+ break;
+ case CHR('m'):
+ RET('<');
+ break;
+ case CHR('M'):
+ RET('>');
+ break;
+ case CHR('n'):
+ RETV(PLAIN, CHR('\n'));
+ break;
+ case CHR('r'):
+ RETV(PLAIN, CHR('\r'));
+ break;
+ case CHR('s'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 's');
+ break;
+ case CHR('S'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'S');
+ break;
+ case CHR('t'):
+ RETV(PLAIN, CHR('\t'));
+ break;
+ case CHR('u'):
+ c = lexdigits(v, 16, 4, 4);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('U'):
+ c = lexdigits(v, 16, 8, 8);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('v'):
+ RETV(PLAIN, CHR('\v'));
+ break;
+ case CHR('w'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'w');
+ break;
+ case CHR('W'):
+ NOTE(REG_ULOCALE);
+ RETV(CCLASS, 'W');
+ break;
+ case CHR('x'):
+ NOTE(REG_UUNPORT);
+ c = lexdigits(v, 16, 1, 255); /* REs >255 long outside spec */
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('y'):
+ NOTE(REG_ULOCALE);
+ RETV(WBDRY, 0);
+ break;
+ case CHR('Y'):
+ NOTE(REG_ULOCALE);
+ RETV(NWBDRY, 0);
+ break;
+ case CHR('Z'):
+ RETV(SEND, 0);
+ break;
+ case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+ case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+ case CHR('9'):
+ save = v->now;
+ v->now--; /* put first digit back */
+ c = lexdigits(v, 10, 1, 255); /* REs >255 long outside spec */
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+
+ /*
+ * Ugly heuristic (first test is "exactly 1 digit?")
+ */
+
+ if (v->now - save == 0 || ((int) c > 0 && (int)c <= v->nsubexp)) {
+ NOTE(REG_UBACKREF);
+ RETV(BACKREF, (chr)c);
+ }
+
+ /*
+ * Oops, doesn't look like it's a backref after all...
+ */
+
+ v->now = save;
+
+ /*
+ * And fall through into octal number.
+ */
+
+ case CHR('0'):
+ NOTE(REG_UUNPORT);
+ v->now--; /* put first digit back */
+ c = lexdigits(v, 8, 1, 3);
+ if (ISERR()) {
+ FAILW(REG_EESCAPE);
+ }
+ RETV(PLAIN, c);
+ break;
+ default:
+ assert(iscalpha(c));
+ FAILW(REG_EESCAPE); /* unknown alphabetic escape */
+ break;
+ }
+ assert(NOTREACHED);
+}
+
+/*
+ - lexdigits - slurp up digits and return chr value
+ ^ static chr lexdigits(struct vars *, int, int, int);
+ */
+static chr /* chr value; errors signalled via ERR */
+lexdigits(
+ struct vars *v,
+ int base,
+ int minlen,
+ int maxlen)
+{
+ uchr n; /* unsigned to avoid overflow misbehavior */
+ int len;
+ chr c;
+ int d;
+ const uchr ub = (uchr) base;
+
+ n = 0;
+ for (len = 0; len < maxlen && !ATEOS(); len++) {
+ c = *v->now++;
+ switch (c) {
+ case CHR('0'): case CHR('1'): case CHR('2'): case CHR('3'):
+ case CHR('4'): case CHR('5'): case CHR('6'): case CHR('7'):
+ case CHR('8'): case CHR('9'):
+ d = DIGITVAL(c);
+ break;
+ case CHR('a'): case CHR('A'): d = 10; break;
+ case CHR('b'): case CHR('B'): d = 11; break;
+ case CHR('c'): case CHR('C'): d = 12; break;
+ case CHR('d'): case CHR('D'): d = 13; break;
+ case CHR('e'): case CHR('E'): d = 14; break;
+ case CHR('f'): case CHR('F'): d = 15; break;
+ default:
+ v->now--; /* oops, not a digit at all */
+ d = -1;
+ break;
+ }
+
+ if (d >= base) { /* not a plausible digit */
+ v->now--;
+ d = -1;
+ }
+ if (d < 0) {
+ break; /* NOTE BREAK OUT */
+ }
+ n = n*ub + (uchr)d;
+ }
+ if (len < minlen) {
+ ERR(REG_EESCAPE);
+ }
+
+ return (chr)n;
+}
+
+/*
+ - brenext - get next BRE token
+ * This is much like EREs except for all the stupid backslashes and the
+ * context-dependency of some things.
+ ^ static int brenext(struct vars *, pchr);
+ */
+static int /* 1 normal, 0 failure */
+brenext(
+ struct vars *v,
+ pchr pc)
+{
+ chr c = (chr)pc;
+
+ switch (c) {
+ case CHR('*'):
+ if (LASTTYPE(EMPTY) || LASTTYPE('(') || LASTTYPE('^')) {
+ RETV(PLAIN, c);
+ }
+ RET('*');
+ break;
+ case CHR('['):
+ if (HAVE(6) && *(v->now+0) == CHR('[') &&
+ *(v->now+1) == CHR(':') &&
+ (*(v->now+2) == CHR('<') || *(v->now+2) == CHR('>')) &&
+ *(v->now+3) == CHR(':') &&
+ *(v->now+4) == CHR(']') &&
+ *(v->now+5) == CHR(']')) {
+ c = *(v->now+2);
+ v->now += 6;
+ NOTE(REG_UNONPOSIX);
+ RET((c == CHR('<')) ? '<' : '>');
+ }
+ INTOCON(L_BRACK);
+ if (NEXT1('^')) {
+ v->now++;
+ RETV('[', 0);
+ }
+ RETV('[', 1);
+ break;
+ case CHR('.'):
+ RET('.');
+ break;
+ case CHR('^'):
+ if (LASTTYPE(EMPTY)) {
+ RET('^');
+ }
+ if (LASTTYPE('(')) {
+ NOTE(REG_UUNSPEC);
+ RET('^');
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('$'):
+ if (v->cflags&REG_EXPANDED) {
+ skip(v);
+ }
+ if (ATEOS()) {
+ RET('$');
+ }
+ if (NEXT2('\\', ')')) {
+ NOTE(REG_UUNSPEC);
+ RET('$');
+ }
+ RETV(PLAIN, c);
+ break;
+ case CHR('\\'):
+ break; /* see below */
+ default:
+ RETV(PLAIN, c);
+ break;
+ }
+
+ assert(c == CHR('\\'));
+
+ if (ATEOS()) {
+ FAILW(REG_EESCAPE);
+ }
+
+ c = *v->now++;
+ switch (c) {
+ case CHR('{'):
+ INTOCON(L_BBND);
+ NOTE(REG_UBOUNDS);
+ RET('{');
+ break;
+ case CHR('('):
+ RETV('(', 1);
+ break;
+ case CHR(')'):
+ RETV(')', c);
+ break;
+ case CHR('<'):
+ NOTE(REG_UNONPOSIX);
+ RET('<');
+ break;
+ case CHR('>'):
+ NOTE(REG_UNONPOSIX);
+ RET('>');
+ break;
+ case CHR('1'): case CHR('2'): case CHR('3'): case CHR('4'):
+ case CHR('5'): case CHR('6'): case CHR('7'): case CHR('8'):
+ case CHR('9'):
+ NOTE(REG_UBACKREF);
+ RETV(BACKREF, (chr)DIGITVAL(c));
+ break;
+ default:
+ if (iscalnum(c)) {
+ NOTE(REG_UBSALNUM);
+ NOTE(REG_UUNSPEC);
+ }
+ RETV(PLAIN, c);
+ break;
+ }
+
+ assert(NOTREACHED);
+}
+
+/*
+ - skip - skip white space and comments in expanded form
+ ^ static void skip(struct vars *);
+ */
+static void
+skip(
+ struct vars *v)
+{
+ const chr *start = v->now;
+
+ assert(v->cflags&REG_EXPANDED);
+
+ for (;;) {
+ while (!ATEOS() && iscspace(*v->now)) {
+ v->now++;
+ }
+ if (ATEOS() || *v->now != CHR('#')) {
+ break; /* NOTE BREAK OUT */
+ }
+ assert(NEXT1('#'));
+ while (!ATEOS() && *v->now != CHR('\n')) {
+ v->now++;
+ }
+
+ /*
+ * Leave the newline to be picked up by the iscspace loop.
+ */
+ }
+
+ if (v->now != start) {
+ NOTE(REG_UNONPOSIX);
+ }
+}
+
+/*
+ - newline - return the chr for a newline
+ * This helps confine use of CHR to this source file.
+ ^ static chr newline(NOPARMS);
+ */
+static chr
+newline(void)
+{
+ return CHR('\n');
+}
+
+/*
+ - ch - return the chr sequence for regc_locale.c's fake collating element ch
+ * This helps confine use of CHR to this source file. Beware that the caller
+ * knows how long the sequence is.
+ ^ #ifdef REG_DEBUG
+ ^ static const chr *ch(NOPARMS);
+ ^ #endif
+ */
+#ifdef REG_DEBUG
+static const chr *
+ch(void)
+{
+ static chr chstr[] = { CHR('c'), CHR('h'), CHR('\0') };
+
+ return chstr;
+}
+#endif
+
+/*
+ - chrnamed - return the chr known by a given (chr string) name
+ * The code is a bit clumsy, but this routine gets only such specialized
+ * use that it hardly matters.
+ ^ static chr chrnamed(struct vars *, const chr *, const chr *, pchr);
+ */
+static chr
+chrnamed(
+ struct vars *v,
+ const chr *startp, /* start of name */
+ const chr *endp, /* just past end of name */
+ pchr lastresort) /* what to return if name lookup fails */
+{
+ celt c;
+ int errsave;
+ int e;
+ struct cvec *cv;
+
+ errsave = v->err;
+ v->err = 0;
+ c = element(v, startp, endp);
+ e = v->err;
+ v->err = errsave;
+
+ if (e != 0) {
+ return (chr)lastresort;
+ }
+
+ cv = range(v, c, c, 0);
+ if (cv->nchrs == 0) {
+ return (chr)lastresort;
+ }
+ return cv->chrs[0];
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_locale.c b/contrib/hsrex/regc_locale.c
new file mode 100644
index 0000000..a6bc3af
--- /dev/null
+++ b/contrib/hsrex/regc_locale.c
@@ -0,0 +1,1163 @@
+/*
+ * regc_locale.c --
+ *
+ * This file contains the Unicode locale specific regexp routines.
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998 by Scriptics Corporation.
+ *
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ *
+ * RCS: @(#) $Id: regc_locale.c,v 1.20 2007/12/13 15:23:14 dgp Exp $
+ */
+
+/* ASCII character-name table */
+
+static const struct cname {
+ const char *name;
+ const char code;
+} cnames[] = {
+ {"NUL", '\0'},
+ {"SOH", '\001'},
+ {"STX", '\002'},
+ {"ETX", '\003'},
+ {"EOT", '\004'},
+ {"ENQ", '\005'},
+ {"ACK", '\006'},
+ {"BEL", '\007'},
+ {"alert", '\007'},
+ {"BS", '\010'},
+ {"backspace", '\b'},
+ {"HT", '\011'},
+ {"tab", '\t'},
+ {"LF", '\012'},
+ {"newline", '\n'},
+ {"VT", '\013'},
+ {"vertical-tab", '\v'},
+ {"FF", '\014'},
+ {"form-feed", '\f'},
+ {"CR", '\015'},
+ {"carriage-return", '\r'},
+ {"SO", '\016'},
+ {"SI", '\017'},
+ {"DLE", '\020'},
+ {"DC1", '\021'},
+ {"DC2", '\022'},
+ {"DC3", '\023'},
+ {"DC4", '\024'},
+ {"NAK", '\025'},
+ {"SYN", '\026'},
+ {"ETB", '\027'},
+ {"CAN", '\030'},
+ {"EM", '\031'},
+ {"SUB", '\032'},
+ {"ESC", '\033'},
+ {"IS4", '\034'},
+ {"FS", '\034'},
+ {"IS3", '\035'},
+ {"GS", '\035'},
+ {"IS2", '\036'},
+ {"RS", '\036'},
+ {"IS1", '\037'},
+ {"US", '\037'},
+ {"space", ' '},
+ {"exclamation-mark",'!'},
+ {"quotation-mark", '"'},
+ {"number-sign", '#'},
+ {"dollar-sign", '$'},
+ {"percent-sign", '%'},
+ {"ampersand", '&'},
+ {"apostrophe", '\''},
+ {"left-parenthesis",'('},
+ {"right-parenthesis", ')'},
+ {"asterisk", '*'},
+ {"plus-sign", '+'},
+ {"comma", ','},
+ {"hyphen", '-'},
+ {"hyphen-minus", '-'},
+ {"period", '.'},
+ {"full-stop", '.'},
+ {"slash", '/'},
+ {"solidus", '/'},
+ {"zero", '0'},
+ {"one", '1'},
+ {"two", '2'},
+ {"three", '3'},
+ {"four", '4'},
+ {"five", '5'},
+ {"six", '6'},
+ {"seven", '7'},
+ {"eight", '8'},
+ {"nine", '9'},
+ {"colon", ':'},
+ {"semicolon", ';'},
+ {"less-than-sign", '<'},
+ {"equals-sign", '='},
+ {"greater-than-sign", '>'},
+ {"question-mark", '?'},
+ {"commercial-at", '@'},
+ {"left-square-bracket", '['},
+ {"backslash", '\\'},
+ {"reverse-solidus", '\\'},
+ {"right-square-bracket", ']'},
+ {"circumflex", '^'},
+ {"circumflex-accent", '^'},
+ {"underscore", '_'},
+ {"low-line", '_'},
+ {"grave-accent", '`'},
+ {"left-brace", '{'},
+ {"left-curly-bracket", '{'},
+ {"vertical-line", '|'},
+ {"right-brace", '}'},
+ {"right-curly-bracket", '}'},
+ {"tilde", '~'},
+ {"DEL", '\177'},
+ {NULL, 0}
+};
+
+/*
+ * Unicode character-class tables.
+ */
+
+typedef struct crange {
+ chr start;
+ chr end;
+} crange;
+
+#if defined(REGEX_STANDALONE) && ! defined(REGEX_WCHAR)
+
+static const crange alphaRangeTable[] = {
+ {0x41, 0x5a}, {0x61, 0x7a}
+};
+
+#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
+
+static const chr alphaCharTable[] = {
+};
+
+#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
+
+static const crange digitRangeTable[] = {
+ {0x30, 0x39}
+};
+
+#define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange))
+
+static const crange punctRangeTable[] = {
+ {0x21, 0x23}, {0x25, 0x2a}, {0x2c, 0x2f}, {0x5b, 0x5d},
+};
+
+#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
+
+static const chr punctCharTable[] = {
+ 0x3a, 0x3b, 0x3f, 0x40, 0x5f, 0x7b, 0x7d
+};
+
+#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
+
+static const crange spaceRangeTable[] = {
+ {0x09, 0x0d}
+};
+
+#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
+
+static const chr spaceCharTable[] = {
+ 0x20
+};
+
+#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
+
+static const crange lowerRangeTable[] = {
+ {0x61, 0x7a}
+};
+
+#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
+
+static const chr lowerCharTable[] = {
+};
+
+#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
+
+static const crange upperRangeTable[] = {
+ {0x41, 0x5a}
+};
+
+#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
+
+static const chr upperCharTable[] = {
+};
+
+#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
+
+static const crange graphRangeTable[] = {
+ {0x21, 0x7e}
+};
+
+#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
+
+static const chr graphCharTable[] = {
+};
+
+#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
+
+static const crange printRangeTable[] = {
+ {0x20, 0x7E}
+};
+
+#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange))
+
+static const chr printCharTable[] = {
+};
+
+#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr))
+#else
+
+/*
+ * Declarations of Unicode character ranges. This code
+ * is automatically generated by the tools/uniClass.tcl script
+ * and used in generic/regc_locale.c. Do not modify by hand.
+ */
+
+/* Unicode: alphabetic characters */
+
+static const crange alphaRangeTable[] = {
+ {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6},
+ {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8},
+ {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1},
+ {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481},
+ {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587},
+ {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a},
+ {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5},
+ {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8},
+ {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a},
+ {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74},
+ {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
+ {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
+ {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90},
+ {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9},
+ {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33},
+ {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8},
+ {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10},
+ {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
+ {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46},
+ {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0},
+ {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b},
+ {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5},
+ {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9},
+ {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256},
+ {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae},
+ {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce},
+ {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315},
+ {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4},
+ {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea},
+ {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b},
+ {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
+ {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
+ {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
+ {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
+ {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131},
+ {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa},
+ {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7},
+ {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3},
+ {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28},
+ {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d},
+ {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72},
+ {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe},
+ {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}
+};
+
+#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
+
+static const chr alphaCharTable[] = {
+ 0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c,
+ 0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5,
+ 0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd,
+ 0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38,
+ 0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f,
+ 0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c,
+ 0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1,
+ 0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
+ 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3,
+ 0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258,
+ 0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f,
+ 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d,
+ 0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe
+};
+
+#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
+
+/*
+ * Unicode: decimal digit characters
+ */
+
+static const crange digitRangeTable[] = {
+ {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f},
+ {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f},
+ {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f},
+ {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049},
+ {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19}
+};
+
+#define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange))
+
+/*
+ * no singletons of digit characters.
+ */
+
+/*
+ * Unicode: punctuation characters.
+ */
+
+static const crange punctRangeTable[] = {
+ {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d},
+ {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12},
+ {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed},
+ {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043},
+ {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f},
+ {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03},
+ {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65}
+};
+
+#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
+
+static const chr punctCharTable[] = {
+ 0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab,
+ 0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be,
+ 0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964,
+ 0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d,
+ 0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d,
+ 0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68,
+ 0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d
+};
+
+#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
+
+/*
+ * Unicode: white space characters.
+ */
+
+static const crange spaceRangeTable[] = {
+ {0x0009, 0x000d}, {0x2000, 0x200b}
+};
+
+#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
+
+static const chr spaceCharTable[] = {
+ 0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000
+};
+
+#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
+
+/*
+ * Unicode: lowercase characters
+ */
+
+static const crange lowerRangeTable[] = {
+ {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180},
+ {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce},
+ {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587},
+ {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27},
+ {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67},
+ {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7},
+ {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7},
+ {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a}
+};
+
+#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
+
+static const chr lowerCharTable[] = {
+ 0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b,
+ 0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d,
+ 0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f,
+ 0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140,
+ 0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151,
+ 0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163,
+ 0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175,
+ 0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192,
+ 0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad,
+ 0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce,
+ 0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df,
+ 0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0,
+ 0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205,
+ 0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217,
+ 0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b,
+ 0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd,
+ 0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5,
+ 0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471,
+ 0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d,
+ 0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f,
+ 0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1,
+ 0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4,
+ 0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd,
+ 0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef,
+ 0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09,
+ 0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b,
+ 0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d,
+ 0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f,
+ 0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51,
+ 0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63,
+ 0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75,
+ 0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87,
+ 0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5,
+ 0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7,
+ 0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9,
+ 0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb,
+ 0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed,
+ 0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe,
+ 0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e,
+ 0x210f, 0x2113, 0x212f, 0x2134, 0x2139
+};
+
+#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
+
+/*
+ * Unicode: uppercase characters.
+ */
+
+static const crange upperRangeTable[] = {
+ {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b},
+ {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8},
+ {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4},
+ {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f},
+ {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d},
+ {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb},
+ {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112},
+ {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a}
+};
+
+#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
+
+static const chr upperCharTable[] = {
+ 0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110,
+ 0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122,
+ 0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134,
+ 0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147,
+ 0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a,
+ 0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c,
+ 0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d,
+ 0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d,
+ 0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae,
+ 0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd,
+ 0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0,
+ 0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4,
+ 0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a,
+ 0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c,
+ 0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230,
+ 0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0,
+ 0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460,
+ 0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472,
+ 0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e,
+ 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0,
+ 0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2,
+ 0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3,
+ 0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc,
+ 0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee,
+ 0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08,
+ 0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a,
+ 0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c,
+ 0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e,
+ 0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50,
+ 0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62,
+ 0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74,
+ 0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86,
+ 0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2,
+ 0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4,
+ 0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6,
+ 0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8,
+ 0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea,
+ 0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b,
+ 0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130,
+ 0x2131, 0x2133
+};
+
+#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
+
+/*
+ * Unicode: unicode print characters excluding space.
+ */
+
+static const crange graphRangeTable[] = {
+ {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233},
+ {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e},
+ {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce},
+ {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486},
+ {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f},
+ {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4},
+ {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655},
+ {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d},
+ {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0},
+ {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d},
+ {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c},
+ {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4},
+ {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a},
+ {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42},
+ {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83},
+ {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
+ {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd},
+ {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f},
+ {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43},
+ {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a},
+ {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5},
+ {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd},
+ {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10},
+ {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39},
+ {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f},
+ {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3},
+ {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd},
+ {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f},
+ {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48},
+ {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
+ {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf},
+ {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b},
+ {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9},
+ {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9},
+ {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b},
+ {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f},
+ {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059},
+ {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159},
+ {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f},
+ {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d},
+ {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5},
+ {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6},
+ {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e},
+ {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4},
+ {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676},
+ {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9},
+ {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9},
+ {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15},
+ {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
+ {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3},
+ {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe},
+ {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046},
+ {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3},
+ {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3},
+ {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b},
+ {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a},
+ {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7},
+ {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704},
+ {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b},
+ {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794},
+ {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff},
+ {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5},
+ {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094},
+ {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c},
+ {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243},
+ {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe},
+ {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe},
+ {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f},
+ {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f},
+ {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f},
+ {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f},
+ {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f},
+ {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f},
+ {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f},
+ {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f},
+ {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f},
+ {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f},
+ {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f},
+ {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f},
+ {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f},
+ {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f},
+ {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f},
+ {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f},
+ {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f},
+ {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f},
+ {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f},
+ {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f},
+ {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f},
+ {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f},
+ {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f},
+ {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f},
+ {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f},
+ {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f},
+ {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f},
+ {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f},
+ {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c},
+ {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4},
+ {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f},
+ {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f},
+ {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f},
+ {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f},
+ {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f},
+ {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f},
+ {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f},
+ {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f},
+ {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f},
+ {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f},
+ {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f},
+ {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d},
+ {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36},
+ {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f},
+ {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb},
+ {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66},
+ {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f},
+ {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf},
+ {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee},
+ {0xfffc, 0xffff}
+};
+
+#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
+
+static const chr graphCharTable[] = {
+ 0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8,
+ 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f,
+ 0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd,
+ 0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0,
+ 0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c,
+ 0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3,
+ 0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5,
+ 0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
+ 0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc,
+ 0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288,
+ 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756,
+ 0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74
+};
+
+#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
+
+/*
+ * Unicode: unicode print characters including space, i.e. all Letters (class
+ * L*), Numbers (N*), Punctuation (P*), Symbols (S*) and Spaces (Zs).
+ */
+
+static const crange printRangeTable[] = {
+ {0x0020, 0x007E}, {0x00A0, 0x01F5}, {0x01FA, 0x0217}, {0x0250, 0x02A8},
+ {0x02B0, 0x02DE}, {0x02E0, 0x02E9}, {0x0374, 0x0375}, {0x0384, 0x038A},
+ {0x038E, 0x03A1}, {0x03A3, 0x03CE}, {0x03D0, 0x03D6}, {0x03E2, 0x03F3},
+ {0x0401, 0x040C}, {0x040E, 0x044F}, {0x0451, 0x045C}, {0x045E, 0x0482},
+ {0x0490, 0x04C4}, {0x04C7, 0x04C8}, {0x04CB, 0x04CC}, {0x04D0, 0x04EB},
+ {0x04EE, 0x04F5}, {0x04F8, 0x04F9}, {0x0531, 0x0556}, {0x0559, 0x055F},
+ {0x0561, 0x0587}, {0x05D0, 0x05EA}, {0x05F0, 0x05F4}, {0x0621, 0x063A},
+ {0x0640, 0x064A}, {0x0660, 0x066D}, {0x0671, 0x06B7}, {0x06BA, 0x06BE},
+ {0x06C0, 0x06CE}, {0x06D0, 0x06D5}, {0x06E5, 0x06E6}, {0x06F0, 0x06F9},
+ {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0964, 0x0970}, {0x0985, 0x098C},
+ {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B6, 0x09B9},
+ {0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09E6, 0x09FA}, {0x0A05, 0x0A0A},
+ {0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33},
+ {0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A66, 0x0A6F},
+ {0x0A72, 0x0A74}, {0x0A85, 0x0A8B}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8},
+ {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0AE6, 0x0AEF},
+ {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30},
+ {0x0B32, 0x0B33}, {0x0B36, 0x0B39}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B61},
+ {0x0B66, 0x0B70}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95},
+ {0x0B99, 0x0B9A}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA},
+ {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0BE7, 0x0BF2}, {0x0C05, 0x0C0C},
+ {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39},
+ {0x0C60, 0x0C61}, {0x0C66, 0x0C6F}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90},
+ {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CE0, 0x0CE1},
+ {0x0CE6, 0x0CEF}, {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28},
+ {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0D66, 0x0D6F}, {0x0E3F, 0x0E46},
+ {0x0E4F, 0x0E5B}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EAA, 0x0EAB},
+ {0x0EAD, 0x0EB0}, {0x0EB2, 0x0EB3}, {0x0EC0, 0x0EC4}, {0x0ED0, 0x0ED9},
+ {0x0EDC, 0x0EDD}, {0x0F00, 0x0F17}, {0x0F1A, 0x0F34}, {0x0F3A, 0x0F3D},
+ {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x0F88, 0x0F8B}, {0x10A0, 0x10C5},
+ {0x10D0, 0x10F6}, {0x1100, 0x1159}, {0x115F, 0x11A2}, {0x11A8, 0x11F9},
+ {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15}, {0x1F18, 0x1F1D},
+ {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F5F, 0x1F7D},
+ {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3}, {0x1FD6, 0x1FDB},
+ {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFE}, {0x2000, 0x200B},
+ {0x2010, 0x2027}, {0x2030, 0x2046}, {0x2074, 0x208E}, {0x20A0, 0x20AC},
+ {0x2100, 0x2138}, {0x2153, 0x2182}, {0x2190, 0x21EA}, {0x2200, 0x22F1},
+ {0x2302, 0x237A}, {0x2400, 0x2424}, {0x2440, 0x244A}, {0x2460, 0x24EA},
+ {0x2500, 0x2595}, {0x25A0, 0x25EF}, {0x2600, 0x2613}, {0x261A, 0x266F},
+ {0x2701, 0x2704}, {0x2706, 0x2709}, {0x270C, 0x2727}, {0x2729, 0x274B},
+ {0x274F, 0x2752}, {0x2758, 0x275E}, {0x2761, 0x2767}, {0x2776, 0x2794},
+ {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x3000, 0x3029}, {0x3030, 0x3037},
+ {0x3041, 0x3094}, {0x309B, 0x309E}, {0x30A1, 0x30FE}, {0x3105, 0x312C},
+ {0x3131, 0x318E}, {0x3190, 0x319F}, {0x3200, 0x321C}, {0x3220, 0x3243},
+ {0x3260, 0x327B}, {0x327F, 0x32B0}, {0x32C0, 0x32CB}, {0x32D0, 0x32FE},
+ {0x3300, 0x3376}, {0x337B, 0x33DD}, {0x33E0, 0x33FE}, {0x4E00, 0x9FA5},
+ {0xAC00, 0xD7A3}, {0xF900, 0xFA2D}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17},
+ {0xFB1F, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
+ {0xFB46, 0xFBB1}, {0xFBD3, 0xFD3F}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7},
+ {0xFDF0, 0xFDFB}, {0xFE30, 0xFE44}, {0xFE49, 0xFE52}, {0xFE54, 0xFE66},
+ {0xFE68, 0xFE6B}, {0xFE70, 0xFE72}, {0xFE76, 0xFEFC}, {0xFF01, 0xFF5E},
+ {0xFF61, 0xFFBE}, {0xFFC2, 0xFFC7}, {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7},
+ {0xFFDA, 0xFFDC}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}
+};
+
+#define NUM_PRINT_RANGE (sizeof(printRangeTable)/sizeof(crange))
+
+static const chr printCharTable[] = {
+ 0x037A, 0x037E, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0589, 0x05BE,
+ 0x05C0, 0x05C3, 0x060C, 0x061B, 0x061F, 0x06E9, 0x093D, 0x0950, 0x09B2,
+ 0x0A5E, 0x0A8D, 0x0ABD, 0x0AD0, 0x0AE0, 0x0B3D, 0x0B9C, 0x0CDE, 0x0E01,
+ 0x0E32, 0x0E81, 0x0E84, 0x0E87, 0x0E8A, 0x0E8D, 0x0E94, 0x0EA5, 0x0EA7,
+ 0x0EBD, 0x0EC6, 0x0F36, 0x0F38, 0x0F85, 0x10FB, 0x1F59, 0x1F5B, 0x1F5D,
+ 0x2070, 0x2300, 0x274D, 0x2756, 0x303F, 0xFB3E, 0xFE74
+};
+
+#define NUM_PRINT_CHAR (sizeof(printCharTable)/sizeof(chr))
+#endif
+
+/*
+ * End of auto-generated Unicode character ranges declarations.
+ */
+
+#define CH NOCELT
+
+/*
+ - element - map collating-element name to celt
+ ^ static celt element(struct vars *, const chr *, const chr *);
+ */
+static celt
+element(
+ struct vars *v, /* context */
+ const chr *startp, /* points to start of name */
+ const chr *endp) /* points just past end of name */
+{
+ const struct cname *cn;
+ size_t len;
+ Tcl_DString ds;
+ const char *np;
+
+ /*
+ * Generic: one-chr names stand for themselves.
+ */
+
+ assert(startp < endp);
+ len = endp - startp;
+ if (len == 1) {
+ return *startp;
+ }
+
+ NOTE(REG_ULOCALE);
+
+ /*
+ * Search table.
+ */
+
+ Tcl_DStringInit(&ds);
+ np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
+ for (cn=cnames; cn->name!=NULL; cn++) {
+ if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ Tcl_DStringFree(&ds);
+ if (cn->name != NULL) {
+ return CHR(cn->code);
+ }
+
+ /*
+ * Couldn't find it.
+ */
+
+ ERR(REG_ECOLLATE);
+ return 0;
+}
+
+/*
+ - range - supply cvec for a range, including legality check
+ ^ static struct cvec *range(struct vars *, celt, celt, int);
+ */
+static struct cvec *
+range(
+ struct vars *v, /* context */
+ celt a, /* range start */
+ celt b, /* range end, might equal a */
+ int cases) /* case-independent? */
+{
+ int nchrs;
+ struct cvec *cv;
+ celt c, lc, uc, tc;
+
+ if (a != b && !before(a, b)) {
+ ERR(REG_ERANGE);
+ return NULL;
+ }
+
+ if (!cases) { /* easy version */
+ cv = getcvec(v, 0, 1);
+ NOERRN();
+ addrange(cv, a, b);
+ return cv;
+ }
+
+ /*
+ * When case-independent, it's hard to decide when cvec ranges are usable,
+ * so for now at least, we won't try. We allocate enough space for two
+ * case variants plus a little extra for the two title case variants.
+ */
+
+ nchrs = (b - a + 1)*2 + 4;
+
+ cv = getcvec(v, nchrs, 0);
+ NOERRN();
+
+ for (c=a; c<=b; c++) {
+ addchr(cv, c);
+ lc = Tcl_UniCharToLower((chr)c);
+ uc = Tcl_UniCharToUpper((chr)c);
+ tc = Tcl_UniCharToTitle((chr)c);
+ if (c != lc) {
+ addchr(cv, lc);
+ }
+ if (c != uc) {
+ addchr(cv, uc);
+ }
+ if (c != tc && tc != uc) {
+ addchr(cv, tc);
+ }
+ }
+
+ return cv;
+}
+
+/*
+ - before - is celt x before celt y, for purposes of range legality?
+ ^ static int before(celt, celt);
+ */
+static int /* predicate */
+before(
+ celt x, celt y) /* collating elements */
+{
+ if (x < y) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ - eclass - supply cvec for an equivalence class
+ * Must include case counterparts on request.
+ ^ static struct cvec *eclass(struct vars *, celt, int);
+ */
+static struct cvec *
+eclass(
+ struct vars *v, /* context */
+ celt c, /* Collating element representing the
+ * equivalence class. */
+ int cases) /* all cases? */
+{
+ struct cvec *cv;
+
+ /*
+ * Crude fake equivalence class for testing.
+ */
+
+ if ((v->cflags&REG_FAKE) && c == 'x') {
+ cv = getcvec(v, 4, 0);
+ addchr(cv, (chr)'x');
+ addchr(cv, (chr)'y');
+ if (cases) {
+ addchr(cv, (chr)'X');
+ addchr(cv, (chr)'Y');
+ }
+ return cv;
+ }
+
+ /*
+ * Otherwise, none.
+ */
+
+ if (cases) {
+ return allcases(v, c);
+ }
+ cv = getcvec(v, 1, 0);
+ assert(cv != NULL);
+ addchr(cv, (chr)c);
+ return cv;
+}
+
+/*
+ - cclass - supply cvec for a character class
+ * Must include case counterparts on request.
+ ^ static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
+ */
+static struct cvec *
+cclass(
+ struct vars *v, /* context */
+ const chr *startp, /* where the name starts */
+ const chr *endp, /* just past the end of the name */
+ int cases) /* case-independent? */
+{
+ size_t len;
+ struct cvec *cv = NULL;
+ Tcl_DString ds;
+ const char *np;
+ const char **namePtr;
+ int i, index;
+
+ /*
+ * The following arrays define the valid character class names.
+ */
+
+ static const char *classNames[] = {
+ "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
+ "lower", "print", "punct", "space", "upper", "xdigit", NULL
+ };
+
+ enum classes {
+ CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
+ CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
+ };
+
+
+ /*
+ * Extract the class name
+ */
+
+ len = endp - startp;
+ Tcl_DStringInit(&ds);
+ np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
+
+ /*
+ * Remap lower and upper to alpha if the match is case insensitive.
+ */
+
+ if (cases && len == 5 && (strncmp("lower", np, 5) == 0
+ || strncmp("upper", np, 5) == 0)) {
+ np = "alpha";
+ }
+
+ /*
+ * Map the name to the corresponding enumerated value.
+ */
+
+ index = -1;
+ for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
+ if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
+ index = i;
+ break;
+ }
+ }
+ Tcl_DStringFree(&ds);
+ if (index == -1) {
+ ERR(REG_ECTYPE);
+ return NULL;
+ }
+
+ /*
+ * Now compute the character class contents.
+ */
+
+ switch((enum classes) index) {
+ case CC_PRINT:
+ cv = getcvec(v, NUM_PRINT_CHAR, NUM_PRINT_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_PRINT_CHAR ; i++) {
+ addchr(cv, printCharTable[i]);
+ }
+ for (i=0 ; (size_t)i<NUM_PRINT_RANGE ; i++) {
+ addrange(cv, printRangeTable[i].start,
+ printRangeTable[i].end);
+ }
+ }
+ break;
+ case CC_ALNUM:
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) {
+ addchr(cv, alphaCharTable[i]);
+ }
+ for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) {
+ addrange(cv, alphaRangeTable[i].start,
+ alphaRangeTable[i].end);
+ }
+ for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
+ addrange(cv, digitRangeTable[i].start,
+ digitRangeTable[i].end);
+ }
+ }
+ break;
+ case CC_ALPHA:
+ cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_ALPHA_RANGE ; i++) {
+ addrange(cv, alphaRangeTable[i].start,
+ alphaRangeTable[i].end);
+ }
+ for (i=0 ; (size_t)i<NUM_ALPHA_CHAR ; i++) {
+ addchr(cv, alphaCharTable[i]);
+ }
+ }
+ break;
+ case CC_ASCII:
+ cv = getcvec(v, 0, 1);
+ if (cv) {
+ addrange(cv, 0, 0x7f);
+ }
+ break;
+ case CC_BLANK:
+ cv = getcvec(v, 2, 0);
+ addchr(cv, '\t');
+ addchr(cv, ' ');
+ break;
+ case CC_CNTRL:
+ cv = getcvec(v, 0, 2);
+ addrange(cv, 0x0, 0x1f);
+ addrange(cv, 0x7f, 0x9f);
+ break;
+ case CC_DIGIT:
+ cv = getcvec(v, 0, NUM_DIGIT_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_DIGIT_RANGE ; i++) {
+ addrange(cv, digitRangeTable[i].start,
+ digitRangeTable[i].end);
+ }
+ }
+ break;
+ case CC_PUNCT:
+ cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_PUNCT_RANGE ; i++) {
+ addrange(cv, punctRangeTable[i].start,
+ punctRangeTable[i].end);
+ }
+ for (i=0 ; (size_t)i<NUM_PUNCT_CHAR ; i++) {
+ addchr(cv, punctCharTable[i]);
+ }
+ }
+ break;
+ case CC_XDIGIT:
+ /*
+ * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no idea how
+ * to define the digits 'a' through 'f' in non-western locales. The
+ * concept is quite possibly non portable, or only used in contextx
+ * where the characters used would be the western ones anyway!
+ * Whatever is actually the case, the number of ranges is fixed (until
+ * someone comes up with a better arrangement!)
+ */
+
+ cv = getcvec(v, 0, 3);
+ if (cv) {
+ addrange(cv, '0', '9');
+ addrange(cv, 'a', 'f');
+ addrange(cv, 'A', 'F');
+ }
+ break;
+ case CC_SPACE:
+ cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_SPACE_RANGE ; i++) {
+ addrange(cv, spaceRangeTable[i].start,
+ spaceRangeTable[i].end);
+ }
+ for (i=0 ; (size_t)i<NUM_SPACE_CHAR ; i++) {
+ addchr(cv, spaceCharTable[i]);
+ }
+ }
+ break;
+ case CC_LOWER:
+ cv = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_LOWER_RANGE ; i++) {
+ addrange(cv, lowerRangeTable[i].start,
+ lowerRangeTable[i].end);
+ }
+ for (i=0 ; (size_t)i<NUM_LOWER_CHAR ; i++) {
+ addchr(cv, lowerCharTable[i]);
+ }
+ }
+ break;
+ case CC_UPPER:
+ cv = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_UPPER_RANGE ; i++) {
+ addrange(cv, upperRangeTable[i].start,
+ upperRangeTable[i].end);
+ }
+ for (i=0 ; (size_t)i<NUM_UPPER_CHAR ; i++) {
+ addchr(cv, upperCharTable[i]);
+ }
+ }
+ break;
+ case CC_GRAPH:
+ cv = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE);
+ if (cv) {
+ for (i=0 ; (size_t)i<NUM_GRAPH_RANGE ; i++) {
+ addrange(cv, graphRangeTable[i].start,
+ graphRangeTable[i].end);
+ }
+ for (i=0 ; (size_t)i<NUM_GRAPH_CHAR ; i++) {
+ addchr(cv, graphCharTable[i]);
+ }
+ }
+ break;
+ }
+ if (cv == NULL) {
+ ERR(REG_ESPACE);
+ }
+ return cv;
+}
+
+/*
+ - allcases - supply cvec for all case counterparts of a chr (including itself)
+ * This is a shortcut, preferably an efficient one, for simple characters;
+ * messy cases are done via range().
+ ^ static struct cvec *allcases(struct vars *, pchr);
+ */
+static struct cvec *
+allcases(
+ struct vars *v, /* context */
+ pchr pc) /* character to get case equivs of */
+{
+ struct cvec *cv;
+ chr c = (chr)pc;
+ chr lc, uc, tc;
+
+ lc = Tcl_UniCharToLower((chr)c);
+ uc = Tcl_UniCharToUpper((chr)c);
+ tc = Tcl_UniCharToTitle((chr)c);
+
+ if (tc != uc) {
+ cv = getcvec(v, 3, 0);
+ addchr(cv, tc);
+ } else {
+ cv = getcvec(v, 2, 0);
+ }
+ addchr(cv, lc);
+ if (lc != uc) {
+ addchr(cv, uc);
+ }
+ return cv;
+}
+
+/*
+ - cmp - chr-substring compare
+ * Backrefs need this. It should preferably be efficient.
+ * Note that it does not need to report anything except equal/unequal.
+ * Note also that the length is exact, and the comparison should not
+ * stop at embedded NULs!
+ ^ static int cmp(const chr *, const chr *, size_t);
+ */
+static int /* 0 for equal, nonzero for unequal */
+cmp(
+ const chr *x, const chr *y, /* strings to compare */
+ size_t len) /* exact length of comparison */
+{
+ return memcmp(VS(x), VS(y), len*sizeof(chr));
+}
+
+/*
+ - casecmp - case-independent chr-substring compare
+ * REG_ICASE backrefs need this. It should preferably be efficient.
+ * Note that it does not need to report anything except equal/unequal.
+ * Note also that the length is exact, and the comparison should not
+ * stop at embedded NULs!
+ ^ static int casecmp(const chr *, const chr *, size_t);
+ */
+static int /* 0 for equal, nonzero for unequal */
+casecmp(
+ const chr *x, const chr *y, /* strings to compare */
+ size_t len) /* exact length of comparison */
+{
+ for (; len > 0; len--, x++, y++) {
+ if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regc_nfa.c b/contrib/hsrex/regc_nfa.c
new file mode 100644
index 0000000..04d2f46
--- /dev/null
+++ b/contrib/hsrex/regc_nfa.c
@@ -0,0 +1,1873 @@
+/*
+ * NFA utilities.
+ * This file is #included by regcomp.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * One or two things that technically ought to be in here are actually in
+ * color.c, thanks to some incestuous relationships in the color chains.
+ */
+
+#define NISERR() VISERR(nfa->v)
+#define NERR(e) VERR(nfa->v, (e))
+
+/*
+ - newnfa - set up an NFA
+ ^ static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
+ */
+static struct nfa * /* the NFA, or NULL */
+newnfa(
+ struct vars *v,
+ struct colormap *cm,
+ struct nfa *parent) /* NULL if primary NFA */
+{
+ struct nfa *nfa;
+
+ nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
+ if (nfa == NULL) {
+ return NULL;
+ }
+
+ nfa->states = NULL;
+ nfa->slast = NULL;
+ nfa->free = NULL;
+ nfa->nstates = 0;
+ nfa->cm = cm;
+ nfa->v = v;
+ nfa->size = 0;
+ nfa->bos[0] = nfa->bos[1] = COLORLESS;
+ nfa->eos[0] = nfa->eos[1] = COLORLESS;
+ nfa->parent = parent; /* Precedes newfstate so parent is valid. */
+ nfa->post = newfstate(nfa, '@'); /* number 0 */
+ nfa->pre = newfstate(nfa, '>'); /* number 1 */
+
+ nfa->init = newstate(nfa); /* May become invalid later. */
+ nfa->final = newstate(nfa);
+ if (ISERR()) {
+ freenfa(nfa);
+ return NULL;
+ }
+ rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
+ newarc(nfa, '^', 1, nfa->pre, nfa->init);
+ newarc(nfa, '^', 0, nfa->pre, nfa->init);
+ rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
+ newarc(nfa, '$', 1, nfa->final, nfa->post);
+ newarc(nfa, '$', 0, nfa->final, nfa->post);
+
+ if (ISERR()) {
+ freenfa(nfa);
+ return NULL;
+ }
+ return nfa;
+}
+
+/*
+ - TooManyStates - checks if the max states exceeds the compile-time value
+ ^ static int TooManyStates(struct nfa *);
+ */
+static int
+TooManyStates(
+ struct nfa *nfa)
+{
+ struct nfa *parent = nfa->parent;
+ size_t sz = nfa->size;
+
+ while (parent != NULL) {
+ sz = parent->size;
+ parent = parent->parent;
+ }
+ if (sz > REG_MAX_STATES) {
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ - IncrementSize - increases the tracked size of the NFA and its parents.
+ ^ static void IncrementSize(struct nfa *);
+ */
+static void
+IncrementSize(
+ struct nfa *nfa)
+{
+ struct nfa *parent = nfa->parent;
+
+ nfa->size++;
+ while (parent != NULL) {
+ parent->size++;
+ parent = parent->parent;
+ }
+}
+
+/*
+ - DecrementSize - increases the tracked size of the NFA and its parents.
+ ^ static void DecrementSize(struct nfa *);
+ */
+static void
+DecrementSize(
+ struct nfa *nfa)
+{
+ struct nfa *parent = nfa->parent;
+
+ nfa->size--;
+ while (parent != NULL) {
+ parent->size--;
+ parent = parent->parent;
+ }
+}
+
+/*
+ - freenfa - free an entire NFA
+ ^ static void freenfa(struct nfa *);
+ */
+static void
+freenfa(
+ struct nfa *nfa)
+{
+ struct state *s;
+
+ while ((s = nfa->states) != NULL) {
+ s->nins = s->nouts = 0; /* don't worry about arcs */
+ freestate(nfa, s);
+ }
+ while ((s = nfa->free) != NULL) {
+ nfa->free = s->next;
+ destroystate(nfa, s);
+ }
+
+ nfa->slast = NULL;
+ nfa->nstates = -1;
+ nfa->pre = NULL;
+ nfa->post = NULL;
+ FREE(nfa);
+}
+
+/*
+ - newstate - allocate an NFA state, with zero flag value
+ ^ static struct state *newstate(struct nfa *);
+ */
+static struct state * /* NULL on error */
+newstate(
+ struct nfa *nfa)
+{
+ struct state *s;
+
+ if (TooManyStates(nfa)) {
+ /* XXX: add specific error for this */
+ NERR(REG_ETOOBIG);
+ return NULL;
+ }
+ if (nfa->free != NULL) {
+ s = nfa->free;
+ nfa->free = s->next;
+ } else {
+ s = (struct state *) MALLOC(sizeof(struct state));
+ if (s == NULL) {
+ NERR(REG_ESPACE);
+ return NULL;
+ }
+ s->oas.next = NULL;
+ s->free = NULL;
+ s->noas = 0;
+ }
+
+ assert(nfa->nstates >= 0);
+ s->no = nfa->nstates++;
+ s->flag = 0;
+ if (nfa->states == NULL) {
+ nfa->states = s;
+ }
+ s->nins = 0;
+ s->ins = NULL;
+ s->nouts = 0;
+ s->outs = NULL;
+ s->tmp = NULL;
+ s->next = NULL;
+ if (nfa->slast != NULL) {
+ assert(nfa->slast->next == NULL);
+ nfa->slast->next = s;
+ }
+ s->prev = nfa->slast;
+ nfa->slast = s;
+
+ /*
+ * Track the current size and the parent size.
+ */
+
+ IncrementSize(nfa);
+ return s;
+}
+
+/*
+ - newfstate - allocate an NFA state with a specified flag value
+ ^ static struct state *newfstate(struct nfa *, int flag);
+ */
+static struct state * /* NULL on error */
+newfstate(
+ struct nfa *nfa,
+ int flag)
+{
+ struct state *s;
+
+ s = newstate(nfa);
+ if (s != NULL) {
+ s->flag = (char) flag;
+ }
+ return s;
+}
+
+/*
+ - dropstate - delete a state's inarcs and outarcs and free it
+ ^ static void dropstate(struct nfa *, struct state *);
+ */
+static void
+dropstate(
+ struct nfa *nfa,
+ struct state *s)
+{
+ struct arc *a;
+
+ while ((a = s->ins) != NULL) {
+ freearc(nfa, a);
+ }
+ while ((a = s->outs) != NULL) {
+ freearc(nfa, a);
+ }
+ freestate(nfa, s);
+}
+
+/*
+ - freestate - free a state, which has no in-arcs or out-arcs
+ ^ static void freestate(struct nfa *, struct state *);
+ */
+static void
+freestate(
+ struct nfa *nfa,
+ struct state *s)
+{
+ assert(s != NULL);
+ assert(s->nins == 0 && s->nouts == 0);
+
+ s->no = FREESTATE;
+ s->flag = 0;
+ if (s->next != NULL) {
+ s->next->prev = s->prev;
+ } else {
+ assert(s == nfa->slast);
+ nfa->slast = s->prev;
+ }
+ if (s->prev != NULL) {
+ s->prev->next = s->next;
+ } else {
+ assert(s == nfa->states);
+ nfa->states = s->next;
+ }
+ s->prev = NULL;
+ s->next = nfa->free; /* don't delete it, put it on the free list */
+ nfa->free = s;
+ DecrementSize(nfa);
+}
+
+/*
+ - destroystate - really get rid of an already-freed state
+ ^ static void destroystate(struct nfa *, struct state *);
+ */
+static void
+destroystate(
+ struct nfa *nfa,
+ struct state *s)
+{
+ struct arcbatch *ab;
+ struct arcbatch *abnext;
+
+ assert(s->no == FREESTATE);
+ for (ab=s->oas.next ; ab!=NULL ; ab=abnext) {
+ abnext = ab->next;
+ FREE(ab);
+ }
+ s->ins = NULL;
+ s->outs = NULL;
+ s->next = NULL;
+ FREE(s);
+}
+
+/*
+ - newarc - set up a new arc within an NFA
+ ^ static void newarc(struct nfa *, int, pcolor, struct state *,
+ ^ struct state *);
+ */
+static void
+newarc(
+ struct nfa *nfa,
+ int t,
+ pcolor co,
+ struct state *from,
+ struct state *to)
+{
+ struct arc *a;
+
+ assert(from != NULL && to != NULL);
+
+ /*
+ * Check for duplicates.
+ */
+
+ for (a=from->outs ; a!=NULL ; a=a->outchain) {
+ if (a->to == to && a->co == co && a->type == t) {
+ return;
+ }
+ }
+
+ a = allocarc(nfa, from);
+ if (NISERR()) {
+ return;
+ }
+ assert(a != NULL);
+
+ a->type = t;
+ a->co = (color) co;
+ a->to = to;
+ a->from = from;
+
+ /*
+ * Put the new arc on the beginning, not the end, of the chains. Not only
+ * is this easier, it has the very useful side effect that deleting the
+ * most-recently-added arc is the cheapest case rather than the most
+ * expensive one.
+ */
+
+ a->inchain = to->ins;
+ to->ins = a;
+ a->outchain = from->outs;
+ from->outs = a;
+
+ from->nouts++;
+ to->nins++;
+
+ if (COLORED(a) && nfa->parent == NULL) {
+ colorchain(nfa->cm, a);
+ }
+}
+
+/*
+ - allocarc - allocate a new out-arc within a state
+ ^ static struct arc *allocarc(struct nfa *, struct state *);
+ */
+static struct arc * /* NULL for failure */
+allocarc(
+ struct nfa *nfa,
+ struct state *s)
+{
+ struct arc *a;
+
+ /*
+ * Shortcut
+ */
+
+ if (s->free == NULL && s->noas < ABSIZE) {
+ a = &s->oas.a[s->noas];
+ s->noas++;
+ return a;
+ }
+
+ /*
+ * if none at hand, get more
+ */
+
+ if (s->free == NULL) {
+ struct arcbatch *newAb = (struct arcbatch *)
+ MALLOC(sizeof(struct arcbatch));
+ int i;
+
+ if (newAb == NULL) {
+ NERR(REG_ESPACE);
+ return NULL;
+ }
+ newAb->next = s->oas.next;
+ s->oas.next = newAb;
+
+ for (i=0 ; i<ABSIZE ; i++) {
+ newAb->a[i].type = 0;
+ newAb->a[i].freechain = &newAb->a[i+1];
+ }
+ newAb->a[ABSIZE-1].freechain = NULL;
+ s->free = &newAb->a[0];
+ }
+ assert(s->free != NULL);
+
+ a = s->free;
+ s->free = a->freechain;
+ return a;
+}
+
+/*
+ - freearc - free an arc
+ ^ static void freearc(struct nfa *, struct arc *);
+ */
+static void
+freearc(
+ struct nfa *nfa,
+ struct arc *victim)
+{
+ struct state *from = victim->from;
+ struct state *to = victim->to;
+ struct arc *a;
+
+ assert(victim->type != 0);
+
+ /*
+ * Take it off color chain if necessary.
+ */
+
+ if (COLORED(victim) && nfa->parent == NULL) {
+ uncolorchain(nfa->cm, victim);
+ }
+
+ /*
+ * Take it off source's out-chain.
+ */
+
+ assert(from != NULL);
+ assert(from->outs != NULL);
+ a = from->outs;
+ if (a == victim) { /* simple case: first in chain */
+ from->outs = victim->outchain;
+ } else {
+ for (; a!=NULL && a->outchain!=victim ; a=a->outchain) {
+ continue;
+ }
+ assert(a != NULL);
+ a->outchain = victim->outchain;
+ }
+ from->nouts--;
+
+ /*
+ * Take it off target's in-chain.
+ */
+
+ assert(to != NULL);
+ assert(to->ins != NULL);
+ a = to->ins;
+ if (a == victim) { /* simple case: first in chain */
+ to->ins = victim->inchain;
+ } else {
+ for (; a->inchain!=victim ; a=a->inchain) {
+ assert(a->inchain != NULL);
+ continue;
+ }
+ a->inchain = victim->inchain;
+ }
+ to->nins--;
+
+ /*
+ * Clean up and place on free list.
+ */
+
+ victim->type = 0;
+ victim->from = NULL; /* precautions... */
+ victim->to = NULL;
+ victim->inchain = NULL;
+ victim->outchain = NULL;
+ victim->freechain = from->free;
+ from->free = victim;
+}
+
+/*
+ - findarc - find arc, if any, from given source with given type and color
+ * If there is more than one such arc, the result is random.
+ ^ static struct arc *findarc(struct state *, int, pcolor);
+ */
+static struct arc *
+findarc(
+ struct state *s,
+ int type,
+ pcolor co)
+{
+ struct arc *a;
+
+ for (a=s->outs ; a!=NULL ; a=a->outchain) {
+ if (a->type == type && a->co == co) {
+ return a;
+ }
+ }
+ return NULL;
+}
+
+/*
+ - cparc - allocate a new arc within an NFA, copying details from old one
+ ^ static void cparc(struct nfa *, struct arc *, struct state *,
+ ^ struct state *);
+ */
+static void
+cparc(
+ struct nfa *nfa,
+ struct arc *oa,
+ struct state *from,
+ struct state *to)
+{
+ newarc(nfa, oa->type, oa->co, from, to);
+}
+
+/*
+ - moveins - move all in arcs of a state to another state
+ * You might think this could be done better by just updating the
+ * existing arcs, and you would be right if it weren't for the desire
+ * for duplicate suppression, which makes it easier to just make new
+ * ones to exploit the suppression built into newarc.
+ ^ static void moveins(struct nfa *, struct state *, struct state *);
+ */
+static void
+moveins(
+ struct nfa *nfa,
+ struct state *oldState,
+ struct state *newState)
+{
+ struct arc *a;
+
+ assert(oldState != newState);
+
+ while ((a = oldState->ins) != NULL) {
+ cparc(nfa, a, a->from, newState);
+ freearc(nfa, a);
+ }
+ assert(oldState->nins == 0);
+ assert(oldState->ins == NULL);
+}
+
+/*
+ - copyins - copy all in arcs of a state to another state
+ ^ static void copyins(struct nfa *, struct state *, struct state *);
+ */
+static void
+copyins(
+ struct nfa *nfa,
+ struct state *oldState,
+ struct state *newState)
+{
+ struct arc *a;
+
+ assert(oldState != newState);
+
+ for (a=oldState->ins ; a!=NULL ; a=a->inchain) {
+ cparc(nfa, a, a->from, newState);
+ }
+}
+
+/*
+ - moveouts - move all out arcs of a state to another state
+ ^ static void moveouts(struct nfa *, struct state *, struct state *);
+ */
+static void
+moveouts(
+ struct nfa *nfa,
+ struct state *oldState,
+ struct state *newState)
+{
+ struct arc *a;
+
+ assert(oldState != newState);
+
+ while ((a = oldState->outs) != NULL) {
+ cparc(nfa, a, newState, a->to);
+ freearc(nfa, a);
+ }
+}
+
+/*
+ - copyouts - copy all out arcs of a state to another state
+ ^ static void copyouts(struct nfa *, struct state *, struct state *);
+ */
+static void
+copyouts(
+ struct nfa *nfa,
+ struct state *oldState,
+ struct state *newState)
+{
+ struct arc *a;
+
+ assert(oldState != newState);
+
+ for (a=oldState->outs ; a!=NULL ; a=a->outchain) {
+ cparc(nfa, a, newState, a->to);
+ }
+}
+
+/*
+ - cloneouts - copy out arcs of a state to another state pair, modifying type
+ ^ static void cloneouts(struct nfa *, struct state *, struct state *,
+ ^ struct state *, int);
+ */
+static void
+cloneouts(
+ struct nfa *nfa,
+ struct state *old,
+ struct state *from,
+ struct state *to,
+ int type)
+{
+ struct arc *a;
+
+ assert(old != from);
+
+ for (a=old->outs ; a!=NULL ; a=a->outchain) {
+ newarc(nfa, type, a->co, from, to);
+ }
+}
+
+/*
+ - delsub - delete a sub-NFA, updating subre pointers if necessary
+ * This uses a recursive traversal of the sub-NFA, marking already-seen
+ * states using their tmp pointer.
+ ^ static void delsub(struct nfa *, struct state *, struct state *);
+ */
+static void
+delsub(
+ struct nfa *nfa,
+ struct state *lp, /* the sub-NFA goes from here... */
+ struct state *rp) /* ...to here, *not* inclusive */
+{
+ assert(lp != rp);
+
+ rp->tmp = rp; /* mark end */
+
+ deltraverse(nfa, lp, lp);
+ assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
+ assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
+
+ rp->tmp = NULL; /* unmark end */
+ lp->tmp = NULL; /* and begin, marked by deltraverse */
+}
+
+/*
+ - deltraverse - the recursive heart of delsub
+ * This routine's basic job is to destroy all out-arcs of the state.
+ ^ static void deltraverse(struct nfa *, struct state *, struct state *);
+ */
+static void
+deltraverse(
+ struct nfa *nfa,
+ struct state *leftend,
+ struct state *s)
+{
+ struct arc *a;
+ struct state *to;
+
+ if (s->nouts == 0) {
+ return; /* nothing to do */
+ }
+ if (s->tmp != NULL) {
+ return; /* already in progress */
+ }
+
+ s->tmp = s; /* mark as in progress */
+
+ while ((a = s->outs) != NULL) {
+ to = a->to;
+ deltraverse(nfa, leftend, to);
+ assert(to->nouts == 0 || to->tmp != NULL);
+ freearc(nfa, a);
+ if (to->nins == 0 && to->tmp == NULL) {
+ assert(to->nouts == 0);
+ freestate(nfa, to);
+ }
+ }
+
+ assert(s->no != FREESTATE); /* we're still here */
+ assert(s == leftend || s->nins != 0); /* and still reachable */
+ assert(s->nouts == 0); /* but have no outarcs */
+
+ s->tmp = NULL; /* we're done here */
+}
+
+/*
+ - dupnfa - duplicate sub-NFA
+ * Another recursive traversal, this time using tmp to point to duplicates as
+ * well as mark already-seen states. (You knew there was a reason why it's a
+ * state pointer, didn't you? :-))
+ ^ static void dupnfa(struct nfa *, struct state *, struct state *,
+ ^ struct state *, struct state *);
+ */
+static void
+dupnfa(
+ struct nfa *nfa,
+ struct state *start, /* duplicate of subNFA starting here */
+ struct state *stop, /* and stopping here */
+ struct state *from, /* stringing duplicate from here */
+ struct state *to) /* to here */
+{
+ if (start == stop) {
+ newarc(nfa, EMPTY, 0, from, to);
+ return;
+ }
+
+ stop->tmp = to;
+ duptraverse(nfa, start, from, 0);
+ /* done, except for clearing out the tmp pointers */
+
+ stop->tmp = NULL;
+ cleartraverse(nfa, start);
+}
+
+/*
+ - duptraverse - recursive heart of dupnfa
+ ^ static void duptraverse(struct nfa *, struct state *, struct state *);
+ */
+static void
+duptraverse(
+ struct nfa *nfa,
+ struct state *s,
+ struct state *stmp, /* s's duplicate, or NULL */
+ int depth)
+{
+ struct arc *a;
+
+ if (s->tmp != NULL) {
+ return; /* already done */
+ }
+
+ s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
+ if (s->tmp == NULL) {
+ assert(NISERR());
+ return;
+ }
+
+ /*
+ * Arbitrary depth limit. Needs tuning, but this value is sufficient to
+ * make all normal tests (not reg-33.14) pass.
+ */
+ /* Updated from 500 to 1204 to support REs with 99 group patterns.
+ * Why to limit the tree depth ?
+ * If long REs are not needed then just don't write long REs.
+ */
+#define DUPTRAVERSE_MAX_DEPTH 1204
+
+ if (depth++ > DUPTRAVERSE_MAX_DEPTH) {
+ NERR(REG_ESPACE);
+ }
+
+ for (a=s->outs ; a!=NULL && !NISERR() ; a=a->outchain) {
+ duptraverse(nfa, a->to, NULL, depth);
+ if (NISERR()) {
+ break;
+ }
+ assert(a->to->tmp != NULL);
+ cparc(nfa, a, s->tmp, a->to->tmp);
+ }
+}
+
+/*
+ - cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
+ ^ static void cleartraverse(struct nfa *, struct state *);
+ */
+static void
+cleartraverse(
+ struct nfa *nfa,
+ struct state *s)
+{
+ struct arc *a;
+
+ if (s->tmp == NULL) {
+ return;
+ }
+ s->tmp = NULL;
+
+ for (a=s->outs ; a!=NULL ; a=a->outchain) {
+ cleartraverse(nfa, a->to);
+ }
+}
+
+/*
+ - specialcolors - fill in special colors for an NFA
+ ^ static void specialcolors(struct nfa *);
+ */
+static void
+specialcolors(
+ struct nfa *nfa)
+{
+ /*
+ * False colors for BOS, BOL, EOS, EOL
+ */
+
+ if (nfa->parent == NULL) {
+ nfa->bos[0] = pseudocolor(nfa->cm);
+ nfa->bos[1] = pseudocolor(nfa->cm);
+ nfa->eos[0] = pseudocolor(nfa->cm);
+ nfa->eos[1] = pseudocolor(nfa->cm);
+ } else {
+ assert(nfa->parent->bos[0] != COLORLESS);
+ nfa->bos[0] = nfa->parent->bos[0];
+ assert(nfa->parent->bos[1] != COLORLESS);
+ nfa->bos[1] = nfa->parent->bos[1];
+ assert(nfa->parent->eos[0] != COLORLESS);
+ nfa->eos[0] = nfa->parent->eos[0];
+ assert(nfa->parent->eos[1] != COLORLESS);
+ nfa->eos[1] = nfa->parent->eos[1];
+ }
+}
+
+/*
+ - optimize - optimize an NFA
+ ^ static long optimize(struct nfa *, FILE *);
+ */
+static long /* re_info bits */
+optimize(
+ struct nfa *nfa,
+ FILE *f) /* for debug output; NULL none */
+{
+ int verbose = (f != NULL) ? 1 : 0;
+
+ if (verbose) {
+ fprintf(f, "\ninitial cleanup:\n");
+ }
+ cleanup(nfa); /* may simplify situation */
+ if (verbose) {
+ dumpnfa(nfa, f);
+ }
+ if (verbose) {
+ fprintf(f, "\nempties:\n");
+ }
+ fixempties(nfa, f); /* get rid of EMPTY arcs */
+ if (verbose) {
+ fprintf(f, "\nconstraints:\n");
+ }
+ pullback(nfa, f); /* pull back constraints backward */
+ pushfwd(nfa, f); /* push fwd constraints forward */
+ if (verbose) {
+ fprintf(f, "\nfinal cleanup:\n");
+ }
+ cleanup(nfa); /* final tidying */
+ return analyze(nfa); /* and analysis */
+}
+
+/*
+ - pullback - pull back constraints backward to (with luck) eliminate them
+ ^ static void pullback(struct nfa *, FILE *);
+ */
+static void
+pullback(
+ struct nfa *nfa,
+ FILE *f) /* for debug output; NULL none */
+{
+ struct state *s;
+ struct state *nexts;
+ struct arc *a;
+ struct arc *nexta;
+ int progress;
+
+ /*
+ * Find and pull until there are no more.
+ */
+
+ do {
+ progress = 0;
+ for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) {
+ nexts = s->next;
+ for (a=s->outs ; a!=NULL && !NISERR() ; a=nexta) {
+ nexta = a->outchain;
+ if (a->type == '^' || a->type == BEHIND) {
+ if (pull(nfa, a)) {
+ progress = 1;
+ }
+ }
+ assert(nexta == NULL || s->no != FREESTATE);
+ }
+ }
+ if (progress && f != NULL) {
+ dumpnfa(nfa, f);
+ }
+ } while (progress && !NISERR());
+ if (NISERR()) {
+ return;
+ }
+
+ for (a=nfa->pre->outs ; a!=NULL ; a=nexta) {
+ nexta = a->outchain;
+ if (a->type == '^') {
+ assert(a->co == 0 || a->co == 1);
+ newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
+ freearc(nfa, a);
+ }
+ }
+}
+
+/*
+ - pull - pull a back constraint backward past its source state
+ * A significant property of this function is that it deletes at most
+ * one state -- the constraint's from state -- and only if the constraint
+ * was that state's last outarc.
+ ^ static int pull(struct nfa *, struct arc *);
+ */
+static int /* 0 couldn't, 1 could */
+pull(
+ struct nfa *nfa,
+ struct arc *con)
+{
+ struct state *from = con->from;
+ struct state *to = con->to;
+ struct arc *a;
+ struct arc *nexta;
+ struct state *s;
+
+ if (from == to) { /* circular constraint is pointless */
+ freearc(nfa, con);
+ return 1;
+ }
+ if (from->flag) { /* can't pull back beyond start */
+ return 0;
+ }
+ if (from->nins == 0) { /* unreachable */
+ freearc(nfa, con);
+ return 1;
+ }
+
+ /*
+ * DGP 2007-11-15: Cloning a state with a circular constraint on its list
+ * of outs can lead to trouble [Bug 1810038], so get rid of them first.
+ */
+
+ for (a = from->outs; a != NULL; a = nexta) {
+ nexta = a->outchain;
+ switch (a->type) {
+ case '^':
+ case '$':
+ case BEHIND:
+ case AHEAD:
+ if (from == a->to) {
+ freearc(nfa, a);
+ }
+ break;
+ }
+ }
+
+ /*
+ * First, clone from state if necessary to avoid other outarcs.
+ */
+
+ if (from->nouts > 1) {
+ s = newstate(nfa);
+ if (NISERR()) {
+ return 0;
+ }
+ assert(to != from); /* con is not an inarc */
+ copyins(nfa, from, s); /* duplicate inarcs */
+ cparc(nfa, con, s, to); /* move constraint arc */
+ freearc(nfa, con);
+ from = s;
+ con = from->outs;
+ }
+ assert(from->nouts == 1);
+
+ /*
+ * Propagate the constraint into the from state's inarcs.
+ */
+
+ for (a=from->ins ; a!=NULL ; a=nexta) {
+ nexta = a->inchain;
+ switch (combine(con, a)) {
+ case INCOMPATIBLE: /* destroy the arc */
+ freearc(nfa, a);
+ break;
+ case SATISFIED: /* no action needed */
+ break;
+ case COMPATIBLE: /* swap the two arcs, more or less */
+ s = newstate(nfa);
+ if (NISERR()) {
+ return 0;
+ }
+ cparc(nfa, a, s, to); /* anticipate move */
+ cparc(nfa, con, a->from, s);
+ if (NISERR()) {
+ return 0;
+ }
+ freearc(nfa, a);
+ break;
+ default:
+ assert(NOTREACHED);
+ break;
+ }
+ }
+
+ /*
+ * Remaining inarcs, if any, incorporate the constraint.
+ */
+
+ moveins(nfa, from, to);
+ dropstate(nfa, from); /* will free the constraint */
+ return 1;
+}
+
+/*
+ - pushfwd - push forward constraints forward to (with luck) eliminate them
+ ^ static void pushfwd(struct nfa *, FILE *);
+ */
+static void
+pushfwd(
+ struct nfa *nfa,
+ FILE *f) /* for debug output; NULL none */
+{
+ struct state *s;
+ struct state *nexts;
+ struct arc *a;
+ struct arc *nexta;
+ int progress;
+
+ /*
+ * Find and push until there are no more.
+ */
+
+ do {
+ progress = 0;
+ for (s=nfa->states ; s!=NULL && !NISERR() ; s=nexts) {
+ nexts = s->next;
+ for (a = s->ins; a != NULL && !NISERR(); a = nexta) {
+ nexta = a->inchain;
+ if (a->type == '$' || a->type == AHEAD) {
+ if (push(nfa, a)) {
+ progress = 1;
+ }
+ }
+ assert(nexta == NULL || s->no != FREESTATE);
+ }
+ }
+ if (progress && f != NULL) {
+ dumpnfa(nfa, f);
+ }
+ } while (progress && !NISERR());
+ if (NISERR()) {
+ return;
+ }
+
+ for (a = nfa->post->ins; a != NULL; a = nexta) {
+ nexta = a->inchain;
+ if (a->type == '$') {
+ assert(a->co == 0 || a->co == 1);
+ newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
+ freearc(nfa, a);
+ }
+ }
+}
+
+/*
+ - push - push a forward constraint forward past its destination state
+ * A significant property of this function is that it deletes at most
+ * one state -- the constraint's to state -- and only if the constraint
+ * was that state's last inarc.
+ ^ static int push(struct nfa *, struct arc *);
+ */
+static int /* 0 couldn't, 1 could */
+push(
+ struct nfa *nfa,
+ struct arc *con)
+{
+ struct state *from = con->from;
+ struct state *to = con->to;
+ struct arc *a;
+ struct arc *nexta;
+ struct state *s;
+
+ if (to == from) { /* circular constraint is pointless */
+ freearc(nfa, con);
+ return 1;
+ }
+ if (to->flag) { /* can't push forward beyond end */
+ return 0;
+ }
+ if (to->nouts == 0) { /* dead end */
+ freearc(nfa, con);
+ return 1;
+ }
+
+ /*
+ * DGP 2007-11-15: Here we duplicate the same protections as appear
+ * in pull() above to avoid troubles with cloning a state with a
+ * circular constraint on its list of ins. It is not clear whether
+ * this is necessary, or is protecting against a "can't happen".
+ * Any test case that actually leads to a freearc() call here would
+ * be a welcome addition to the test suite.
+ */
+
+ for (a = to->ins; a != NULL; a = nexta) {
+ nexta = a->inchain;
+ switch (a->type) {
+ case '^':
+ case '$':
+ case BEHIND:
+ case AHEAD:
+ if (a->from == to) {
+ freearc(nfa, a);
+ }
+ break;
+ }
+ }
+ /*
+ * First, clone to state if necessary to avoid other inarcs.
+ */
+
+ if (to->nins > 1) {
+ s = newstate(nfa);
+ if (NISERR()) {
+ return 0;
+ }
+ copyouts(nfa, to, s); /* duplicate outarcs */
+ cparc(nfa, con, from, s); /* move constraint */
+ freearc(nfa, con);
+ to = s;
+ con = to->ins;
+ }
+ assert(to->nins == 1);
+
+ /*
+ * Propagate the constraint into the to state's outarcs.
+ */
+
+ for (a = to->outs; a != NULL; a = nexta) {
+ nexta = a->outchain;
+ switch (combine(con, a)) {
+ case INCOMPATIBLE: /* destroy the arc */
+ freearc(nfa, a);
+ break;
+ case SATISFIED: /* no action needed */
+ break;
+ case COMPATIBLE: /* swap the two arcs, more or less */
+ s = newstate(nfa);
+ if (NISERR()) {
+ return 0;
+ }
+ cparc(nfa, con, s, a->to); /* anticipate move */
+ cparc(nfa, a, from, s);
+ if (NISERR()) {
+ return 0;
+ }
+ freearc(nfa, a);
+ break;
+ default:
+ assert(NOTREACHED);
+ break;
+ }
+ }
+
+ /*
+ * Remaining outarcs, if any, incorporate the constraint.
+ */
+
+ moveouts(nfa, to, from);
+ dropstate(nfa, to); /* will free the constraint */
+ return 1;
+}
+
+/*
+ - combine - constraint lands on an arc, what happens?
+ ^ #def INCOMPATIBLE 1 // destroys arc
+ ^ #def SATISFIED 2 // constraint satisfied
+ ^ #def COMPATIBLE 3 // compatible but not satisfied yet
+ ^ static int combine(struct arc *, struct arc *);
+ */
+static int
+combine(
+ struct arc *con,
+ struct arc *a)
+{
+#define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
+
+ switch (CA(con->type, a->type)) {
+ case CA('^', PLAIN): /* newlines are handled separately */
+ case CA('$', PLAIN):
+ return INCOMPATIBLE;
+ break;
+ case CA(AHEAD, PLAIN): /* color constraints meet colors */
+ case CA(BEHIND, PLAIN):
+ if (con->co == a->co) {
+ return SATISFIED;
+ }
+ return INCOMPATIBLE;
+ break;
+ case CA('^', '^'): /* collision, similar constraints */
+ case CA('$', '$'):
+ case CA(AHEAD, AHEAD):
+ case CA(BEHIND, BEHIND):
+ if (con->co == a->co) { /* true duplication */
+ return SATISFIED;
+ }
+ return INCOMPATIBLE;
+ break;
+ case CA('^', BEHIND): /* collision, dissimilar constraints */
+ case CA(BEHIND, '^'):
+ case CA('$', AHEAD):
+ case CA(AHEAD, '$'):
+ return INCOMPATIBLE;
+ break;
+ case CA('^', '$'): /* constraints passing each other */
+ case CA('^', AHEAD):
+ case CA(BEHIND, '$'):
+ case CA(BEHIND, AHEAD):
+ case CA('$', '^'):
+ case CA('$', BEHIND):
+ case CA(AHEAD, '^'):
+ case CA(AHEAD, BEHIND):
+ case CA('^', LACON):
+ case CA(BEHIND, LACON):
+ case CA('$', LACON):
+ case CA(AHEAD, LACON):
+ return COMPATIBLE;
+ break;
+ }
+ assert(NOTREACHED);
+ return INCOMPATIBLE; /* for benefit of blind compilers */
+}
+
+/*
+ - fixempties - get rid of EMPTY arcs
+ ^ static void fixempties(struct nfa *, FILE *);
+ */
+static void
+fixempties(
+ struct nfa *nfa,
+ FILE *f) /* for debug output; NULL none */
+{
+ struct state *s;
+ struct state *nexts;
+ struct arc *a;
+ struct arc *nexta;
+ int progress;
+
+ /*
+ * Find and eliminate empties until there are no more.
+ */
+
+ do {
+ progress = 0;
+ for (s = nfa->states; s != NULL && !NISERR()
+ && s->no != FREESTATE; s = nexts) {
+ nexts = s->next;
+ for (a = s->outs; a != NULL && !NISERR(); a = nexta) {
+ nexta = a->outchain;
+ if (a->type == EMPTY && unempty(nfa, a)) {
+ progress = 1;
+ }
+ assert(nexta == NULL || s->no != FREESTATE);
+ }
+ }
+ if (progress && f != NULL) {
+ dumpnfa(nfa, f);
+ }
+ } while (progress && !NISERR());
+}
+
+/*
+ - unempty - optimize out an EMPTY arc, if possible
+ * Actually, as it stands this function always succeeds, but the return value
+ * is kept with an eye on possible future changes.
+ ^ static int unempty(struct nfa *, struct arc *);
+ */
+static int /* 0 couldn't, 1 could */
+unempty(
+ struct nfa *nfa,
+ struct arc *a)
+{
+ struct state *from = a->from;
+ struct state *to = a->to;
+ int usefrom; /* work on from, as opposed to to? */
+
+ assert(a->type == EMPTY);
+ assert(from != nfa->pre && to != nfa->post);
+
+ if (from == to) { /* vacuous loop */
+ freearc(nfa, a);
+ return 1;
+ }
+
+ /*
+ * Decide which end to work on.
+ */
+
+ usefrom = 1; /* default: attack from */
+ if (from->nouts > to->nins) {
+ usefrom = 0;
+ } else if (from->nouts == to->nins) {
+ /*
+ * Decide on secondary issue: move/copy fewest arcs.
+ */
+
+ if (from->nins > to->nouts) {
+ usefrom = 0;
+ }
+ }
+
+ freearc(nfa, a);
+ if (usefrom) {
+ if (from->nouts == 0) {
+ /*
+ * Was the state's only outarc.
+ */
+
+ moveins(nfa, from, to);
+ freestate(nfa, from);
+ } else {
+ copyins(nfa, from, to);
+ }
+ } else {
+ if (to->nins == 0) {
+ /*
+ * Was the state's only inarc.
+ */
+
+ moveouts(nfa, to, from);
+ freestate(nfa, to);
+ } else {
+ copyouts(nfa, to, from);
+ }
+ }
+
+ return 1;
+}
+
+/*
+ - cleanup - clean up NFA after optimizations
+ ^ static void cleanup(struct nfa *);
+ */
+static void
+cleanup(
+ struct nfa *nfa)
+{
+ struct state *s;
+ struct state *nexts;
+ int n;
+
+ /*
+ * Clear out unreachable or dead-end states. Use pre to mark reachable,
+ * then post to mark can-reach-post.
+ */
+
+ markreachable(nfa, nfa->pre, NULL, nfa->pre);
+ markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
+ for (s = nfa->states; s != NULL; s = nexts) {
+ nexts = s->next;
+ if (s->tmp != nfa->post && !s->flag) {
+ dropstate(nfa, s);
+ }
+ }
+ assert(nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
+ cleartraverse(nfa, nfa->pre);
+ assert(nfa->post->nins == 0 || nfa->post->tmp == NULL);
+ /* the nins==0 (final unreachable) case will be caught later */
+
+ /*
+ * Renumber surviving states.
+ */
+
+ n = 0;
+ for (s = nfa->states; s != NULL; s = s->next) {
+ s->no = n++;
+ }
+ nfa->nstates = n;
+}
+
+/*
+ - markreachable - recursive marking of reachable states
+ ^ static void markreachable(struct nfa *, struct state *, struct state *,
+ ^ struct state *);
+ */
+static void
+markreachable(
+ struct nfa *nfa,
+ struct state *s,
+ struct state *okay, /* consider only states with this mark */
+ struct state *mark) /* the value to mark with */
+{
+ struct arc *a;
+
+ if (s->tmp != okay) {
+ return;
+ }
+ s->tmp = mark;
+
+ for (a = s->outs; a != NULL; a = a->outchain) {
+ markreachable(nfa, a->to, okay, mark);
+ }
+}
+
+/*
+ - markcanreach - recursive marking of states which can reach here
+ ^ static void markcanreach(struct nfa *, struct state *, struct state *,
+ ^ struct state *);
+ */
+static void
+markcanreach(
+ struct nfa *nfa,
+ struct state *s,
+ struct state *okay, /* consider only states with this mark */
+ struct state *mark) /* the value to mark with */
+{
+ struct arc *a;
+
+ if (s->tmp != okay) {
+ return;
+ }
+ s->tmp = mark;
+
+ for (a = s->ins; a != NULL; a = a->inchain) {
+ markcanreach(nfa, a->from, okay, mark);
+ }
+}
+
+/*
+ - analyze - ascertain potentially-useful facts about an optimized NFA
+ ^ static long analyze(struct nfa *);
+ */
+static long /* re_info bits to be ORed in */
+analyze(
+ struct nfa *nfa)
+{
+ struct arc *a;
+ struct arc *aa;
+
+ if (nfa->pre->outs == NULL) {
+ return REG_UIMPOSSIBLE;
+ }
+ for (a = nfa->pre->outs; a != NULL; a = a->outchain) {
+ for (aa = a->to->outs; aa != NULL; aa = aa->outchain) {
+ if (aa->to == nfa->post) {
+ return REG_UEMPTYMATCH;
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ - compact - compact an NFA
+ ^ static void compact(struct nfa *, struct cnfa *);
+ */
+static void
+compact(
+ struct nfa *nfa,
+ struct cnfa *cnfa)
+{
+ struct state *s;
+ struct arc *a;
+ size_t nstates;
+ size_t narcs;
+ struct carc *ca;
+ struct carc *first;
+
+ assert(!NISERR());
+
+ nstates = 0;
+ narcs = 0;
+ for (s = nfa->states; s != NULL; s = s->next) {
+ nstates++;
+ narcs += 1 + s->nouts + 1;
+ /* 1 as a fake for flags, nouts for arcs, 1 as endmarker */
+ }
+
+ cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
+ cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
+ if (cnfa->states == NULL || cnfa->arcs == NULL) {
+ if (cnfa->states != NULL) {
+ FREE(cnfa->states);
+ }
+ if (cnfa->arcs != NULL) {
+ FREE(cnfa->arcs);
+ }
+ NERR(REG_ESPACE);
+ return;
+ }
+ cnfa->nstates = nstates;
+ cnfa->pre = nfa->pre->no;
+ cnfa->post = nfa->post->no;
+ cnfa->bos[0] = nfa->bos[0];
+ cnfa->bos[1] = nfa->bos[1];
+ cnfa->eos[0] = nfa->eos[0];
+ cnfa->eos[1] = nfa->eos[1];
+ cnfa->ncolors = maxcolor(nfa->cm) + 1;
+ cnfa->flags = 0;
+
+ ca = cnfa->arcs;
+ for (s = nfa->states; s != NULL; s = s->next) {
+ assert((size_t) s->no < nstates);
+ cnfa->states[s->no] = ca;
+ ca->co = 0; /* clear and skip flags "arc" */
+ ca++;
+ first = ca;
+ for (a = s->outs; a != NULL; a = a->outchain) {
+ switch (a->type) {
+ case PLAIN:
+ ca->co = a->co;
+ ca->to = a->to->no;
+ ca++;
+ break;
+ case LACON:
+ assert(s->no != cnfa->pre);
+ ca->co = (color) (cnfa->ncolors + a->co);
+ ca->to = a->to->no;
+ ca++;
+ cnfa->flags |= HASLACONS;
+ break;
+ default:
+ assert(NOTREACHED);
+ break;
+ }
+ }
+ carcsort(first, ca-1);
+ ca->co = COLORLESS;
+ ca->to = 0;
+ ca++;
+ }
+ assert(ca == &cnfa->arcs[narcs]);
+ assert(cnfa->nstates != 0);
+
+ /*
+ * Mark no-progress states.
+ */
+
+ for (a = nfa->pre->outs; a != NULL; a = a->outchain) {
+ cnfa->states[a->to->no]->co = 1;
+ }
+ cnfa->states[nfa->pre->no]->co = 1;
+}
+
+/*
+ - carcsort - sort compacted-NFA arcs by color
+ * Really dumb algorithm, but if the list is long enough for that to matter,
+ * you're in real trouble anyway.
+ ^ static void carcsort(struct carc *, struct carc *);
+ */
+static void
+carcsort(
+ struct carc *first,
+ struct carc *last)
+{
+ struct carc *p;
+ struct carc *q;
+ struct carc tmp;
+
+ if (last - first <= 1) {
+ return;
+ }
+
+ for (p = first; p <= last; p++) {
+ for (q = p; q <= last; q++) {
+ if (p->co > q->co || (p->co == q->co && p->to > q->to)) {
+ assert(p != q);
+ tmp = *p;
+ *p = *q;
+ *q = tmp;
+ }
+ }
+ }
+}
+
+/*
+ - freecnfa - free a compacted NFA
+ ^ static void freecnfa(struct cnfa *);
+ */
+static void
+freecnfa(
+ struct cnfa *cnfa)
+{
+ assert(cnfa->nstates != 0); /* not empty already */
+ cnfa->nstates = 0;
+ FREE(cnfa->states);
+ FREE(cnfa->arcs);
+}
+
+/*
+ - dumpnfa - dump an NFA in human-readable form
+ ^ static void dumpnfa(struct nfa *, FILE *);
+ */
+static void
+dumpnfa(
+ struct nfa *nfa,
+ FILE *f)
+{
+#ifdef REG_DEBUG
+ struct state *s;
+
+ fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
+ if (nfa->bos[0] != COLORLESS) {
+ fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
+ }
+ if (nfa->bos[1] != COLORLESS) {
+ fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
+ }
+ if (nfa->eos[0] != COLORLESS) {
+ fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
+ }
+ if (nfa->eos[1] != COLORLESS) {
+ fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
+ }
+ fprintf(f, "\n");
+ for (s = nfa->states; s != NULL; s = s->next) {
+ dumpstate(s, f);
+ }
+ if (nfa->parent == NULL) {
+ dumpcolors(nfa->cm, f);
+ }
+ fflush(f);
+#endif
+}
+
+#ifdef REG_DEBUG /* subordinates of dumpnfa */
+/*
+ ^ #ifdef REG_DEBUG
+ */
+
+/*
+ - dumpstate - dump an NFA state in human-readable form
+ ^ static void dumpstate(struct state *, FILE *);
+ */
+static void
+dumpstate(
+ struct state *s,
+ FILE *f)
+{
+ struct arc *a;
+
+ fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
+ (s->flag) ? s->flag : '.');
+ if (s->prev != NULL && s->prev->next != s) {
+ fprintf(f, "\tstate chain bad\n");
+ }
+ if (s->nouts == 0) {
+ fprintf(f, "\tno out arcs\n");
+ } else {
+ dumparcs(s, f);
+ }
+ fflush(f);
+ for (a = s->ins; a != NULL; a = a->inchain) {
+ if (a->to != s) {
+ fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
+ a->from->no, a->to->no, s->no);
+ }
+ }
+}
+
+/*
+ - dumparcs - dump out-arcs in human-readable form
+ ^ static void dumparcs(struct state *, FILE *);
+ */
+static void
+dumparcs(
+ struct state *s,
+ FILE *f)
+{
+ int pos;
+
+ assert(s->nouts > 0);
+ /* printing arcs in reverse order is usually clearer */
+ pos = dumprarcs(s->outs, s, f, 1);
+ if (pos != 1) {
+ fprintf(f, "\n");
+ }
+}
+
+/*
+ - dumprarcs - dump remaining outarcs, recursively, in reverse order
+ ^ static int dumprarcs(struct arc *, struct state *, FILE *, int);
+ */
+static int /* resulting print position */
+dumprarcs(
+ struct arc *a,
+ struct state *s,
+ FILE *f,
+ int pos) /* initial print position */
+{
+ if (a->outchain != NULL) {
+ pos = dumprarcs(a->outchain, s, f, pos);
+ }
+ dumparc(a, s, f);
+ if (pos == 5) {
+ fprintf(f, "\n");
+ pos = 1;
+ } else {
+ pos++;
+ }
+ return pos;
+}
+
+/*
+ - dumparc - dump one outarc in readable form, including prefixing tab
+ ^ static void dumparc(struct arc *, struct state *, FILE *);
+ */
+static void
+dumparc(
+ struct arc *a,
+ struct state *s,
+ FILE *f)
+{
+ struct arc *aa;
+ struct arcbatch *ab;
+
+ fprintf(f, "\t");
+ switch (a->type) {
+ case PLAIN:
+ fprintf(f, "[%ld]", (long) a->co);
+ break;
+ case AHEAD:
+ fprintf(f, ">%ld>", (long) a->co);
+ break;
+ case BEHIND:
+ fprintf(f, "<%ld<", (long) a->co);
+ break;
+ case LACON:
+ fprintf(f, ":%ld:", (long) a->co);
+ break;
+ case '^':
+ case '$':
+ fprintf(f, "%c%d", a->type, (int) a->co);
+ break;
+ case EMPTY:
+ break;
+ default:
+ fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
+ break;
+ }
+ if (a->from != s) {
+ fprintf(f, "?%d?", a->from->no);
+ }
+ for (ab = &a->from->oas; ab != NULL; ab = ab->next) {
+ for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++) {
+ if (aa == a) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ if (aa < &ab->a[ABSIZE]) { /* propagate break */
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ if (ab == NULL) {
+ fprintf(f, "?!?"); /* not in allocated space */
+ }
+ fprintf(f, "->");
+ if (a->to == NULL) {
+ fprintf(f, "NULL");
+ return;
+ }
+ fprintf(f, "%d", a->to->no);
+ for (aa = a->to->ins; aa != NULL; aa = aa->inchain) {
+ if (aa == a) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ if (aa == NULL) {
+ fprintf(f, "?!?"); /* missing from in-chain */
+ }
+}
+
+/*
+ ^ #endif
+ */
+#endif /* ifdef REG_DEBUG */
+
+/*
+ - dumpcnfa - dump a compacted NFA in human-readable form
+ ^ static void dumpcnfa(struct cnfa *, FILE *);
+ */
+static void
+dumpcnfa(
+ struct cnfa *cnfa,
+ FILE *f)
+{
+#ifdef REG_DEBUG
+ int st;
+
+ fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
+ if (cnfa->bos[0] != COLORLESS) {
+ fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
+ }
+ if (cnfa->bos[1] != COLORLESS) {
+ fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
+ }
+ if (cnfa->eos[0] != COLORLESS) {
+ fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
+ }
+ if (cnfa->eos[1] != COLORLESS) {
+ fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
+ }
+ if (cnfa->flags&HASLACONS) {
+ fprintf(f, ", haslacons");
+ }
+ fprintf(f, "\n");
+ for (st = 0; st < cnfa->nstates; st++) {
+ dumpcstate(st, cnfa->states[st], cnfa, f);
+ }
+ fflush(f);
+#endif
+}
+
+#ifdef REG_DEBUG /* subordinates of dumpcnfa */
+/*
+ ^ #ifdef REG_DEBUG
+ */
+
+/*
+ - dumpcstate - dump a compacted-NFA state in human-readable form
+ ^ static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+ */
+static void
+dumpcstate(
+ int st,
+ struct carc *ca,
+ struct cnfa *cnfa,
+ FILE *f)
+{
+ int i;
+ int pos;
+
+ fprintf(f, "%d%s", st, (ca[0].co) ? ":" : ".");
+ pos = 1;
+ for (i = 1; ca[i].co != COLORLESS; i++) {
+ if (ca[i].co < cnfa->ncolors) {
+ fprintf(f, "\t[%ld]->%d", (long) ca[i].co, ca[i].to);
+ } else {
+ fprintf(f, "\t:%ld:->%d", (long) ca[i].co-cnfa->ncolors,ca[i].to);
+ }
+ if (pos == 5) {
+ fprintf(f, "\n");
+ pos = 1;
+ } else {
+ pos++;
+ }
+ }
+ if (i == 1 || pos != 1) {
+ fprintf(f, "\n");
+ }
+ fflush(f);
+}
+
+/*
+ ^ #endif
+ */
+#endif /* ifdef REG_DEBUG */
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regcomp.c b/contrib/hsrex/regcomp.c
new file mode 100644
index 0000000..8ff77ad
--- /dev/null
+++ b/contrib/hsrex/regcomp.c
@@ -0,0 +1,2169 @@
+/*
+ * re_*comp and friends - compile REs
+ * This file #includes several others (see the bottom).
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "regguts.h"
+
+/*
+ * forward declarations, up here so forward datatypes etc. are defined early
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regcomp.c === */
+int compile(regex_t *, const chr *, size_t, int);
+static void moresubs(struct vars *, int);
+static int freev(struct vars *, int);
+static void makesearch(struct vars *, struct nfa *);
+static struct subre *parse(struct vars *, int, int, struct state *, struct state *);
+static struct subre *parsebranch(struct vars *, int, int, struct state *, struct state *, int);
+static void parseqatom(struct vars *, int, int, struct state *, struct state *, struct subre *);
+static void nonword(struct vars *, int, struct state *, struct state *);
+static void word(struct vars *, int, struct state *, struct state *);
+static int scannum(struct vars *);
+static void repeat(struct vars *, struct state *, struct state *, int, int);
+static void bracket(struct vars *, struct state *, struct state *);
+static void cbracket(struct vars *, struct state *, struct state *);
+static void brackpart(struct vars *, struct state *, struct state *);
+static const chr *scanplain(struct vars *);
+static void onechr(struct vars *, pchr, struct state *, struct state *);
+static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
+static void wordchrs(struct vars *);
+static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
+static void freesubre(struct vars *, struct subre *);
+static void freesrnode(struct vars *, struct subre *);
+static void optst(struct vars *, struct subre *);
+static int numst(struct subre *, int);
+static void markst(struct subre *);
+static void cleanst(struct vars *);
+static long nfatree(struct vars *, struct subre *, FILE *);
+static long nfanode(struct vars *, struct subre *, FILE *);
+static int newlacon(struct vars *, struct state *, struct state *, int);
+static void freelacons(struct subre *, int);
+static void rfree(regex_t *);
+static void dump(regex_t *, FILE *);
+static void dumpst(struct subre *, FILE *, int);
+static void stdump(struct subre *, FILE *, int);
+static const char *stid(struct subre *, char *, size_t);
+/* === regc_lex.c === */
+static void lexstart(struct vars *);
+static void prefixes(struct vars *);
+static void lexnest(struct vars *, const chr *, const chr *);
+static void lexword(struct vars *);
+static int next(struct vars *);
+static int lexescape(struct vars *);
+static chr lexdigits(struct vars *, int, int, int);
+static int brenext(struct vars *, pchr);
+static void skip(struct vars *);
+static chr newline(NOPARMS);
+#ifdef REG_DEBUG
+static const chr *ch(NOPARMS);
+#endif
+static chr chrnamed(struct vars *, const chr *, const chr *, pchr);
+/* === regc_color.c === */
+static void initcm(struct vars *, struct colormap *);
+static void freecm(struct colormap *);
+static void cmtreefree(struct colormap *, union tree *, int);
+static color setcolor(struct colormap *, pchr, pcolor);
+static color maxcolor(struct colormap *);
+static color newcolor(struct colormap *);
+static void freecolor(struct colormap *, pcolor);
+static color pseudocolor(struct colormap *);
+static color subcolor(struct colormap *, pchr c);
+static color newsub(struct colormap *, pcolor);
+static void subrange(struct vars *, pchr, pchr, struct state *, struct state *);
+static void subblock(struct vars *, pchr, struct state *, struct state *);
+static void okcolors(struct nfa *, struct colormap *);
+static void colorchain(struct colormap *, struct arc *);
+static void uncolorchain(struct colormap *, struct arc *);
+static void rainbow(struct nfa *, struct colormap *, int, pcolor, struct state *, struct state *);
+static void colorcomplement(struct nfa *, struct colormap *, int, struct state *, struct state *, struct state *);
+#ifdef REG_DEBUG
+static void dumpcolors(struct colormap *, FILE *);
+static void fillcheck(struct colormap *, union tree *, int, FILE *);
+static void dumpchr(pchr, FILE *);
+#endif
+/* === regc_nfa.c === */
+static struct nfa *newnfa(struct vars *, struct colormap *, struct nfa *);
+static void freenfa(struct nfa *);
+static struct state *newstate(struct nfa *);
+static struct state *newfstate(struct nfa *, int flag);
+static void dropstate(struct nfa *, struct state *);
+static void freestate(struct nfa *, struct state *);
+static void destroystate(struct nfa *, struct state *);
+static void newarc(struct nfa *, int, pcolor, struct state *, struct state *);
+static struct arc *allocarc(struct nfa *, struct state *);
+static void freearc(struct nfa *, struct arc *);
+static struct arc *findarc(struct state *, int, pcolor);
+static void cparc(struct nfa *, struct arc *, struct state *, struct state *);
+static void moveins(struct nfa *, struct state *, struct state *);
+static void copyins(struct nfa *, struct state *, struct state *);
+static void moveouts(struct nfa *, struct state *, struct state *);
+static void copyouts(struct nfa *, struct state *, struct state *);
+static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int);
+static void delsub(struct nfa *, struct state *, struct state *);
+static void deltraverse(struct nfa *, struct state *, struct state *);
+static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
+static void duptraverse(struct nfa *, struct state *, struct state *, int);
+static void cleartraverse(struct nfa *, struct state *);
+static void specialcolors(struct nfa *);
+static long optimize(struct nfa *, FILE *);
+static void pullback(struct nfa *, FILE *);
+static int pull(struct nfa *, struct arc *);
+static void pushfwd(struct nfa *, FILE *);
+static int push(struct nfa *, struct arc *);
+#define INCOMPATIBLE 1 /* destroys arc */
+#define SATISFIED 2 /* constraint satisfied */
+#define COMPATIBLE 3 /* compatible but not satisfied yet */
+static int combine(struct arc *, struct arc *);
+static void fixempties(struct nfa *, FILE *);
+static int unempty(struct nfa *, struct arc *);
+static void cleanup(struct nfa *);
+static void markreachable(struct nfa *, struct state *, struct state *, struct state *);
+static void markcanreach(struct nfa *, struct state *, struct state *, struct state *);
+static long analyze(struct nfa *);
+static void compact(struct nfa *, struct cnfa *);
+static void carcsort(struct carc *, struct carc *);
+static void freecnfa(struct cnfa *);
+static void dumpnfa(struct nfa *, FILE *);
+#ifdef REG_DEBUG
+static void dumpstate(struct state *, FILE *);
+static void dumparcs(struct state *, FILE *);
+static int dumprarcs(struct arc *, struct state *, FILE *, int);
+static void dumparc(struct arc *, struct state *, FILE *);
+#endif
+static void dumpcnfa(struct cnfa *, FILE *);
+#ifdef REG_DEBUG
+static void dumpcstate(int, struct carc *, struct cnfa *, FILE *);
+#endif
+/* === regc_cvec.c === */
+static struct cvec *clearcvec(struct cvec *);
+static void addchr(struct cvec *, pchr);
+static void addrange(struct cvec *, pchr, pchr);
+static struct cvec *newcvec(int, int);
+static struct cvec *getcvec(struct vars *, int, int);
+static void freecvec(struct cvec *);
+/* === regc_locale.c === */
+static celt element(struct vars *, const chr *, const chr *);
+static struct cvec *range(struct vars *, celt, celt, int);
+static int before(celt, celt);
+static struct cvec *eclass(struct vars *, celt, int);
+static struct cvec *cclass(struct vars *, const chr *, const chr *, int);
+static struct cvec *allcases(struct vars *, pchr);
+static int cmp(const chr *, const chr *, size_t);
+static int casecmp(const chr *, const chr *, size_t);
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+/* internal variables, bundled for easy passing around */
+struct vars {
+ regex_t *re;
+ const chr *now; /* scan pointer into string */
+ const chr *stop; /* end of string */
+ const chr *savenow; /* saved now and stop for "subroutine call" */
+ const chr *savestop;
+ int err; /* error code (0 if none) */
+ int cflags; /* copy of compile flags */
+ int lasttype; /* type of previous token */
+ int nexttype; /* type of next token */
+ chr nextvalue; /* value (if any) of next token */
+ int lexcon; /* lexical context type (see lex.c) */
+ int nsubexp; /* subexpression count */
+ struct subre **subs; /* subRE pointer vector */
+ size_t nsubs; /* length of vector */
+ struct subre *sub10[10]; /* initial vector, enough for most */
+ struct nfa *nfa; /* the NFA */
+ struct colormap *cm; /* character color map */
+ color nlcolor; /* color of newline */
+ struct state *wordchrs; /* state in nfa holding word-char outarcs */
+ struct subre *tree; /* subexpression tree */
+ struct subre *treechain; /* all tree nodes allocated */
+ struct subre *treefree; /* any free tree nodes */
+ int ntree; /* number of tree nodes */
+ struct cvec *cv; /* interface cvec */
+ struct cvec *cv2; /* utility cvec */
+ struct subre *lacons; /* lookahead-constraint vector */
+ int nlacons; /* size of lacons */
+};
+
+/* parsing macros; most know that `v' is the struct vars pointer */
+#define NEXT() (next(v)) /* advance by one token */
+#define SEE(t) (v->nexttype == (t)) /* is next token this? */
+#define EAT(t) (SEE(t) && next(v)) /* if next is this, swallow it */
+#define VISERR(vv) ((vv)->err != 0)/* have we seen an error yet? */
+#define ISERR() VISERR(v)
+#define VERR(vv,e) \
+ ((vv)->nexttype = EOS, ((vv)->err) ? (vv)->err : ((vv)->err = (e)))
+#define ERR(e) VERR(v, e) /* record an error */
+#define NOERR() {if (ISERR()) return;} /* if error seen, return */
+#define NOERRN() {if (ISERR()) return NULL;} /* NOERR with retval */
+#define NOERRZ() {if (ISERR()) return 0;} /* NOERR with retval */
+#define INSIST(c, e) ((c) ? 0 : ERR(e)) /* if condition false, error */
+#define NOTE(b) (v->re->re_info |= (b)) /* note visible condition */
+#define EMPTYARC(x, y) newarc(v->nfa, EMPTY, 0, x, y)
+
+/* token type codes, some also used as NFA arc types */
+#define EMPTY 'n' /* no token present */
+#define EOS 'e' /* end of string */
+#define PLAIN 'p' /* ordinary character */
+#define DIGIT 'd' /* digit (in bound) */
+#define BACKREF 'b' /* back reference */
+#define COLLEL 'I' /* start of [. */
+#define ECLASS 'E' /* start of [= */
+#define CCLASS 'C' /* start of [: */
+#define END 'X' /* end of [. [= [: */
+#define RANGE 'R' /* - within [] which might be range delim. */
+#define LACON 'L' /* lookahead constraint subRE */
+#define AHEAD 'a' /* color-lookahead arc */
+#define BEHIND 'r' /* color-lookbehind arc */
+#define WBDRY 'w' /* word boundary constraint */
+#define NWBDRY 'W' /* non-word-boundary constraint */
+#define SBEGIN 'A' /* beginning of string (even if not BOL) */
+#define SEND 'Z' /* end of string (even if not EOL) */
+#define PREFER 'P' /* length preference */
+
+/* is an arc colored, and hence on a color chain? */
+#define COLORED(a) \
+ ((a)->type == PLAIN || (a)->type == AHEAD || (a)->type == BEHIND)
+
+/* static function list */
+static struct fns functions = {
+ rfree, /* regfree insides */
+};
+
+/*
+ - compile - compile regular expression
+ ^ int compile(regex_t *, const chr *, size_t, int);
+ */
+int
+compile(
+ regex_t *re,
+ const chr *string,
+ size_t len,
+ int flags)
+{
+ AllocVars(v);
+ struct guts *g;
+ int i;
+ size_t j;
+ FILE *debug = (flags&REG_PROGRESS) ? stdout : NULL;
+#define CNOERR() { if (ISERR()) return freev(v, v->err); }
+
+ /*
+ * Sanity checks.
+ */
+
+ if (re == NULL || string == NULL) {
+ FreeVars(v);
+ return REG_INVARG;
+ }
+ if ((flags&REG_QUOTE) && (flags&(REG_ADVANCED|REG_EXPANDED|REG_NEWLINE))) {
+ FreeVars(v);
+ return REG_INVARG;
+ }
+ if (!(flags&REG_EXTENDED) && (flags&REG_ADVF)) {
+ FreeVars(v);
+ return REG_INVARG;
+ }
+
+ /*
+ * Initial setup (after which freev() is callable).
+ */
+
+ v->re = re;
+ v->now = string;
+ v->stop = v->now + len;
+ v->savenow = v->savestop = NULL;
+ v->err = 0;
+ v->cflags = flags;
+ v->nsubexp = 0;
+ v->subs = v->sub10;
+ v->nsubs = 10;
+ for (j = 0; j < v->nsubs; j++) {
+ v->subs[j] = NULL;
+ }
+ v->nfa = NULL;
+ v->cm = NULL;
+ v->nlcolor = COLORLESS;
+ v->wordchrs = NULL;
+ v->tree = NULL;
+ v->treechain = NULL;
+ v->treefree = NULL;
+ v->cv = NULL;
+ v->cv2 = NULL;
+ v->lacons = NULL;
+ v->nlacons = 0;
+ re->re_magic = REMAGIC;
+ re->re_info = 0; /* bits get set during parse */
+ re->re_csize = sizeof(chr);
+ re->re_guts = NULL;
+ re->re_fns = VS(&functions);
+
+ /*
+ * More complex setup, malloced things.
+ */
+
+ re->re_guts = VS(MALLOC(sizeof(struct guts)));
+ if (re->re_guts == NULL) {
+ return freev(v, REG_ESPACE);
+ }
+ g = (struct guts *) re->re_guts;
+ g->tree = NULL;
+ initcm(v, &g->cmap);
+ v->cm = &g->cmap;
+ g->lacons = NULL;
+ g->nlacons = 0;
+ ZAPCNFA(g->search);
+ v->nfa = newnfa(v, v->cm, NULL);
+ CNOERR();
+ v->cv = newcvec(100, 20);
+ if (v->cv == NULL) {
+ return freev(v, REG_ESPACE);
+ }
+
+ /*
+ * Parsing.
+ */
+
+ lexstart(v); /* also handles prefixes */
+ if ((v->cflags&REG_NLSTOP) || (v->cflags&REG_NLANCH)) {
+ /*
+ * Assign newline a unique color.
+ */
+
+ v->nlcolor = subcolor(v->cm, newline());
+ okcolors(v->nfa, v->cm);
+ }
+ CNOERR();
+ v->tree = parse(v, EOS, PLAIN, v->nfa->init, v->nfa->final);
+ assert(SEE(EOS)); /* even if error; ISERR() => SEE(EOS) */
+ CNOERR();
+ assert(v->tree != NULL);
+
+ /*
+ * Finish setup of nfa and its subre tree.
+ */
+
+ specialcolors(v->nfa);
+ CNOERR();
+ if (debug != NULL) {
+ fprintf(debug, "\n\n\n========= RAW ==========\n");
+ dumpnfa(v->nfa, debug);
+ dumpst(v->tree, debug, 1);
+ }
+ optst(v, v->tree);
+ v->ntree = numst(v->tree, 1);
+ markst(v->tree);
+ cleanst(v);
+ if (debug != NULL) {
+ fprintf(debug, "\n\n\n========= TREE FIXED ==========\n");
+ dumpst(v->tree, debug, 1);
+ }
+
+ /*
+ * Build compacted NFAs for tree and lacons.
+ */
+
+ re->re_info |= nfatree(v, v->tree, debug);
+ CNOERR();
+ assert(v->nlacons == 0 || v->lacons != NULL);
+ for (i = 1; i < v->nlacons; i++) {
+ if (debug != NULL) {
+ fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
+ }
+ nfanode(v, &v->lacons[i], debug);
+ }
+ CNOERR();
+ if (v->tree->flags&SHORTER) {
+ NOTE(REG_USHORTEST);
+ }
+
+ /*
+ * Build compacted NFAs for tree, lacons, fast search.
+ */
+
+ if (debug != NULL) {
+ fprintf(debug, "\n\n\n========= SEARCH ==========\n");
+ }
+
+ /*
+ * Can sacrifice main NFA now, so use it as work area.
+ */
+
+ (DISCARD) optimize(v->nfa, debug);
+ CNOERR();
+ makesearch(v, v->nfa);
+ CNOERR();
+ compact(v->nfa, &g->search);
+ CNOERR();
+
+ /*
+ * Looks okay, package it up.
+ */
+
+ re->re_nsub = v->nsubexp;
+ v->re = NULL; /* freev no longer frees re */
+ g->magic = GUTSMAGIC;
+ g->cflags = v->cflags;
+ g->info = re->re_info;
+ g->nsub = re->re_nsub;
+ g->tree = v->tree;
+ v->tree = NULL;
+ g->ntree = v->ntree;
+ g->compare = (v->cflags&REG_ICASE) ? casecmp : cmp;
+ g->lacons = v->lacons;
+ v->lacons = NULL;
+ g->nlacons = v->nlacons;
+
+ if (flags&REG_DUMP) {
+ dump(re, stdout);
+ }
+
+ assert(v->err == 0);
+ return freev(v, 0);
+}
+
+/*
+ - moresubs - enlarge subRE vector
+ ^ static void moresubs(struct vars *, int);
+ */
+static void
+moresubs(
+ struct vars *v,
+ int wanted) /* want enough room for this one */
+{
+ struct subre **p;
+ size_t n;
+
+ assert(wanted > 0 && (size_t)wanted >= v->nsubs);
+ n = (size_t)wanted * 3 / 2 + 1;
+ if (v->subs == v->sub10) {
+ p = (struct subre **) MALLOC(n * sizeof(struct subre *));
+ if (p != NULL) {
+ memcpy(p, v->subs, v->nsubs * sizeof(struct subre *));
+ }
+ } else {
+ p = (struct subre **) REALLOC(v->subs, n*sizeof(struct subre *));
+ }
+ if (p == NULL) {
+ ERR(REG_ESPACE);
+ return;
+ }
+
+ v->subs = p;
+ for (p = &v->subs[v->nsubs]; v->nsubs < n; p++, v->nsubs++) {
+ *p = NULL;
+ }
+ assert(v->nsubs == n);
+ assert((size_t)wanted < v->nsubs);
+}
+
+/*
+ - freev - free vars struct's substructures where necessary
+ * Optionally does error-number setting, and always returns error code (if
+ * any), to make error-handling code terser.
+ ^ static int freev(struct vars *, int);
+ */
+static int
+freev(
+ struct vars *v,
+ int err)
+{
+ register int ret;
+
+ if (v->re != NULL) {
+ rfree(v->re);
+ }
+ if (v->subs != v->sub10) {
+ FREE(v->subs);
+ }
+ if (v->nfa != NULL) {
+ freenfa(v->nfa);
+ }
+ if (v->tree != NULL) {
+ freesubre(v, v->tree);
+ }
+ if (v->treechain != NULL) {
+ cleanst(v);
+ }
+ if (v->cv != NULL) {
+ freecvec(v->cv);
+ }
+ if (v->cv2 != NULL) {
+ freecvec(v->cv2);
+ }
+ if (v->lacons != NULL) {
+ freelacons(v->lacons, v->nlacons);
+ }
+ ERR(err); /* nop if err==0 */
+
+ ret = v->err;
+ FreeVars(v);
+ return ret;
+}
+
+/*
+ - makesearch - turn an NFA into a search NFA (implicit prepend of .*?)
+ * NFA must have been optimize()d already.
+ ^ static void makesearch(struct vars *, struct nfa *);
+ */
+static void
+makesearch(
+ struct vars *v,
+ struct nfa *nfa)
+{
+ struct arc *a, *b;
+ struct state *pre = nfa->pre;
+ struct state *s, *s2, *slist;
+
+ /*
+ * No loops are needed if it's anchored.
+ */
+
+ for (a = pre->outs; a != NULL; a = a->outchain) {
+ assert(a->type == PLAIN);
+ if (a->co != nfa->bos[0] && a->co != nfa->bos[1]) {
+ break;
+ }
+ }
+ if (a != NULL) {
+ /*
+ * Add implicit .* in front.
+ */
+
+ rainbow(nfa, v->cm, PLAIN, COLORLESS, pre, pre);
+
+ /*
+ * And ^* and \A* too -- not always necessary, but harmless.
+ */
+
+ newarc(nfa, PLAIN, nfa->bos[0], pre, pre);
+ newarc(nfa, PLAIN, nfa->bos[1], pre, pre);
+ }
+
+ /*
+ * Now here's the subtle part. Because many REs have no lookback
+ * constraints, often knowing when you were in the pre state tells you
+ * little; it's the next state(s) that are informative. But some of them
+ * may have other inarcs, i.e. it may be possible to make actual progress
+ * and then return to one of them. We must de-optimize such cases,
+ * splitting each such state into progress and no-progress states.
+ */
+
+ /*
+ * First, make a list of the states.
+ */
+
+ slist = NULL;
+ for (a=pre->outs ; a!=NULL ; a=a->outchain) {
+ s = a->to;
+ for (b=s->ins ; b!=NULL ; b=b->inchain) {
+ if (b->from != pre) {
+ break;
+ }
+ }
+ if (b != NULL && s->tmp == NULL) {
+ /*
+ * Must be split if not already in the list (fixes bugs 505048,
+ * 230589, 840258, 504785).
+ */
+
+ s->tmp = slist;
+ slist = s;
+ }
+ }
+
+ /*
+ * Do the splits.
+ */
+
+ for (s=slist ; s!=NULL ; s=s2) {
+ s2 = newstate(nfa);
+
+ copyouts(nfa, s, s2);
+ for (a=s->ins ; a!=NULL ; a=b) {
+ b = a->inchain;
+
+ if (a->from != pre) {
+ cparc(nfa, a, a->from, s2);
+ freearc(nfa, a);
+ }
+ }
+ s2 = s->tmp;
+ s->tmp = NULL; /* clean up while we're at it */
+ }
+}
+
+/*
+ - parse - parse an RE
+ * This is actually just the top level, which parses a bunch of branches tied
+ * together with '|'. They appear in the tree as the left children of a chain
+ * of '|' subres.
+ ^ static struct subre *parse(struct vars *, int, int, struct state *,
+ ^ struct state *);
+ */
+static struct subre *
+parse(
+ struct vars *v,
+ int stopper, /* EOS or ')' */
+ int type, /* LACON (lookahead subRE) or PLAIN */
+ struct state *init, /* initial state */
+ struct state *final) /* final state */
+{
+ struct state *left, *right; /* scaffolding for branch */
+ struct subre *branches; /* top level */
+ struct subre *branch; /* current branch */
+ struct subre *t; /* temporary */
+ int firstbranch; /* is this the first branch? */
+
+ assert(stopper == ')' || stopper == EOS);
+
+ branches = subre(v, '|', LONGER, init, final);
+ NOERRN();
+ branch = branches;
+ firstbranch = 1;
+ do { /* a branch */
+ if (!firstbranch) {
+ /*
+ * Need a place to hang the branch.
+ */
+
+ branch->right = subre(v, '|', LONGER, init, final);
+ NOERRN();
+ branch = branch->right;
+ }
+ firstbranch = 0;
+ left = newstate(v->nfa);
+ right = newstate(v->nfa);
+ NOERRN();
+ EMPTYARC(init, left);
+ EMPTYARC(right, final);
+ NOERRN();
+ branch->left = parsebranch(v, stopper, type, left, right, 0);
+ NOERRN();
+ branch->flags |= UP(branch->flags | branch->left->flags);
+ if ((branch->flags &~ branches->flags) != 0) { /* new flags */
+ for (t = branches; t != branch; t = t->right) {
+ t->flags |= branch->flags;
+ }
+ }
+ } while (EAT('|'));
+ assert(SEE(stopper) || SEE(EOS));
+
+ if (!SEE(stopper)) {
+ assert(stopper == ')' && SEE(EOS));
+ ERR(REG_EPAREN);
+ }
+
+ /*
+ * Optimize out simple cases.
+ */
+
+ if (branch == branches) { /* only one branch */
+ assert(branch->right == NULL);
+ t = branch->left;
+ branch->left = NULL;
+ freesubre(v, branches);
+ branches = t;
+ } else if (!MESSY(branches->flags)) { /* no interesting innards */
+ freesubre(v, branches->left);
+ branches->left = NULL;
+ freesubre(v, branches->right);
+ branches->right = NULL;
+ branches->op = '=';
+ }
+
+ return branches;
+}
+
+/*
+ - parsebranch - parse one branch of an RE
+ * This mostly manages concatenation, working closely with parseqatom().
+ * Concatenated things are bundled up as much as possible, with separate
+ * ',' nodes introduced only when necessary due to substructure.
+ ^ static struct subre *parsebranch(struct vars *, int, int, struct state *,
+ ^ struct state *, int);
+ */
+static struct subre *
+parsebranch(
+ struct vars *v,
+ int stopper, /* EOS or ')' */
+ int type, /* LACON (lookahead subRE) or PLAIN */
+ struct state *left, /* leftmost state */
+ struct state *right, /* rightmost state */
+ int partial) /* is this only part of a branch? */
+{
+ struct state *lp; /* left end of current construct */
+ int seencontent; /* is there anything in this branch yet? */
+ struct subre *t;
+
+ lp = left;
+ seencontent = 0;
+ t = subre(v, '=', 0, left, right); /* op '=' is tentative */
+ NOERRN();
+ while (!SEE('|') && !SEE(stopper) && !SEE(EOS)) {
+ if (seencontent) { /* implicit concat operator */
+ lp = newstate(v->nfa);
+ NOERRN();
+ moveins(v->nfa, right, lp);
+ }
+ seencontent = 1;
+
+ /* NB, recursion in parseqatom() may swallow rest of branch */
+ parseqatom(v, stopper, type, lp, right, t);
+ }
+
+ if (!seencontent) { /* empty branch */
+ if (!partial) {
+ NOTE(REG_UUNSPEC);
+ }
+ assert(lp == left);
+ EMPTYARC(left, right);
+ }
+
+ return t;
+}
+
+/*
+ - parseqatom - parse one quantified atom or constraint of an RE
+ * The bookkeeping near the end cooperates very closely with parsebranch(); in
+ * particular, it contains a recursion that can involve parsing the rest of
+ * the branch, making this function's name somewhat inaccurate.
+ ^ static void parseqatom(struct vars *, int, int, struct state *,
+ ^ struct state *, struct subre *);
+ */
+static void
+parseqatom(
+ struct vars *v,
+ int stopper, /* EOS or ')' */
+ int type, /* LACON (lookahead subRE) or PLAIN */
+ struct state *lp, /* left state to hang it on */
+ struct state *rp, /* right state to hang it on */
+ struct subre *top) /* subtree top */
+{
+ struct state *s; /* temporaries for new states */
+ struct state *s2;
+#define ARCV(t, val) newarc(v->nfa, t, val, lp, rp)
+ int m, n;
+ struct subre *atom; /* atom's subtree */
+ struct subre *t;
+ int cap; /* capturing parens? */
+ int pos; /* positive lookahead? */
+ int subno; /* capturing-parens or backref number */
+ int atomtype;
+ int qprefer; /* quantifier short/long preference */
+ int f;
+ struct subre **atomp; /* where the pointer to atom is */
+
+ /*
+ * Initial bookkeeping.
+ */
+
+ atom = NULL;
+ assert(lp->nouts == 0); /* must string new code */
+ assert(rp->nins == 0); /* between lp and rp */
+ subno = 0; /* just to shut lint up */
+
+ /*
+ * An atom or constraint...
+ */
+
+ atomtype = v->nexttype;
+ switch (atomtype) {
+ /* first, constraints, which end by returning */
+ case '^':
+ ARCV('^', 1);
+ if (v->cflags&REG_NLANCH) {
+ ARCV(BEHIND, v->nlcolor);
+ }
+ NEXT();
+ return;
+ case '$':
+ ARCV('$', 1);
+ if (v->cflags&REG_NLANCH) {
+ ARCV(AHEAD, v->nlcolor);
+ }
+ NEXT();
+ return;
+ case SBEGIN:
+ ARCV('^', 1); /* BOL */
+ ARCV('^', 0); /* or BOS */
+ NEXT();
+ return;
+ case SEND:
+ ARCV('$', 1); /* EOL */
+ ARCV('$', 0); /* or EOS */
+ NEXT();
+ return;
+ case '<':
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ nonword(v, BEHIND, lp, s);
+ word(v, AHEAD, s, rp);
+ return;
+ case '>':
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ word(v, BEHIND, lp, s);
+ nonword(v, AHEAD, s, rp);
+ return;
+ case WBDRY:
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ nonword(v, BEHIND, lp, s);
+ word(v, AHEAD, s, rp);
+ s = newstate(v->nfa);
+ NOERR();
+ word(v, BEHIND, lp, s);
+ nonword(v, AHEAD, s, rp);
+ return;
+ case NWBDRY:
+ wordchrs(v); /* does NEXT() */
+ s = newstate(v->nfa);
+ NOERR();
+ word(v, BEHIND, lp, s);
+ word(v, AHEAD, s, rp);
+ s = newstate(v->nfa);
+ NOERR();
+ nonword(v, BEHIND, lp, s);
+ nonword(v, AHEAD, s, rp);
+ return;
+ case LACON: /* lookahead constraint */
+ pos = v->nextvalue;
+ NEXT();
+ s = newstate(v->nfa);
+ s2 = newstate(v->nfa);
+ NOERR();
+ t = parse(v, ')', LACON, s, s2);
+ freesubre(v, t); /* internal structure irrelevant */
+ assert(SEE(')') || ISERR());
+ NEXT();
+ n = newlacon(v, s, s2, pos);
+ NOERR();
+ ARCV(LACON, n);
+ return;
+
+ /*
+ * Then errors, to get them out of the way.
+ */
+
+ case '*':
+ case '+':
+ case '?':
+ case '{':
+ ERR(REG_BADRPT);
+ return;
+ default:
+ ERR(REG_ASSERT);
+ return;
+
+ /*
+ * Then plain characters, and minor variants on that theme.
+ */
+
+ case ')': /* unbalanced paren */
+ if ((v->cflags&REG_ADVANCED) != REG_EXTENDED) {
+ ERR(REG_EPAREN);
+ return;
+ }
+
+ /*
+ * Legal in EREs due to specification botch.
+ */
+
+ NOTE(REG_UPBOTCH);
+ /* fallthrough into case PLAIN */
+ case PLAIN:
+ onechr(v, v->nextvalue, lp, rp);
+ okcolors(v->nfa, v->cm);
+ NOERR();
+ NEXT();
+ break;
+ case '[':
+ if (v->nextvalue == 1) {
+ bracket(v, lp, rp);
+ } else {
+ cbracket(v, lp, rp);
+ }
+ assert(SEE(']') || ISERR());
+ NEXT();
+ break;
+ case '.':
+ rainbow(v->nfa, v->cm, PLAIN,
+ (v->cflags&REG_NLSTOP) ? v->nlcolor : COLORLESS, lp, rp);
+ NEXT();
+ break;
+
+ /*
+ * And finally the ugly stuff.
+ */
+
+ case '(': /* value flags as capturing or non */
+ cap = (type == LACON) ? 0 : v->nextvalue;
+ if (cap) {
+ v->nsubexp++;
+ subno = v->nsubexp;
+ if ((size_t)subno >= v->nsubs) {
+ moresubs(v, subno);
+ }
+ assert((size_t)subno < v->nsubs);
+ } else {
+ atomtype = PLAIN; /* something that's not '(' */
+ }
+ NEXT();
+
+ /*
+ * Need new endpoints because tree will contain pointers.
+ */
+
+ s = newstate(v->nfa);
+ s2 = newstate(v->nfa);
+ NOERR();
+ EMPTYARC(lp, s);
+ EMPTYARC(s2, rp);
+ NOERR();
+ atom = parse(v, ')', PLAIN, s, s2);
+ assert(SEE(')') || ISERR());
+ NEXT();
+ NOERR();
+ if (cap) {
+ v->subs[subno] = atom;
+ t = subre(v, '(', atom->flags|CAP, lp, rp);
+ NOERR();
+ t->subno = subno;
+ t->left = atom;
+ atom = t;
+ }
+
+ /*
+ * Postpone everything else pending possible {0}.
+ */
+
+ break;
+ case BACKREF: /* the Feature From The Black Lagoon */
+ INSIST(type != LACON, REG_ESUBREG);
+ INSIST(v->nextvalue < v->nsubs, REG_ESUBREG);
+ INSIST(v->subs[v->nextvalue] != NULL, REG_ESUBREG);
+ NOERR();
+ assert(v->nextvalue > 0);
+ atom = subre(v, 'b', BACKR, lp, rp);
+ subno = v->nextvalue;
+ atom->subno = subno;
+ EMPTYARC(lp, rp); /* temporarily, so there's something */
+ NEXT();
+ break;
+ }
+
+ /*
+ * ...and an atom may be followed by a quantifier.
+ */
+
+ switch (v->nexttype) {
+ case '*':
+ m = 0;
+ n = INFINITY;
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ NEXT();
+ break;
+ case '+':
+ m = 1;
+ n = INFINITY;
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ NEXT();
+ break;
+ case '?':
+ m = 0;
+ n = 1;
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ NEXT();
+ break;
+ case '{':
+ NEXT();
+ m = scannum(v);
+ if (EAT(',')) {
+ if (SEE(DIGIT)) {
+ n = scannum(v);
+ } else {
+ n = INFINITY;
+ }
+ if (m > n) {
+ ERR(REG_BADBR);
+ return;
+ }
+
+ /*
+ * {m,n} exercises preference, even if it's {m,m}
+ */
+
+ qprefer = (v->nextvalue) ? LONGER : SHORTER;
+ } else {
+ n = m;
+ /*
+ * {m} passes operand's preference through.
+ */
+
+ qprefer = 0;
+ }
+ if (!SEE('}')) { /* catches errors too */
+ ERR(REG_BADBR);
+ return;
+ }
+ NEXT();
+ break;
+ default: /* no quantifier */
+ m = n = 1;
+ qprefer = 0;
+ break;
+ }
+
+ /*
+ * Annoying special case: {0} or {0,0} cancels everything.
+ */
+
+ if (m == 0 && n == 0) {
+ if (atom != NULL) {
+ freesubre(v, atom);
+ }
+ if (atomtype == '(') {
+ v->subs[subno] = NULL;
+ }
+ delsub(v->nfa, lp, rp);
+ EMPTYARC(lp, rp);
+ return;
+ }
+
+ /*
+ * If not a messy case, avoid hard part.
+ */
+
+ assert(!MESSY(top->flags));
+ f = top->flags | qprefer | ((atom != NULL) ? atom->flags : 0);
+ if (atomtype != '(' && atomtype != BACKREF && !MESSY(UP(f))) {
+ if (!(m == 1 && n == 1)) {
+ repeat(v, lp, rp, m, n);
+ }
+ if (atom != NULL) {
+ freesubre(v, atom);
+ }
+ top->flags = f;
+ return;
+ }
+
+ /*
+ * hard part: something messy
+ * That is, capturing parens, back reference, short/long clash, or an atom
+ * with substructure containing one of those.
+ */
+
+ /*
+ * Now we'll need a subre for the contents even if they're boring.
+ */
+
+ if (atom == NULL) {
+ atom = subre(v, '=', 0, lp, rp);
+ NOERR();
+ }
+
+ /*
+ * Prepare a general-purpose state skeleton.
+ *
+ * ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
+ * / /
+ * [lp] ----> [s2] ----bypass---------------------
+ *
+ * where bypass is an empty, and prefix is some repetitions of atom
+ */
+
+ s = newstate(v->nfa); /* first, new endpoints for the atom */
+ s2 = newstate(v->nfa);
+ NOERR();
+ moveouts(v->nfa, lp, s);
+ moveins(v->nfa, rp, s2);
+ NOERR();
+ atom->begin = s;
+ atom->end = s2;
+ s = newstate(v->nfa); /* and spots for prefix and bypass */
+ s2 = newstate(v->nfa);
+ NOERR();
+ EMPTYARC(lp, s);
+ EMPTYARC(lp, s2);
+ NOERR();
+
+ /*
+ * Break remaining subRE into x{...} and what follows.
+ */
+
+ t = subre(v, '.', COMBINE(qprefer, atom->flags), lp, rp);
+ t->left = atom;
+ atomp = &t->left;
+
+ /*
+ * Here we should recurse... but we must postpone that to the end.
+ */
+
+ /*
+ * Split top into prefix and remaining.
+ */
+
+ assert(top->op == '=' && top->left == NULL && top->right == NULL);
+ top->left = subre(v, '=', top->flags, top->begin, lp);
+ top->op = '.';
+ top->right = t;
+
+ /*
+ * If it's a backref, now is the time to replicate the subNFA.
+ */
+
+ if (atomtype == BACKREF) {
+ assert(atom->begin->nouts == 1); /* just the EMPTY */
+ delsub(v->nfa, atom->begin, atom->end);
+ assert(v->subs[subno] != NULL);
+
+ /*
+ * And here's why the recursion got postponed: it must wait until the
+ * skeleton is filled in, because it may hit a backref that wants to
+ * copy the filled-in skeleton.
+ */
+
+ dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
+ atom->begin, atom->end);
+ NOERR();
+ }
+
+ /*
+ * It's quantifier time; first, turn x{0,...} into x{1,...}|empty
+ */
+
+ if (m == 0) {
+ EMPTYARC(s2, atom->end);/* the bypass */
+ assert(PREF(qprefer) != 0);
+ f = COMBINE(qprefer, atom->flags);
+ t = subre(v, '|', f, lp, atom->end);
+ NOERR();
+ t->left = atom;
+ t->right = subre(v, '|', PREF(f), s2, atom->end);
+ NOERR();
+ t->right->left = subre(v, '=', 0, s2, atom->end);
+ NOERR();
+ *atomp = t;
+ atomp = &t->left;
+ m = 1;
+ }
+
+ /*
+ * Deal with the rest of the quantifier.
+ */
+
+ if (atomtype == BACKREF) {
+ /*
+ * Special case: backrefs have internal quantifiers.
+ */
+
+ EMPTYARC(s, atom->begin); /* empty prefix */
+
+ /*
+ * Just stuff everything into atom.
+ */
+
+ repeat(v, atom->begin, atom->end, m, n);
+ atom->min = (short) m;
+ atom->max = (short) n;
+ atom->flags |= COMBINE(qprefer, atom->flags);
+ } else if (m == 1 && n == 1) {
+ /*
+ * No/vacuous quantifier: done.
+ */
+
+ EMPTYARC(s, atom->begin); /* empty prefix */
+ } else {
+ /*
+ * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second
+ * x
+ */
+
+ dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
+ assert(m >= 1 && m != INFINITY && n >= 1);
+ repeat(v, s, atom->begin, m-1, (n == INFINITY) ? n : n-1);
+ f = COMBINE(qprefer, atom->flags);
+ t = subre(v, '.', f, s, atom->end); /* prefix and atom */
+ NOERR();
+ t->left = subre(v, '=', PREF(f), s, atom->begin);
+ NOERR();
+ t->right = atom;
+ *atomp = t;
+ }
+
+ /*
+ * And finally, look after that postponed recursion.
+ */
+
+ t = top->right;
+ if (!(SEE('|') || SEE(stopper) || SEE(EOS))) {
+ t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
+ } else {
+ EMPTYARC(atom->end, rp);
+ t->right = subre(v, '=', 0, atom->end, rp);
+ }
+ assert(SEE('|') || SEE(stopper) || SEE(EOS));
+ t->flags |= COMBINE(t->flags, t->right->flags);
+ top->flags |= COMBINE(top->flags, t->flags);
+}
+
+/*
+ - nonword - generate arcs for non-word-character ahead or behind
+ ^ static void nonword(struct vars *, int, struct state *, struct state *);
+ */
+static void
+nonword(
+ struct vars *v,
+ int dir, /* AHEAD or BEHIND */
+ struct state *lp,
+ struct state *rp)
+{
+ int anchor = (dir == AHEAD) ? '$' : '^';
+
+ assert(dir == AHEAD || dir == BEHIND);
+ newarc(v->nfa, anchor, 1, lp, rp);
+ newarc(v->nfa, anchor, 0, lp, rp);
+ colorcomplement(v->nfa, v->cm, dir, v->wordchrs, lp, rp);
+ /* (no need for special attention to \n) */
+}
+
+/*
+ - word - generate arcs for word character ahead or behind
+ ^ static void word(struct vars *, int, struct state *, struct state *);
+ */
+static void
+word(
+ struct vars *v,
+ int dir, /* AHEAD or BEHIND */
+ struct state *lp,
+ struct state *rp)
+{
+ assert(dir == AHEAD || dir == BEHIND);
+ cloneouts(v->nfa, v->wordchrs, lp, rp, dir);
+ /* (no need for special attention to \n) */
+}
+
+/*
+ - scannum - scan a number
+ ^ static int scannum(struct vars *);
+ */
+static int /* value, <= DUPMAX */
+scannum(
+ struct vars *v)
+{
+ int n = 0;
+
+ while (SEE(DIGIT) && n < DUPMAX) {
+ n = n*10 + v->nextvalue;
+ NEXT();
+ }
+ if (SEE(DIGIT) || n > DUPMAX) {
+ ERR(REG_BADBR);
+ return 0;
+ }
+ return n;
+}
+
+/*
+ - repeat - replicate subNFA for quantifiers
+ * The duplication sequences used here are chosen carefully so that any
+ * pointers starting out pointing into the subexpression end up pointing into
+ * the last occurrence. (Note that it may not be strung between the same left
+ * and right end states, however!) This used to be important for the subRE
+ * tree, although the important bits are now handled by the in-line code in
+ * parse(), and when this is called, it doesn't matter any more.
+ ^ static void repeat(struct vars *, struct state *, struct state *, int, int);
+ */
+static void
+repeat(
+ struct vars *v,
+ struct state *lp,
+ struct state *rp,
+ int m,
+ int n)
+{
+#define SOME 2
+#define INF 3
+#define PAIR(x, y) ((x)*4 + (y))
+#define REDUCE(x) ( ((x) == INFINITY) ? INF : (((x) > 1) ? SOME : (x)) )
+ const int rm = REDUCE(m);
+ const int rn = REDUCE(n);
+ struct state *s, *s2;
+
+ switch (PAIR(rm, rn)) {
+ case PAIR(0, 0): /* empty string */
+ delsub(v->nfa, lp, rp);
+ EMPTYARC(lp, rp);
+ break;
+ case PAIR(0, 1): /* do as x| */
+ EMPTYARC(lp, rp);
+ break;
+ case PAIR(0, SOME): /* do as x{1,n}| */
+ repeat(v, lp, rp, 1, n);
+ NOERR();
+ EMPTYARC(lp, rp);
+ break;
+ case PAIR(0, INF): /* loop x around */
+ s = newstate(v->nfa);
+ NOERR();
+ moveouts(v->nfa, lp, s);
+ moveins(v->nfa, rp, s);
+ EMPTYARC(lp, s);
+ EMPTYARC(s, rp);
+ break;
+ case PAIR(1, 1): /* no action required */
+ break;
+ case PAIR(1, SOME): /* do as x{0,n-1}x = (x{1,n-1}|)x */
+ s = newstate(v->nfa);
+ NOERR();
+ moveouts(v->nfa, lp, s);
+ dupnfa(v->nfa, s, rp, lp, s);
+ NOERR();
+ repeat(v, lp, s, 1, n-1);
+ NOERR();
+ EMPTYARC(lp, s);
+ break;
+ case PAIR(1, INF): /* add loopback arc */
+ s = newstate(v->nfa);
+ s2 = newstate(v->nfa);
+ NOERR();
+ moveouts(v->nfa, lp, s);
+ moveins(v->nfa, rp, s2);
+ EMPTYARC(lp, s);
+ EMPTYARC(s2, rp);
+ EMPTYARC(s2, s);
+ break;
+ case PAIR(SOME, SOME): /* do as x{m-1,n-1}x */
+ s = newstate(v->nfa);
+ NOERR();
+ moveouts(v->nfa, lp, s);
+ dupnfa(v->nfa, s, rp, lp, s);
+ NOERR();
+ repeat(v, lp, s, m-1, n-1);
+ break;
+ case PAIR(SOME, INF): /* do as x{m-1,}x */
+ s = newstate(v->nfa);
+ NOERR();
+ moveouts(v->nfa, lp, s);
+ dupnfa(v->nfa, s, rp, lp, s);
+ NOERR();
+ repeat(v, lp, s, m-1, n);
+ break;
+ default:
+ ERR(REG_ASSERT);
+ break;
+ }
+}
+
+/*
+ - bracket - handle non-complemented bracket expression
+ * Also called from cbracket for complemented bracket expressions.
+ ^ static void bracket(struct vars *, struct state *, struct state *);
+ */
+static void
+bracket(
+ struct vars *v,
+ struct state *lp,
+ struct state *rp)
+{
+ assert(SEE('['));
+ NEXT();
+ while (!SEE(']') && !SEE(EOS)) {
+ brackpart(v, lp, rp);
+ }
+ assert(SEE(']') || ISERR());
+ okcolors(v->nfa, v->cm);
+}
+
+/*
+ - cbracket - handle complemented bracket expression
+ * We do it by calling bracket() with dummy endpoints, and then complementing
+ * the result. The alternative would be to invoke rainbow(), and then delete
+ * arcs as the b.e. is seen... but that gets messy.
+ ^ static void cbracket(struct vars *, struct state *, struct state *);
+ */
+static void
+cbracket(
+ struct vars *v,
+ struct state *lp,
+ struct state *rp)
+{
+ struct state *left = newstate(v->nfa);
+ struct state *right = newstate(v->nfa);
+
+ NOERR();
+ bracket(v, left, right);
+ if (v->cflags&REG_NLSTOP) {
+ newarc(v->nfa, PLAIN, v->nlcolor, left, right);
+ }
+ NOERR();
+
+ assert(lp->nouts == 0); /* all outarcs will be ours */
+
+ /*
+ * Easy part of complementing, and all there is to do since the MCCE code
+ * was removed.
+ */
+
+ colorcomplement(v->nfa, v->cm, PLAIN, left, lp, rp);
+ NOERR();
+ dropstate(v->nfa, left);
+ assert(right->nins == 0);
+ freestate(v->nfa, right);
+ return;
+}
+
+/*
+ - brackpart - handle one item (or range) within a bracket expression
+ ^ static void brackpart(struct vars *, struct state *, struct state *);
+ */
+static void
+brackpart(
+ struct vars *v,
+ struct state *lp,
+ struct state *rp)
+{
+ celt startc, endc;
+ struct cvec *cv;
+ const chr *startp, *endp;
+ chr c[1];
+
+ /*
+ * Parse something, get rid of special cases, take shortcuts.
+ */
+
+ switch (v->nexttype) {
+ case RANGE: /* a-b-c or other botch */
+ ERR(REG_ERANGE);
+ return;
+ break;
+ case PLAIN:
+ c[0] = v->nextvalue;
+ NEXT();
+
+ /*
+ * Shortcut for ordinary chr (not range).
+ */
+
+ if (!SEE(RANGE)) {
+ onechr(v, c[0], lp, rp);
+ return;
+ }
+ startc = element(v, c, c+1);
+ NOERR();
+ break;
+ case COLLEL:
+ startp = v->now;
+ endp = scanplain(v);
+ INSIST(startp < endp, REG_ECOLLATE);
+ NOERR();
+ startc = element(v, startp, endp);
+ NOERR();
+ break;
+ case ECLASS:
+ startp = v->now;
+ endp = scanplain(v);
+ INSIST(startp < endp, REG_ECOLLATE);
+ NOERR();
+ startc = element(v, startp, endp);
+ NOERR();
+ cv = eclass(v, startc, (v->cflags&REG_ICASE));
+ NOERR();
+ dovec(v, cv, lp, rp);
+ return;
+ break;
+ case CCLASS:
+ startp = v->now;
+ endp = scanplain(v);
+ INSIST(startp < endp, REG_ECTYPE);
+ NOERR();
+ cv = cclass(v, startp, endp, (v->cflags&REG_ICASE));
+ NOERR();
+ dovec(v, cv, lp, rp);
+ return;
+ break;
+ default:
+ ERR(REG_ASSERT);
+ return;
+ break;
+ }
+
+ if (SEE(RANGE)) {
+ NEXT();
+ switch (v->nexttype) {
+ case PLAIN:
+ case RANGE:
+ c[0] = v->nextvalue;
+ NEXT();
+ endc = element(v, c, c+1);
+ NOERR();
+ break;
+ case COLLEL:
+ startp = v->now;
+ endp = scanplain(v);
+ INSIST(startp < endp, REG_ECOLLATE);
+ NOERR();
+ endc = element(v, startp, endp);
+ NOERR();
+ break;
+ default:
+ ERR(REG_ERANGE);
+ return;
+ break;
+ }
+ } else {
+ endc = startc;
+ }
+
+ /*
+ * Ranges are unportable. Actually, standard C does guarantee that digits
+ * are contiguous, but making that an exception is just too complicated.
+ */
+
+ if (startc != endc) {
+ NOTE(REG_UUNPORT);
+ }
+ cv = range(v, startc, endc, (v->cflags&REG_ICASE));
+ NOERR();
+ dovec(v, cv, lp, rp);
+}
+
+/*
+ - scanplain - scan PLAIN contents of [. etc.
+ * Certain bits of trickery in lex.c know that this code does not try to look
+ * past the final bracket of the [. etc.
+ ^ static const chr *scanplain(struct vars *);
+ */
+static const chr * /* just after end of sequence */
+scanplain(
+ struct vars *v)
+{
+ const chr *endp;
+
+ assert(SEE(COLLEL) || SEE(ECLASS) || SEE(CCLASS));
+ NEXT();
+
+ endp = v->now;
+ while (SEE(PLAIN)) {
+ endp = v->now;
+ NEXT();
+ }
+
+ assert(SEE(END) || ISERR());
+ NEXT();
+
+ return endp;
+}
+
+/*
+ - onechr - fill in arcs for a plain character, and possible case complements
+ * This is mostly a shortcut for efficient handling of the common case.
+ ^ static void onechr(struct vars *, pchr, struct state *, struct state *);
+ */
+static void
+onechr(
+ struct vars *v,
+ pchr c,
+ struct state *lp,
+ struct state *rp)
+{
+ if (!(v->cflags&REG_ICASE)) {
+ newarc(v->nfa, PLAIN, subcolor(v->cm, c), lp, rp);
+ return;
+ }
+
+ /*
+ * Rats, need general case anyway...
+ */
+
+ dovec(v, allcases(v, c), lp, rp);
+}
+
+/*
+ - dovec - fill in arcs for each element of a cvec
+ ^ static void dovec(struct vars *, struct cvec *, struct state *,
+ ^ struct state *);
+ */
+static void
+dovec(
+ struct vars *v,
+ struct cvec *cv,
+ struct state *lp,
+ struct state *rp)
+{
+ chr ch, from, to;
+ const chr *p;
+ int i;
+
+ for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) {
+ ch = *p;
+ newarc(v->nfa, PLAIN, subcolor(v->cm, ch), lp, rp);
+ }
+
+ for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) {
+ from = *p;
+ to = *(p+1);
+ if (from <= to) {
+ subrange(v, from, to, lp, rp);
+ }
+ }
+
+}
+
+/*
+ - wordchrs - set up word-chr list for word-boundary stuff, if needed
+ * The list is kept as a bunch of arcs between two dummy states; it's disposed
+ * of by the unreachable-states sweep in NFA optimization. Does NEXT(). Must
+ * not be called from any unusual lexical context. This should be reconciled
+ * with the \w etc. handling in lex.c, and should be cleaned up to reduce
+ * dependencies on input scanning.
+ ^ static void wordchrs(struct vars *);
+ */
+static void
+wordchrs(
+ struct vars *v)
+{
+ struct state *left, *right;
+
+ if (v->wordchrs != NULL) {
+ NEXT(); /* for consistency */
+ return;
+ }
+
+ left = newstate(v->nfa);
+ right = newstate(v->nfa);
+ NOERR();
+
+ /*
+ * Fine point: implemented with [::], and lexer will set REG_ULOCALE.
+ */
+
+ lexword(v);
+ NEXT();
+ assert(v->savenow != NULL && SEE('['));
+ bracket(v, left, right);
+ assert((v->savenow != NULL && SEE(']')) || ISERR());
+ NEXT();
+ NOERR();
+ v->wordchrs = left;
+}
+
+/*
+ - subre - allocate a subre
+ ^ static struct subre *subre(struct vars *, int, int, struct state *,
+ ^ struct state *);
+ */
+static struct subre *
+subre(
+ struct vars *v,
+ int op,
+ int flags,
+ struct state *begin,
+ struct state *end)
+{
+ struct subre *ret = v->treefree;
+
+ if (ret != NULL) {
+ v->treefree = ret->left;
+ } else {
+ ret = (struct subre *) MALLOC(sizeof(struct subre));
+ if (ret == NULL) {
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+ ret->chain = v->treechain;
+ v->treechain = ret;
+ }
+
+ assert(strchr("|.b(=", op) != NULL);
+
+ ret->op = op;
+ ret->flags = flags;
+ ret->retry = 0;
+ ret->subno = 0;
+ ret->min = ret->max = 1;
+ ret->left = NULL;
+ ret->right = NULL;
+ ret->begin = begin;
+ ret->end = end;
+ ZAPCNFA(ret->cnfa);
+
+ return ret;
+}
+
+/*
+ - freesubre - free a subRE subtree
+ ^ static void freesubre(struct vars *, struct subre *);
+ */
+static void
+freesubre(
+ struct vars *v, /* might be NULL */
+ struct subre *sr)
+{
+ if (sr == NULL) {
+ return;
+ }
+
+ if (sr->left != NULL) {
+ freesubre(v, sr->left);
+ }
+ if (sr->right != NULL) {
+ freesubre(v, sr->right);
+ }
+
+ freesrnode(v, sr);
+}
+
+/*
+ - freesrnode - free one node in a subRE subtree
+ ^ static void freesrnode(struct vars *, struct subre *);
+ */
+static void
+freesrnode(
+ struct vars *v, /* might be NULL */
+ struct subre *sr)
+{
+ if (sr == NULL) {
+ return;
+ }
+
+ if (!NULLCNFA(sr->cnfa)) {
+ freecnfa(&sr->cnfa);
+ }
+ sr->flags = 0;
+
+ if (v != NULL) {
+ sr->left = v->treefree;
+ v->treefree = sr;
+ } else {
+ FREE(sr);
+ }
+}
+
+/*
+ - optst - optimize a subRE subtree
+ ^ static void optst(struct vars *, struct subre *);
+ */
+static void
+optst(
+ struct vars *v,
+ struct subre *t)
+{
+ /*
+ * DGP (2007-11-13): I assume it was the programmer's intent to eventually
+ * come back and add code to optimize subRE trees, but the routine coded
+ * just spends effort traversing the tree and doing nothing. We can do
+ * nothing with less effort.
+ */
+
+ return;
+}
+
+/*
+ - numst - number tree nodes (assigning retry indexes)
+ ^ static int numst(struct subre *, int);
+ */
+static int /* next number */
+numst(
+ struct subre *t,
+ int start) /* starting point for subtree numbers */
+{
+ int i;
+
+ assert(t != NULL);
+
+ i = start;
+ t->retry = (short) i++;
+ if (t->left != NULL) {
+ i = numst(t->left, i);
+ }
+ if (t->right != NULL) {
+ i = numst(t->right, i);
+ }
+ return i;
+}
+
+/*
+ - markst - mark tree nodes as INUSE
+ ^ static void markst(struct subre *);
+ */
+static void
+markst(
+ struct subre *t)
+{
+ assert(t != NULL);
+
+ t->flags |= INUSE;
+ if (t->left != NULL) {
+ markst(t->left);
+ }
+ if (t->right != NULL) {
+ markst(t->right);
+ }
+}
+
+/*
+ - cleanst - free any tree nodes not marked INUSE
+ ^ static void cleanst(struct vars *);
+ */
+static void
+cleanst(
+ struct vars *v)
+{
+ struct subre *t;
+ struct subre *next;
+
+ for (t = v->treechain; t != NULL; t = next) {
+ next = t->chain;
+ if (!(t->flags&INUSE)) {
+ FREE(t);
+ }
+ }
+ v->treechain = NULL;
+ v->treefree = NULL; /* just on general principles */
+}
+
+/*
+ - nfatree - turn a subRE subtree into a tree of compacted NFAs
+ ^ static long nfatree(struct vars *, struct subre *, FILE *);
+ */
+static long /* optimize results from top node */
+nfatree(
+ struct vars *v,
+ struct subre *t,
+ FILE *f) /* for debug output */
+{
+ assert(t != NULL && t->begin != NULL);
+
+ if (t->left != NULL) {
+ (DISCARD) nfatree(v, t->left, f);
+ }
+ if (t->right != NULL) {
+ (DISCARD) nfatree(v, t->right, f);
+ }
+
+ return nfanode(v, t, f);
+}
+
+/*
+ - nfanode - do one NFA for nfatree
+ ^ static long nfanode(struct vars *, struct subre *, FILE *);
+ */
+static long /* optimize results */
+nfanode(
+ struct vars *v,
+ struct subre *t,
+ FILE *f) /* for debug output */
+{
+ struct nfa *nfa;
+ long ret = 0;
+ char idbuf[50];
+
+ assert(t->begin != NULL);
+
+ if (f != NULL) {
+ fprintf(f, "\n\n\n========= TREE NODE %s ==========\n",
+ stid(t, idbuf, sizeof(idbuf)));
+ }
+ nfa = newnfa(v, v->cm, v->nfa);
+ NOERRZ();
+ dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
+ if (!ISERR()) {
+ specialcolors(nfa);
+ ret = optimize(nfa, f);
+ }
+ if (!ISERR()) {
+ compact(nfa, &t->cnfa);
+ }
+
+ freenfa(nfa);
+ return ret;
+}
+
+/*
+ - newlacon - allocate a lookahead-constraint subRE
+ ^ static int newlacon(struct vars *, struct state *, struct state *, int);
+ */
+static int /* lacon number */
+newlacon(
+ struct vars *v,
+ struct state *begin,
+ struct state *end,
+ int pos)
+{
+ struct subre *sub;
+ int n;
+
+ if (v->nlacons == 0) {
+ v->lacons = (struct subre *) MALLOC(2 * sizeof(struct subre));
+ n = 1; /* skip 0th */
+ v->nlacons = 2;
+ } else {
+ v->lacons = (struct subre *) REALLOC(v->lacons,
+ (v->nlacons+1)*sizeof(struct subre));
+ n = v->nlacons++;
+ }
+
+ if (v->lacons == NULL) {
+ ERR(REG_ESPACE);
+ return 0;
+ }
+
+ sub = &v->lacons[n];
+ sub->begin = begin;
+ sub->end = end;
+ sub->subno = pos;
+ ZAPCNFA(sub->cnfa);
+ return n;
+}
+
+/*
+ - freelacons - free lookahead-constraint subRE vector
+ ^ static void freelacons(struct subre *, int);
+ */
+static void
+freelacons(
+ struct subre *subs,
+ int n)
+{
+ struct subre *sub;
+ int i;
+
+ assert(n > 0);
+ for (sub=subs+1, i=n-1; i>0; sub++, i--) { /* no 0th */
+ if (!NULLCNFA(sub->cnfa)) {
+ freecnfa(&sub->cnfa);
+ }
+ }
+ FREE(subs);
+}
+
+/*
+ - rfree - free a whole RE (insides of regfree)
+ ^ static void rfree(regex_t *);
+ */
+static void
+rfree(
+ regex_t *re)
+{
+ struct guts *g;
+
+ if (re == NULL || re->re_magic != REMAGIC) {
+ return;
+ }
+
+ re->re_magic = 0; /* invalidate RE */
+ g = (struct guts *) re->re_guts;
+ re->re_guts = NULL;
+ re->re_fns = NULL;
+ g->magic = 0;
+ freecm(&g->cmap);
+ if (g->tree != NULL) {
+ freesubre(NULL, g->tree);
+ }
+ if (g->lacons != NULL) {
+ freelacons(g->lacons, g->nlacons);
+ }
+ if (!NULLCNFA(g->search)) {
+ freecnfa(&g->search);
+ }
+ FREE(g);
+}
+
+/*
+ - dump - dump an RE in human-readable form
+ ^ static void dump(regex_t *, FILE *);
+ */
+static void
+dump(
+ regex_t *re,
+ FILE *f)
+{
+#ifdef REG_DEBUG
+ struct guts *g;
+ int i;
+
+ if (re->re_magic != REMAGIC) {
+ fprintf(f, "bad magic number (0x%x not 0x%x)\n",
+ re->re_magic, REMAGIC);
+ }
+ if (re->re_guts == NULL) {
+ fprintf(f, "NULL guts!!!\n");
+ return;
+ }
+ g = (struct guts *) re->re_guts;
+ if (g->magic != GUTSMAGIC) {
+ fprintf(f, "bad guts magic number (0x%x not 0x%x)\n",
+ g->magic, GUTSMAGIC);
+ }
+
+ fprintf(f, "\n\n\n========= DUMP ==========\n");
+ fprintf(f, "nsub %d, info 0%lo, csize %d, ntree %d\n",
+ re->re_nsub, re->re_info, re->re_csize, g->ntree);
+
+ dumpcolors(&g->cmap, f);
+ if (!NULLCNFA(g->search)) {
+ printf("\nsearch:\n");
+ dumpcnfa(&g->search, f);
+ }
+ for (i = 1; i < g->nlacons; i++) {
+ fprintf(f, "\nla%d (%s):\n", i,
+ (g->lacons[i].subno) ? "positive" : "negative");
+ dumpcnfa(&g->lacons[i].cnfa, f);
+ }
+ fprintf(f, "\n");
+ dumpst(g->tree, f, 0);
+#endif
+}
+
+/*
+ - dumpst - dump a subRE tree
+ ^ static void dumpst(struct subre *, FILE *, int);
+ */
+static void
+dumpst(
+ struct subre *t,
+ FILE *f,
+ int nfapresent) /* is the original NFA still around? */
+{
+ if (t == NULL) {
+ fprintf(f, "null tree\n");
+ } else {
+ stdump(t, f, nfapresent);
+ }
+ fflush(f);
+}
+
+/*
+ - stdump - recursive guts of dumpst
+ ^ static void stdump(struct subre *, FILE *, int);
+ */
+static void
+stdump(
+ struct subre *t,
+ FILE *f,
+ int nfapresent) /* is the original NFA still around? */
+{
+ char idbuf[50];
+
+ fprintf(f, "%s. `%c'", stid(t, idbuf, sizeof(idbuf)), t->op);
+ if (t->flags&LONGER) {
+ fprintf(f, " longest");
+ }
+ if (t->flags&SHORTER) {
+ fprintf(f, " shortest");
+ }
+ if (t->flags&MIXED) {
+ fprintf(f, " hasmixed");
+ }
+ if (t->flags&CAP) {
+ fprintf(f, " hascapture");
+ }
+ if (t->flags&BACKR) {
+ fprintf(f, " hasbackref");
+ }
+ if (!(t->flags&INUSE)) {
+ fprintf(f, " UNUSED");
+ }
+ if (t->subno != 0) {
+ fprintf(f, " (#%d)", t->subno);
+ }
+ if (t->min != 1 || t->max != 1) {
+ fprintf(f, " {%d,", t->min);
+ if (t->max != INFINITY) {
+ fprintf(f, "%d", t->max);
+ }
+ fprintf(f, "}");
+ }
+ if (nfapresent) {
+ fprintf(f, " %ld-%ld", (long)t->begin->no, (long)t->end->no);
+ }
+ if (t->left != NULL) {
+ fprintf(f, " L:%s", stid(t->left, idbuf, sizeof(idbuf)));
+ }
+ if (t->right != NULL) {
+ fprintf(f, " R:%s", stid(t->right, idbuf, sizeof(idbuf)));
+ }
+ if (!NULLCNFA(t->cnfa)) {
+ fprintf(f, "\n");
+ dumpcnfa(&t->cnfa, f);
+ }
+ fprintf(f, "\n");
+ if (t->left != NULL) {
+ stdump(t->left, f, nfapresent);
+ }
+ if (t->right != NULL) {
+ stdump(t->right, f, nfapresent);
+ }
+}
+
+/*
+ - stid - identify a subtree node for dumping
+ ^ static char *stid(struct subre *, char *, size_t);
+ */
+static const char * /* points to buf or constant string */
+stid(
+ struct subre *t,
+ char *buf,
+ size_t bufsize)
+{
+ /*
+ * Big enough for hex int or decimal t->retry?
+ */
+
+ if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) {
+ return "unable";
+ }
+ if (t->retry != 0) {
+ sprintf(buf, "%d", t->retry);
+ } else {
+ sprintf(buf, "%p", t);
+ }
+ return buf;
+}
+
+#include "regc_lex.c"
+#include "regc_color.c"
+#include "regc_nfa.c"
+#include "regc_cvec.c"
+#include "regc_locale.c"
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regcustom.h b/contrib/hsrex/regcustom.h
new file mode 100644
index 0000000..c341c23
--- /dev/null
+++ b/contrib/hsrex/regcustom.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms - with or without
+ * modification - are permitted for any purpose, provided that redistributions
+ * in source form retain this entire copyright notice and indicate the origin
+ * and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Headers if any.
+ */
+
+#ifdef REGEX_STANDALONE
+# include "regalone.h"
+#else
+# include "tclInt.h"
+#endif
+
+/*
+ * Overrides for regguts.h definitions, if any.
+ */
+
+#define FUNCPTR(name, args) (*name)args
+#ifndef REGEX_STANDALONE
+#define MALLOC(n) ckalloc(n)
+#define FREE(p) ckfree(VS(p))
+#define REALLOC(p,n) ckrealloc(VS(p),n)
+#endif
+
+/*
+ * Do not insert extras between the "begin" and "end" lines - this chunk is
+ * automatically extracted to be fitted into regex.h.
+ */
+
+/* --- begin --- */
+/* Ensure certain things don't sneak in from system headers. */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+#ifdef __REG_NOFRONT
+#undef __REG_NOFRONT
+#endif
+#ifdef __REG_NOCHAR
+#undef __REG_NOCHAR
+#endif
+/* Interface types */
+#define __REG_WIDE_T Tcl_UniChar
+#define __REG_REGOFF_T long /* Not really right, but good enough... */
+#define __REG_VOID_T void
+#define __REG_CONST const
+/* Names and declarations */
+#define __REG_WIDE_COMPILE TclReComp
+#define __REG_WIDE_EXEC TclReExec
+#define __REG_NOFRONT /* Don't want regcomp() and regexec() */
+#define __REG_NOCHAR /* Or the char versions */
+#define regfree TclReFree
+#define regerror TclReError
+/* --- end --- */
+
+/*
+ * Internal character type and related.
+ */
+
+#ifndef REGEX_STANDALONE
+typedef Tcl_UniChar chr; /* The type itself. */
+#endif
+typedef int pchr; /* What it promotes to. */
+typedef unsigned uchr; /* Unsigned type that will hold a chr. */
+typedef int celt; /* Type to hold chr, or NOCELT */
+#define NOCELT (-1) /* Celt value which is not valid chr */
+#define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */
+#define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */
+#if TCL_UTF_MAX > 3
+#define CHRBITS 32 /* Bits in a chr; must not use sizeof */
+#define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */
+#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#elif defined(REGEX_STANDALONE) && ! defined(REGEX_WCHAR)
+# define CHRBITS 8
+# define CHR_MIN 0x00
+# define CHR_MAX 0xff
+#else
+#define CHRBITS 16 /* Bits in a chr; must not use sizeof */
+#define CHR_MIN 0x0000 /* Smallest and largest chr; the value */
+#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#endif
+
+/*
+ * Functions operating on chr.
+ */
+
+#define iscalnum(x) Tcl_UniCharIsAlnum(x)
+#define iscalpha(x) Tcl_UniCharIsAlpha(x)
+#define iscdigit(x) Tcl_UniCharIsDigit(x)
+#define iscspace(x) Tcl_UniCharIsSpace(x)
+
+/*
+ * Name the external functions.
+ */
+
+#ifdef REGEX_STANDALONE
+# ifdef REGEX_WCHAR
+# define compile re_wcomp
+# define exec re_wexec
+# define __REG_NOCHAR
+# else
+# define compile re_comp
+# define exec re_exec
+# undef __REG_NOCHAR
+# endif
+#else
+#define compile TclReComp
+#define exec TclReExec
+#endif
+
+/*
+& Enable/disable debugging code (by whether REG_DEBUG is defined or not).
+*/
+
+#if 0 /* No debug unless requested by makefile. */
+#define REG_DEBUG /* */
+#endif
+
+
+#ifndef REGEX_STANDALONE
+/*
+ * Method of allocating a local workspace. We used a thread-specific data
+ * space to store this because the regular expression engine is never
+ * reentered from the same thread; it doesn't make any callbacks.
+ */
+#define AllocVars(vPtr) \
+ static Tcl_ThreadDataKey varsKey; \
+ register struct vars *vPtr = (struct vars *) \
+ Tcl_GetThreadData(&varsKey, sizeof(struct vars))
+#elif 0
+/*
+ * This strategy for allocating workspace is "more proper" in some sense, but
+ * quite a bit slower. Using TSD (as above) leads to code that is quite a bit
+ * faster in practice (measured!)
+ */
+#define AllocVars(vPtr) \
+ register struct vars *vPtr = (struct vars *) MALLOC(sizeof(struct vars))
+#define FreeVars(vPtr) \
+ FREE(vPtr)
+#endif
+
+/*
+ * And pick up the standard header.
+ */
+
+#include "regex.h"
diff --git a/contrib/hsrex/rege_dfa.c b/contrib/hsrex/rege_dfa.c
new file mode 100644
index 0000000..fbeae20
--- /dev/null
+++ b/contrib/hsrex/rege_dfa.c
@@ -0,0 +1,816 @@
+/*
+ * DFA routines
+ * This file is #included by regexec.c.
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation
+ * of software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+/*
+ - longest - longest-preferred matching engine
+ ^ static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
+ */
+static chr * /* endpoint, or NULL */
+longest(
+ struct vars *v, /* used only for debug and exec flags */
+ struct dfa *d,
+ chr *start, /* where the match should start */
+ chr *stop, /* match must end at or before here */
+ int *hitstopp) /* record whether hit v->stop, if non-NULL */
+{
+ chr *cp;
+ chr *realstop = (stop == v->stop) ? stop : stop + 1;
+ color co;
+ struct sset *css;
+ struct sset *ss;
+ chr *post;
+ int i;
+ struct colormap *cm = d->cm;
+
+ /*
+ * Initialize.
+ */
+
+ css = initialize(v, d, start);
+ cp = start;
+ if (hitstopp != NULL) {
+ *hitstopp = 0;
+ }
+
+ /*
+ * Startup.
+ */
+
+ FDEBUG(("+++ startup +++\n"));
+ if (cp == v->start) {
+ co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
+ FDEBUG(("color %ld\n", (long)co));
+ } else {
+ co = GETCOLOR(cm, *(cp - 1));
+ FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
+ }
+ css = miss(v, d, css, co, cp, start);
+ if (css == NULL) {
+ return NULL;
+ }
+ css->lastseen = cp;
+
+ /*
+ * Main loop.
+ */
+
+ if (v->eflags&REG_FTRACE) {
+ while (cp < realstop) {
+ FDEBUG(("+++ at c%d +++\n", css - d->ssets));
+ co = GETCOLOR(cm, *cp);
+ FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
+ ss = css->outs[co];
+ if (ss == NULL) {
+ ss = miss(v, d, css, co, cp+1, start);
+ if (ss == NULL) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ cp++;
+ ss->lastseen = cp;
+ css = ss;
+ }
+ } else {
+ while (cp < realstop) {
+ co = GETCOLOR(cm, *cp);
+ ss = css->outs[co];
+ if (ss == NULL) {
+ ss = miss(v, d, css, co, cp+1, start);
+ if (ss == NULL) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ cp++;
+ ss->lastseen = cp;
+ css = ss;
+ }
+ }
+
+ /*
+ * Shutdown.
+ */
+
+ FDEBUG(("+++ shutdown at c%d +++\n", css - d->ssets));
+ if (cp == v->stop && stop == v->stop) {
+ if (hitstopp != NULL) {
+ *hitstopp = 1;
+ }
+ co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
+ FDEBUG(("color %ld\n", (long)co));
+ ss = miss(v, d, css, co, cp, start);
+
+ /*
+ * Special case: match ended at eol?
+ */
+
+ if (ss != NULL && (ss->flags&POSTSTATE)) {
+ return cp;
+ } else if (ss != NULL) {
+ ss->lastseen = cp; /* to be tidy */
+ }
+ }
+
+ /*
+ * Find last match, if any.
+ */
+
+ post = d->lastpost;
+ for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) {
+ if ((ss->flags&POSTSTATE) && (post != ss->lastseen) &&
+ (post == NULL || post < ss->lastseen)) {
+ post = ss->lastseen;
+ }
+ }
+ if (post != NULL) { /* found one */
+ return post - 1;
+ }
+
+ return NULL;
+}
+
+/*
+ - shortest - shortest-preferred matching engine
+ ^ static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *,
+ ^ chr **, int *);
+ */
+static chr * /* endpoint, or NULL */
+shortest(
+ struct vars *v,
+ struct dfa *d,
+ chr *start, /* where the match should start */
+ chr *min, /* match must end at or after here */
+ chr *max, /* match must end at or before here */
+ chr **coldp, /* store coldstart pointer here, if nonNULL */
+ int *hitstopp) /* record whether hit v->stop, if non-NULL */
+{
+ chr *cp;
+ chr *realmin = (min == v->stop) ? min : min + 1;
+ chr *realmax = (max == v->stop) ? max : max + 1;
+ color co;
+ struct sset *css;
+ struct sset *ss;
+ struct colormap *cm = d->cm;
+
+ /*
+ * Initialize.
+ */
+
+ css = initialize(v, d, start);
+ cp = start;
+ if (hitstopp != NULL) {
+ *hitstopp = 0;
+ }
+
+ /*
+ * Startup.
+ */
+
+ FDEBUG(("--- startup ---\n"));
+ if (cp == v->start) {
+ co = d->cnfa->bos[(v->eflags&REG_NOTBOL) ? 0 : 1];
+ FDEBUG(("color %ld\n", (long)co));
+ } else {
+ co = GETCOLOR(cm, *(cp - 1));
+ FDEBUG(("char %c, color %ld\n", (char)*(cp-1), (long)co));
+ }
+ css = miss(v, d, css, co, cp, start);
+ if (css == NULL) {
+ return NULL;
+ }
+ css->lastseen = cp;
+ ss = css;
+
+ /*
+ * Main loop.
+ */
+
+ if (v->eflags&REG_FTRACE) {
+ while (cp < realmax) {
+ FDEBUG(("--- at c%d ---\n", css - d->ssets));
+ co = GETCOLOR(cm, *cp);
+ FDEBUG(("char %c, color %ld\n", (char)*cp, (long)co));
+ ss = css->outs[co];
+ if (ss == NULL) {
+ ss = miss(v, d, css, co, cp+1, start);
+ if (ss == NULL) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ cp++;
+ ss->lastseen = cp;
+ css = ss;
+ if ((ss->flags&POSTSTATE) && cp >= realmin) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ } else {
+ while (cp < realmax) {
+ co = GETCOLOR(cm, *cp);
+ ss = css->outs[co];
+ if (ss == NULL) {
+ ss = miss(v, d, css, co, cp+1, start);
+ if (ss == NULL) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ cp++;
+ ss->lastseen = cp;
+ css = ss;
+ if ((ss->flags&POSTSTATE) && cp >= realmin) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ }
+
+ if (ss == NULL) {
+ return NULL;
+ }
+
+ if (coldp != NULL) { /* report last no-progress state set, if any */
+ *coldp = lastcold(v, d);
+ }
+
+ if ((ss->flags&POSTSTATE) && cp > min) {
+ assert(cp >= realmin);
+ cp--;
+ } else if (cp == v->stop && max == v->stop) {
+ co = d->cnfa->eos[(v->eflags&REG_NOTEOL) ? 0 : 1];
+ FDEBUG(("color %ld\n", (long)co));
+ ss = miss(v, d, css, co, cp, start);
+
+ /*
+ * Match might have ended at eol.
+ */
+
+ if ((ss == NULL || !(ss->flags&POSTSTATE)) && hitstopp != NULL) {
+ *hitstopp = 1;
+ }
+ }
+
+ if (ss == NULL || !(ss->flags&POSTSTATE)) {
+ return NULL;
+ }
+
+ return cp;
+}
+
+/*
+ - lastcold - determine last point at which no progress had been made
+ ^ static chr *lastcold(struct vars *, struct dfa *);
+ */
+static chr * /* endpoint, or NULL */
+lastcold(
+ struct vars *v,
+ struct dfa *d)
+{
+ struct sset *ss;
+ chr *nopr;
+ int i;
+
+ nopr = d->lastnopr;
+ if (nopr == NULL) {
+ nopr = v->start;
+ }
+ for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--) {
+ if ((ss->flags&NOPROGRESS) && nopr < ss->lastseen) {
+ nopr = ss->lastseen;
+ }
+ }
+ return nopr;
+}
+
+/*
+ - newdfa - set up a fresh DFA
+ ^ static struct dfa *newdfa(struct vars *, struct cnfa *,
+ ^ struct colormap *, struct smalldfa *);
+ */
+static struct dfa *
+newdfa(
+ struct vars *v,
+ struct cnfa *cnfa,
+ struct colormap *cm,
+ struct smalldfa *small) /* preallocated space, may be NULL */
+{
+ struct dfa *d;
+ size_t nss = cnfa->nstates * 2;
+ int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
+ struct smalldfa *smallwas = small;
+
+ assert(cnfa != NULL && cnfa->nstates != 0);
+
+ if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS) {
+ assert(wordsper == 1);
+ if (small == NULL) {
+ small = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
+ if (small == NULL) {
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+ }
+ d = &small->dfa;
+ d->ssets = small->ssets;
+ d->statesarea = small->statesarea;
+ d->work = &d->statesarea[nss];
+ d->outsarea = small->outsarea;
+ d->incarea = small->incarea;
+ d->cptsmalloced = 0;
+ d->mallocarea = (smallwas == NULL) ? (char *)small : NULL;
+ } else {
+ d = (struct dfa *)MALLOC(sizeof(struct dfa));
+ if (d == NULL) {
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+ d->ssets = (struct sset *)MALLOC(nss * sizeof(struct sset));
+ d->statesarea = (unsigned *)
+ MALLOC((nss+WORK) * wordsper * sizeof(unsigned));
+ d->work = &d->statesarea[nss * wordsper];
+ d->outsarea = (struct sset **)
+ MALLOC(nss * cnfa->ncolors * sizeof(struct sset *));
+ d->incarea = (struct arcp *)
+ MALLOC(nss * cnfa->ncolors * sizeof(struct arcp));
+ d->cptsmalloced = 1;
+ d->mallocarea = (char *)d;
+ if (d->ssets == NULL || d->statesarea == NULL ||
+ d->outsarea == NULL || d->incarea == NULL) {
+ freedfa(d);
+ ERR(REG_ESPACE);
+ return NULL;
+ }
+ }
+
+ d->nssets = (v->eflags&REG_SMALL) ? 7 : nss;
+ d->nssused = 0;
+ d->nstates = cnfa->nstates;
+ d->ncolors = cnfa->ncolors;
+ d->wordsper = wordsper;
+ d->cnfa = cnfa;
+ d->cm = cm;
+ d->lastpost = NULL;
+ d->lastnopr = NULL;
+ d->search = d->ssets;
+
+ /*
+ * Initialization of sset fields is done as needed.
+ */
+
+ return d;
+}
+
+/*
+ - freedfa - free a DFA
+ ^ static void freedfa(struct dfa *);
+ */
+static void
+freedfa(
+ struct dfa *d)
+{
+ if (d->cptsmalloced) {
+ if (d->ssets != NULL) {
+ FREE(d->ssets);
+ }
+ if (d->statesarea != NULL) {
+ FREE(d->statesarea);
+ }
+ if (d->outsarea != NULL) {
+ FREE(d->outsarea);
+ }
+ if (d->incarea != NULL) {
+ FREE(d->incarea);
+ }
+ }
+
+ if (d->mallocarea != NULL) {
+ FREE(d->mallocarea);
+ }
+}
+
+/*
+ - hash - construct a hash code for a bitvector
+ * There are probably better ways, but they're more expensive.
+ ^ static unsigned hash(unsigned *, int);
+ */
+static unsigned
+hash(
+ unsigned *uv,
+ int n)
+{
+ int i;
+ unsigned h;
+
+ h = 0;
+ for (i = 0; i < n; i++) {
+ h ^= uv[i];
+ }
+ return h;
+}
+
+/*
+ - initialize - hand-craft a cache entry for startup, otherwise get ready
+ ^ static struct sset *initialize(struct vars *, struct dfa *, chr *);
+ */
+static struct sset *
+initialize(
+ struct vars *v, /* used only for debug flags */
+ struct dfa *d,
+ chr *start)
+{
+ struct sset *ss;
+ int i;
+
+ /*
+ * Is previous one still there?
+ */
+
+ if (d->nssused > 0 && (d->ssets[0].flags&STARTER)) {
+ ss = &d->ssets[0];
+ } else { /* no, must (re)build it */
+ ss = getvacant(v, d, start, start);
+ for (i = 0; i < d->wordsper; i++) {
+ ss->states[i] = 0;
+ }
+ BSET(ss->states, d->cnfa->pre);
+ ss->hash = HASH(ss->states, d->wordsper);
+ assert(d->cnfa->pre != d->cnfa->post);
+ ss->flags = STARTER|LOCKED|NOPROGRESS;
+
+ /*
+ * lastseen dealt with below
+ */
+ }
+
+ for (i = 0; i < d->nssused; i++) {
+ d->ssets[i].lastseen = NULL;
+ }
+ ss->lastseen = start; /* maybe untrue, but harmless */
+ d->lastpost = NULL;
+ d->lastnopr = NULL;
+ return ss;
+}
+
+/*
+ - miss - handle a cache miss
+ ^ static struct sset *miss(struct vars *, struct dfa *, struct sset *,
+ ^ pcolor, chr *, chr *);
+ */
+static struct sset * /* NULL if goes to empty set */
+miss(
+ struct vars *v, /* used only for debug flags */
+ struct dfa *d,
+ struct sset *css,
+ pcolor co,
+ chr *cp, /* next chr */
+ chr *start) /* where the attempt got started */
+{
+ struct cnfa *cnfa = d->cnfa;
+ int i;
+ unsigned h;
+ struct carc *ca;
+ struct sset *p;
+ int ispost;
+ int noprogress;
+ int gotstate;
+ int dolacons;
+ int sawlacons;
+
+ /*
+ * For convenience, we can be called even if it might not be a miss.
+ */
+
+ if (css->outs[co] != NULL) {
+ FDEBUG(("hit\n"));
+ return css->outs[co];
+ }
+ FDEBUG(("miss\n"));
+
+ /*
+ * First, what set of states would we end up in?
+ */
+
+ for (i = 0; i < d->wordsper; i++) {
+ d->work[i] = 0;
+ }
+ ispost = 0;
+ noprogress = 1;
+ gotstate = 0;
+ for (i = 0; i < d->nstates; i++) {
+ if (ISBSET(css->states, i)) {
+ for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) {
+ if (ca->co == co) {
+ BSET(d->work, ca->to);
+ gotstate = 1;
+ if (ca->to == cnfa->post) {
+ ispost = 1;
+ }
+ if (!cnfa->states[ca->to]->co) {
+ noprogress = 0;
+ }
+ FDEBUG(("%d -> %d\n", i, ca->to));
+ }
+ }
+ }
+ }
+ dolacons = (gotstate) ? (cnfa->flags&HASLACONS) : 0;
+ sawlacons = 0;
+ while (dolacons) { /* transitive closure */
+ dolacons = 0;
+ for (i = 0; i < d->nstates; i++) {
+ if (ISBSET(d->work, i)) {
+ for (ca = cnfa->states[i]+1; ca->co != COLORLESS; ca++) {
+ if (ca->co <= cnfa->ncolors) {
+ continue; /* NOTE CONTINUE */
+ }
+ sawlacons = 1;
+ if (ISBSET(d->work, ca->to)) {
+ continue; /* NOTE CONTINUE */
+ }
+ if (!lacon(v, cnfa, cp, ca->co)) {
+ continue; /* NOTE CONTINUE */
+ }
+ BSET(d->work, ca->to);
+ dolacons = 1;
+ if (ca->to == cnfa->post) {
+ ispost = 1;
+ }
+ if (!cnfa->states[ca->to]->co) {
+ noprogress = 0;
+ }
+ FDEBUG(("%d :> %d\n", i, ca->to));
+ }
+ }
+ }
+ }
+ if (!gotstate) {
+ return NULL;
+ }
+ h = HASH(d->work, d->wordsper);
+
+ /*
+ * Next, is that in the cache?
+ */
+
+ for (p = d->ssets, i = d->nssused; i > 0; p++, i--) {
+ if (HIT(h, d->work, p, d->wordsper)) {
+ FDEBUG(("cached c%d\n", p - d->ssets));
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ if (i == 0) { /* nope, need a new cache entry */
+ p = getvacant(v, d, cp, start);
+ assert(p != css);
+ for (i = 0; i < d->wordsper; i++) {
+ p->states[i] = d->work[i];
+ }
+ p->hash = h;
+ p->flags = (ispost) ? POSTSTATE : 0;
+ if (noprogress) {
+ p->flags |= NOPROGRESS;
+ }
+
+ /*
+ * lastseen to be dealt with by caller
+ */
+ }
+
+ if (!sawlacons) { /* lookahead conds. always cache miss */
+ FDEBUG(("c%d[%d]->c%d\n", css - d->ssets, co, p - d->ssets));
+ css->outs[co] = p;
+ css->inchain[co] = p->ins;
+ p->ins.ss = css;
+ p->ins.co = (color)co;
+ }
+ return p;
+}
+
+/*
+ - lacon - lookahead-constraint checker for miss()
+ ^ static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
+ */
+static int /* predicate: constraint satisfied? */
+lacon(
+ struct vars *v,
+ struct cnfa *pcnfa, /* parent cnfa */
+ chr *cp,
+ pcolor co) /* "color" of the lookahead constraint */
+{
+ int n;
+ struct subre *sub;
+ struct dfa *d;
+ struct smalldfa sd;
+ chr *end;
+
+ n = co - pcnfa->ncolors;
+ assert(n < v->g->nlacons && v->g->lacons != NULL);
+ FDEBUG(("=== testing lacon %d\n", n));
+ sub = &v->g->lacons[n];
+ d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd);
+ if (d == NULL) {
+ ERR(REG_ESPACE);
+ return 0;
+ }
+ end = longest(v, d, cp, v->stop, (int *)NULL);
+ freedfa(d);
+ FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
+ return (sub->subno) ? (end != NULL) : (end == NULL);
+}
+
+/*
+ - getvacant - get a vacant state set
+ * This routine clears out the inarcs and outarcs, but does not otherwise
+ * clear the innards of the state set -- that's up to the caller.
+ ^ static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
+ */
+static struct sset *
+getvacant(
+ struct vars *v, /* used only for debug flags */
+ struct dfa *d,
+ chr *cp,
+ chr *start)
+{
+ int i;
+ struct sset *ss;
+ struct sset *p;
+ struct arcp ap;
+ struct arcp lastap = {NULL, 0}; /* silence gcc 4 warning */
+ color co;
+
+ ss = pickss(v, d, cp, start);
+ assert(!(ss->flags&LOCKED));
+
+ /*
+ * Clear out its inarcs, including self-referential ones.
+ */
+
+ ap = ss->ins;
+ while ((p = ap.ss) != NULL) {
+ co = ap.co;
+ FDEBUG(("zapping c%d's %ld outarc\n", p - d->ssets, (long)co));
+ p->outs[co] = NULL;
+ ap = p->inchain[co];
+ p->inchain[co].ss = NULL; /* paranoia */
+ }
+ ss->ins.ss = NULL;
+
+ /*
+ * Take it off the inarc chains of the ssets reached by its outarcs.
+ */
+
+ for (i = 0; i < d->ncolors; i++) {
+ p = ss->outs[i];
+ assert(p != ss); /* not self-referential */
+ if (p == NULL) {
+ continue; /* NOTE CONTINUE */
+ }
+ FDEBUG(("del outarc %d from c%d's in chn\n", i, p - d->ssets));
+ if (p->ins.ss == ss && p->ins.co == i) {
+ p->ins = ss->inchain[i];
+ } else {
+ assert(p->ins.ss != NULL);
+ for (ap = p->ins; ap.ss != NULL &&
+ !(ap.ss == ss && ap.co == i);
+ ap = ap.ss->inchain[ap.co]) {
+ lastap = ap;
+ }
+ assert(ap.ss != NULL);
+ lastap.ss->inchain[lastap.co] = ss->inchain[i];
+ }
+ ss->outs[i] = NULL;
+ ss->inchain[i].ss = NULL;
+ }
+
+ /*
+ * If ss was a success state, may need to remember location.
+ */
+
+ if ((ss->flags&POSTSTATE) && ss->lastseen != d->lastpost &&
+ (d->lastpost == NULL || d->lastpost < ss->lastseen)) {
+ d->lastpost = ss->lastseen;
+ }
+
+ /*
+ * Likewise for a no-progress state.
+ */
+
+ if ((ss->flags&NOPROGRESS) && ss->lastseen != d->lastnopr &&
+ (d->lastnopr == NULL || d->lastnopr < ss->lastseen)) {
+ d->lastnopr = ss->lastseen;
+ }
+
+ return ss;
+}
+
+/*
+ - pickss - pick the next stateset to be used
+ ^ static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
+ */
+static struct sset *
+pickss(
+ struct vars *v, /* used only for debug flags */
+ struct dfa *d,
+ chr *cp,
+ chr *start)
+{
+ int i;
+ struct sset *ss;
+ struct sset *end;
+ chr *ancient;
+
+ /*
+ * Shortcut for cases where cache isn't full.
+ */
+
+ if (d->nssused < d->nssets) {
+ i = d->nssused;
+ d->nssused++;
+ ss = &d->ssets[i];
+ FDEBUG(("new c%d\n", i));
+
+ /*
+ * Set up innards.
+ */
+
+ ss->states = &d->statesarea[i * d->wordsper];
+ ss->flags = 0;
+ ss->ins.ss = NULL;
+ ss->ins.co = WHITE; /* give it some value */
+ ss->outs = &d->outsarea[i * d->ncolors];
+ ss->inchain = &d->incarea[i * d->ncolors];
+ for (i = 0; i < d->ncolors; i++) {
+ ss->outs[i] = NULL;
+ ss->inchain[i].ss = NULL;
+ }
+ return ss;
+ }
+
+ /*
+ * Look for oldest, or old enough anyway.
+ */
+
+ if (cp - start > d->nssets*2/3) { /* oldest 33% are expendable */
+ ancient = cp - d->nssets*2/3;
+ } else {
+ ancient = start;
+ }
+ for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++) {
+ if ((ss->lastseen == NULL || ss->lastseen < ancient)
+ && !(ss->flags&LOCKED)) {
+ d->search = ss + 1;
+ FDEBUG(("replacing c%d\n", ss - d->ssets));
+ return ss;
+ }
+ }
+ for (ss = d->ssets, end = d->search; ss < end; ss++) {
+ if ((ss->lastseen == NULL || ss->lastseen < ancient)
+ && !(ss->flags&LOCKED)) {
+ d->search = ss + 1;
+ FDEBUG(("replacing c%d\n", ss - d->ssets));
+ return ss;
+ }
+ }
+
+ /*
+ * Nobody's old enough?!? -- something's really wrong.
+ */
+
+ FDEBUG(("can't find victim to replace!\n"));
+ assert(NOTREACHED);
+ ERR(REG_ASSERT);
+ return d->ssets;
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regerror.c b/contrib/hsrex/regerror.c
new file mode 100644
index 0000000..49b6f3e
--- /dev/null
+++ b/contrib/hsrex/regerror.c
@@ -0,0 +1,129 @@
+/*
+ * regerror - error-code expansion
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "regguts.h"
+
+/*
+ * Unknown-error explanation.
+ */
+
+static char unk[] = "*** unknown regex error code 0x%x ***";
+
+/*
+ * Struct to map among codes, code names, and explanations.
+ */
+
+static struct rerr {
+ int code;
+ const char *name;
+ const char *explain;
+} rerrs[] = {
+ /* The actual table is built from regex.h */
+#include "regerrs.h"
+ { -1, "", "oops" }, /* explanation special-cased in code */
+};
+
+/*
+ - regerror - the interface to error numbers
+ */
+/* ARGSUSED */
+size_t /* Actual space needed (including NUL) */
+regerror(
+ int code, /* Error code, or REG_ATOI or REG_ITOA */
+ const regex_t *preg, /* Associated regex_t (unused at present) */
+ char *errbuf, /* Result buffer (unless errbuf_size==0) */
+ size_t errbuf_size) /* Available space in errbuf, can be 0 */
+{
+ struct rerr *r;
+ const char *msg;
+ char convbuf[sizeof(unk)+50]; /* 50 = plenty for int */
+ size_t len;
+ int icode;
+
+ switch (code) {
+ case REG_ATOI: /* Convert name to number */
+ for (r = rerrs; r->code >= 0; r++) {
+ if (strcmp(r->name, errbuf) == 0) {
+ break;
+ }
+ }
+ sprintf(convbuf, "%d", r->code); /* -1 for unknown */
+ msg = convbuf;
+ break;
+ case REG_ITOA: /* Convert number to name */
+ icode = atoi(errbuf); /* Not our problem if this fails */
+ for (r = rerrs; r->code >= 0; r++) {
+ if (r->code == icode) {
+ break;
+ }
+ }
+ if (r->code >= 0) {
+ msg = r->name;
+ } else { /* Unknown; tell him the number */
+ sprintf(convbuf, "REG_%u", (unsigned)icode);
+ msg = convbuf;
+ }
+ break;
+ default: /* A real, normal error code */
+ for (r = rerrs; r->code >= 0; r++) {
+ if (r->code == code) {
+ break;
+ }
+ }
+ if (r->code >= 0) {
+ msg = r->explain;
+ } else { /* Unknown; say so */
+ sprintf(convbuf, unk, code);
+ msg = convbuf;
+ }
+ break;
+ }
+
+ len = strlen(msg) + 1; /* Space needed, including NUL */
+ if (errbuf_size > 0) {
+ if (errbuf_size > len) {
+ strcpy(errbuf, msg);
+ } else { /* Truncate to fit */
+ strncpy(errbuf, msg, errbuf_size-1);
+ errbuf[errbuf_size-1] = '\0';
+ }
+ }
+
+ return len;
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regerrs.h b/contrib/hsrex/regerrs.h
new file mode 100644
index 0000000..259c0cb
--- /dev/null
+++ b/contrib/hsrex/regerrs.h
@@ -0,0 +1,19 @@
+{ REG_OKAY, "REG_OKAY", "no errors detected" },
+{ REG_NOMATCH, "REG_NOMATCH", "failed to match" },
+{ REG_BADPAT, "REG_BADPAT", "invalid regexp (reg version 0.8)" },
+{ REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+{ REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+{ REG_EESCAPE, "REG_EESCAPE", "invalid escape \\ sequence" },
+{ REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+{ REG_EBRACK, "REG_EBRACK", "brackets [] not balanced" },
+{ REG_EPAREN, "REG_EPAREN", "parentheses () not balanced" },
+{ REG_EBRACE, "REG_EBRACE", "braces {} not balanced" },
+{ REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+{ REG_ERANGE, "REG_ERANGE", "invalid character range" },
+{ REG_ESPACE, "REG_ESPACE", "out of memory" },
+{ REG_BADRPT, "REG_BADRPT", "quantifier operand invalid" },
+{ REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+{ REG_INVARG, "REG_INVARG", "invalid argument to regex function" },
+{ REG_MIXED, "REG_MIXED", "character widths of regex and string differ" },
+{ REG_BADOPT, "REG_BADOPT", "invalid embedded option" },
+{ REG_ETOOBIG, "REG_ETOOBIG", "nfa has too many states" },
diff --git a/contrib/hsrex/regex.h b/contrib/hsrex/regex.h
new file mode 100644
index 0000000..2ef538a
--- /dev/null
+++ b/contrib/hsrex/regex.h
@@ -0,0 +1,336 @@
+#ifndef _REGEX_H_
+#define _REGEX_H_ /* never again */
+/*
+ * regular expressions
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Prototypes etc. marked with "^" within comments get gathered up (and
+ * possibly edited) by the regfwd program and inserted near the bottom of this
+ * file.
+ *
+ * We offer the option of declaring one wide-character version of the RE
+ * functions as well as the char versions. To do that, define __REG_WIDE_T to
+ * the type of wide characters (unfortunately, there is no consensus that
+ * wchar_t is suitable) and __REG_WIDE_COMPILE and __REG_WIDE_EXEC to the
+ * names to be used for the compile and execute functions (suggestion:
+ * re_Xcomp and re_Xexec, where X is a letter suggestive of the wide type,
+ * e.g. re_ucomp and re_uexec for Unicode). For cranky old compilers, it may
+ * be necessary to do something like:
+ * #define __REG_WIDE_COMPILE(a,b,c,d) re_Xcomp(a,b,c,d)
+ * #define __REG_WIDE_EXEC(a,b,c,d,e,f,g) re_Xexec(a,b,c,d,e,f,g)
+ * rather than just #defining the names as parameterless macros.
+ *
+ * For some specialized purposes, it may be desirable to suppress the
+ * declarations of the "front end" functions, regcomp() and regexec(), or of
+ * the char versions of the compile and execute functions. To suppress the
+ * front-end functions, define __REG_NOFRONT. To suppress the char versions,
+ * define __REG_NOCHAR.
+ *
+ * The right place to do those defines (and some others you may want, see
+ * below) would be <sys/types.h>. If you don't have control of that file, the
+ * right place to add your own defines to this file is marked below. This is
+ * normally done automatically, by the makefile and regmkhdr, based on the
+ * contents of regcustom.h.
+ */
+
+/*
+ * voodoo for C++
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Add your own defines, if needed, here.
+ */
+
+/*
+ * Location where a chunk of regcustom.h is automatically spliced into this
+ * file (working from its prototype, regproto.h).
+ */
+
+/* --- begin --- */
+/* ensure certain things don't sneak in from system headers */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+#ifdef __REG_NOFRONT
+#undef __REG_NOFRONT
+#endif
+#ifdef __REG_NOCHAR
+#undef __REG_NOCHAR
+#endif
+/* interface types */
+#define __REG_WIDE_T Tcl_UniChar
+#define __REG_REGOFF_T long /* not really right, but good enough... */
+#define __REG_VOID_T void
+#define __REG_CONST const
+/* names and declarations */
+#define __REG_WIDE_COMPILE TclReComp
+#define __REG_WIDE_EXEC TclReExec
+#define __REG_NOFRONT /* don't want regcomp() and regexec() */
+#define __REG_NOCHAR /* or the char versions */
+#define regfree TclReFree
+#define regerror TclReError
+/* --- end --- */
+#ifdef REGEX_STANDALONE
+# undef regfree
+# undef regerror
+# define regfree re_free
+# define regerror re_error
+# undef __REG_WIDE_T
+# define __REG_WIDE_T wchar_t
+# undef __REG_WIDE_COMPILE
+# define __REG_WIDE_COMPILE re_wcomp
+# undef __REG_WIDE_EXEC
+# define __REG_WIDE_EXEC re_wexec
+# ifndef REGEX_WCHAR
+# undef __REG_NOCHAR
+# endif
+#endif
+
+/*
+ * interface types etc.
+ */
+
+/*
+ * regoff_t has to be large enough to hold either off_t or ssize_t, and must
+ * be signed; it's only a guess that long is suitable, so we offer
+ * <sys/types.h> an override.
+ */
+#ifdef __REG_REGOFF_T
+typedef __REG_REGOFF_T regoff_t;
+#else
+typedef long regoff_t;
+#endif
+
+/*
+ * For benefit of old compilers, we offer <sys/types.h> the option of
+ * overriding the `void' type used to declare nonexistent return types.
+ */
+#ifdef __REG_VOID_T
+typedef __REG_VOID_T re_void;
+#else
+typedef void re_void;
+#endif
+
+/*
+ * Also for benefit of old compilers, <sys/types.h> can supply a macro which
+ * expands to a substitute for `const'.
+ */
+#ifndef __REG_CONST
+#define __REG_CONST const
+#endif
+
+
+
+/*
+ * other interface types
+ */
+
+/* the biggie, a compiled RE (or rather, a front end to same) */
+typedef struct {
+ int re_magic; /* magic number */
+ size_t re_nsub; /* number of subexpressions */
+ long re_info; /* information about RE */
+#define REG_UBACKREF 000001
+#define REG_ULOOKAHEAD 000002
+#define REG_UBOUNDS 000004
+#define REG_UBRACES 000010
+#define REG_UBSALNUM 000020
+#define REG_UPBOTCH 000040
+#define REG_UBBS 000100
+#define REG_UNONPOSIX 000200
+#define REG_UUNSPEC 000400
+#define REG_UUNPORT 001000
+#define REG_ULOCALE 002000
+#define REG_UEMPTYMATCH 004000
+#define REG_UIMPOSSIBLE 010000
+#define REG_USHORTEST 020000
+ int re_csize; /* sizeof(character) */
+ char *re_endp; /* backward compatibility kludge */
+ /* the rest is opaque pointers to hidden innards */
+ char *re_guts; /* `char *' is more portable than `void *' */
+ char *re_fns;
+} regex_t;
+
+/* result reporting (may acquire more fields later) */
+typedef struct {
+ regoff_t rm_so; /* start of substring */
+ regoff_t rm_eo; /* end of substring */
+} regmatch_t;
+
+/* supplementary control and reporting */
+typedef struct {
+ regmatch_t rm_extend; /* see REG_EXPECT */
+} rm_detail_t;
+
+/*
+ * compilation
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_comp(regex_t *, __REG_CONST char *, size_t, int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regcomp(regex_t *, __REG_CONST char *, int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
+ ^ #endif
+ */
+#define REG_BASIC 000000 /* BREs (convenience) */
+#define REG_EXTENDED 000001 /* EREs */
+#define REG_ADVF 000002 /* advanced features in EREs */
+#define REG_ADVANCED 000003 /* AREs (which are also EREs) */
+#define REG_QUOTE 000004 /* no special characters, none */
+#define REG_NOSPEC REG_QUOTE /* historical synonym */
+#define REG_ICASE 000010 /* ignore case */
+#define REG_NOSUB 000020 /* don't care about subexpressions */
+#define REG_EXPANDED 000040 /* expanded format, white space & comments */
+#define REG_NLSTOP 000100 /* \n doesn't match . or [^ ] */
+#define REG_NLANCH 000200 /* ^ matches after \n, $ before */
+#define REG_NEWLINE 000300 /* newlines are line terminators */
+#define REG_PEND 000400 /* ugh -- backward-compatibility hack */
+#define REG_EXPECT 001000 /* report details on partial/limited matches */
+#define REG_BOSONLY 002000 /* temporary kludge for BOS-only matches */
+#define REG_DUMP 004000 /* none of your business :-) */
+#define REG_FAKE 010000 /* none of your business :-) */
+#define REG_PROGRESS 020000 /* none of your business :-) */
+
+/*
+ * execution
+ ^ #ifndef __REG_NOCHAR
+ ^ int re_exec(regex_t *, __REG_CONST char *, size_t,
+ ^ rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifndef __REG_NOFRONT
+ ^ int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
+ ^ #endif
+ ^ #ifdef __REG_WIDE_T
+ ^ int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t,
+ ^ rm_detail_t *, size_t, regmatch_t [], int);
+ ^ #endif
+ */
+#define REG_NOTBOL 0001 /* BOS is not BOL */
+#define REG_NOTEOL 0002 /* EOS is not EOL */
+#define REG_STARTEND 0004 /* backward compatibility kludge */
+#define REG_FTRACE 0010 /* none of your business */
+#define REG_MTRACE 0020 /* none of your business */
+#define REG_SMALL 0040 /* none of your business */
+
+/*
+ * misc generics (may be more functions here eventually)
+ ^ re_void regfree(regex_t *);
+ */
+
+/*
+ * error reporting
+ * Be careful if modifying the list of error codes -- the table used by
+ * regerror() is generated automatically from this file!
+ *
+ * Note that there is no wide-char variant of regerror at this time; what kind
+ * of character is used for error reports is independent of what kind is used
+ * in matching.
+ *
+ ^ extern size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
+ */
+#define REG_OKAY 0 /* no errors detected */
+#define REG_NOMATCH 1 /* failed to match */
+#define REG_BADPAT 2 /* invalid regexp */
+#define REG_ECOLLATE 3 /* invalid collating element */
+#define REG_ECTYPE 4 /* invalid character class */
+#define REG_EESCAPE 5 /* invalid escape \ sequence */
+#define REG_ESUBREG 6 /* invalid backreference number */
+#define REG_EBRACK 7 /* brackets [] not balanced */
+#define REG_EPAREN 8 /* parentheses () not balanced */
+#define REG_EBRACE 9 /* braces {} not balanced */
+#define REG_BADBR 10 /* invalid repetition count(s) */
+#define REG_ERANGE 11 /* invalid character range */
+#define REG_ESPACE 12 /* out of memory */
+#define REG_BADRPT 13 /* quantifier operand invalid */
+#define REG_ASSERT 15 /* "can't happen" -- you found a bug */
+#define REG_INVARG 16 /* invalid argument to regex function */
+#define REG_MIXED 17 /* character widths of regex and string differ */
+#define REG_BADOPT 18 /* invalid embedded option */
+#define REG_ETOOBIG 19 /* nfa has too many states */
+/* two specials for debugging and testing */
+#define REG_ATOI 101 /* convert error-code name to number */
+#define REG_ITOA 102 /* convert error-code number to name */
+
+/*
+ * the prototypes, as possibly munched by regfwd
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regproto.h === */
+#ifndef __REG_NOCHAR
+int re_comp(regex_t *, __REG_CONST unsigned char *, size_t, int);
+#endif
+#ifndef __REG_NOFRONT
+int regcomp(regex_t *, __REG_CONST char *, int);
+#endif
+#ifdef __REG_WIDE_T
+MODULE_SCOPE int __REG_WIDE_COMPILE(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, int);
+#endif
+#ifndef __REG_NOCHAR
+int re_exec(regex_t *, __REG_CONST unsigned char *, size_t, rm_detail_t *, size_t, regmatch_t [], int);
+#endif
+#ifndef __REG_NOFRONT
+int regexec(regex_t *, __REG_CONST char *, size_t, regmatch_t [], int);
+#endif
+#ifdef __REG_WIDE_T
+MODULE_SCOPE int __REG_WIDE_EXEC(regex_t *, __REG_CONST __REG_WIDE_T *, size_t, rm_detail_t *, size_t, regmatch_t [], int);
+#endif
+MODULE_SCOPE re_void regfree(regex_t *);
+MODULE_SCOPE size_t regerror(int, __REG_CONST regex_t *, char *, size_t);
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+/*
+ * more C++ voodoo
+ */
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/hsrex/regexec.c b/contrib/hsrex/regexec.c
new file mode 100644
index 0000000..24edb41
--- /dev/null
+++ b/contrib/hsrex/regexec.c
@@ -0,0 +1,1215 @@
+/*
+ * re_*exec and friends - match REs
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "regguts.h"
+
+/*
+ * Lazy-DFA representation.
+ */
+
+struct arcp { /* "pointer" to an outarc */
+ struct sset *ss;
+ color co;
+};
+
+struct sset { /* state set */
+ unsigned *states; /* pointer to bitvector */
+ unsigned hash; /* hash of bitvector */
+#define HASH(bv, nw) (((nw) == 1) ? *(bv) : hash(bv, nw))
+#define HIT(h,bv,ss,nw) ((ss)->hash == (h) && ((nw) == 1 || \
+ memcmp(VS(bv), VS((ss)->states), (nw)*sizeof(unsigned)) == 0))
+ int flags;
+#define STARTER 01 /* the initial state set */
+#define POSTSTATE 02 /* includes the goal state */
+#define LOCKED 04 /* locked in cache */
+#define NOPROGRESS 010 /* zero-progress state set */
+ struct arcp ins; /* chain of inarcs pointing here */
+ chr *lastseen; /* last entered on arrival here */
+ struct sset **outs; /* outarc vector indexed by color */
+ struct arcp *inchain; /* chain-pointer vector for outarcs */
+};
+
+struct dfa {
+ int nssets; /* size of cache */
+ int nssused; /* how many entries occupied yet */
+ int nstates; /* number of states */
+ int ncolors; /* length of outarc and inchain vectors */
+ int wordsper; /* length of state-set bitvectors */
+ struct sset *ssets; /* state-set cache */
+ unsigned *statesarea; /* bitvector storage */
+ unsigned *work; /* pointer to work area within statesarea */
+ struct sset **outsarea; /* outarc-vector storage */
+ struct arcp *incarea; /* inchain storage */
+ struct cnfa *cnfa;
+ struct colormap *cm;
+ chr *lastpost; /* location of last cache-flushed success */
+ chr *lastnopr; /* location of last cache-flushed NOPROGRESS */
+ struct sset *search; /* replacement-search-pointer memory */
+ int cptsmalloced; /* were the areas individually malloced? */
+ char *mallocarea; /* self, or master malloced area, or NULL */
+};
+
+#define WORK 1 /* number of work bitvectors needed */
+
+/*
+ * Setup for non-malloc allocation for small cases.
+ */
+
+#define FEWSTATES 20 /* must be less than UBITS */
+#define FEWCOLORS 15
+struct smalldfa {
+ struct dfa dfa;
+ struct sset ssets[FEWSTATES*2];
+ unsigned statesarea[FEWSTATES*2 + WORK];
+ struct sset *outsarea[FEWSTATES*2 * FEWCOLORS];
+ struct arcp incarea[FEWSTATES*2 * FEWCOLORS];
+};
+#define DOMALLOC ((struct smalldfa *)NULL) /* force malloc */
+
+/*
+ * Internal variables, bundled for easy passing around.
+ */
+
+struct vars {
+ regex_t *re;
+ struct guts *g;
+ int eflags; /* copies of arguments */
+ size_t nmatch;
+ regmatch_t *pmatch;
+ rm_detail_t *details;
+ chr *start; /* start of string */
+ chr *stop; /* just past end of string */
+ int err; /* error code if any (0 none) */
+ regoff_t *mem; /* memory vector for backtracking */
+ struct smalldfa dfa1;
+ struct smalldfa dfa2;
+};
+#define VISERR(vv) ((vv)->err != 0) /* have we seen an error yet? */
+#define ISERR() VISERR(v)
+#define VERR(vv,e) (((vv)->err) ? (vv)->err : ((vv)->err = (e)))
+#define ERR(e) VERR(v, e) /* record an error */
+#define NOERR() {if (ISERR()) return v->err;} /* if error seen, return it */
+#define OFF(p) ((p) - v->start)
+#define LOFF(p) ((long)OFF(p))
+
+/*
+ * forward declarations
+ */
+/* =====^!^===== begin forwards =====^!^===== */
+/* automatically gathered by fwd; do not hand-edit */
+/* === regexec.c === */
+int exec(regex_t *, const chr *, size_t, rm_detail_t *, size_t, regmatch_t [], int);
+static int find(struct vars *, struct cnfa *, struct colormap *);
+static int cfind(struct vars *, struct cnfa *, struct colormap *);
+static int cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
+static void zapsubs(regmatch_t *, size_t);
+static void zapmem(struct vars *, struct subre *);
+static void subset(struct vars *, struct subre *, chr *, chr *);
+static int dissect(struct vars *, struct subre *, chr *, chr *);
+static int condissect(struct vars *, struct subre *, chr *, chr *);
+static int altdissect(struct vars *, struct subre *, chr *, chr *);
+static int cdissect(struct vars *, struct subre *, chr *, chr *);
+static int ccondissect(struct vars *, struct subre *, chr *, chr *);
+static int crevdissect(struct vars *, struct subre *, chr *, chr *);
+static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
+static int caltdissect(struct vars *, struct subre *, chr *, chr *);
+/* === rege_dfa.c === */
+static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
+static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
+static chr *lastcold(struct vars *, struct dfa *);
+static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
+static void freedfa(struct dfa *);
+static unsigned hash(unsigned *, int);
+static struct sset *initialize(struct vars *, struct dfa *, chr *);
+static struct sset *miss(struct vars *, struct dfa *, struct sset *, pcolor, chr *, chr *);
+static int lacon(struct vars *, struct cnfa *, chr *, pcolor);
+static struct sset *getvacant(struct vars *, struct dfa *, chr *, chr *);
+static struct sset *pickss(struct vars *, struct dfa *, chr *, chr *);
+/* automatically gathered by fwd; do not hand-edit */
+/* =====^!^===== end forwards =====^!^===== */
+
+/*
+ - exec - match regular expression
+ ^ int exec(regex_t *, const chr *, size_t, rm_detail_t *,
+ ^ size_t, regmatch_t [], int);
+ */
+int
+exec(
+ regex_t *re,
+ const chr *string,
+ size_t len,
+ rm_detail_t *details,
+ size_t nmatch,
+ regmatch_t pmatch[],
+ int flags)
+{
+ AllocVars(v);
+ int st;
+ size_t n;
+ int backref;
+#define LOCALMAT 20
+ regmatch_t mat[LOCALMAT];
+#define LOCALMEM 40
+ regoff_t mem[LOCALMEM];
+
+ /*
+ * Sanity checks.
+ */
+
+ if (re == NULL || string == NULL || re->re_magic != REMAGIC) {
+ FreeVars(v);
+ return REG_INVARG;
+ }
+ if (re->re_csize != sizeof(chr)) {
+ FreeVars(v);
+ return REG_MIXED;
+ }
+
+ /*
+ * Setup.
+ */
+
+ v->re = re;
+ v->g = (struct guts *)re->re_guts;
+ if ((v->g->cflags&REG_EXPECT) && details == NULL) {
+ FreeVars(v);
+ return REG_INVARG;
+ }
+ if (v->g->info&REG_UIMPOSSIBLE) {
+ FreeVars(v);
+ return REG_NOMATCH;
+ }
+ backref = (v->g->info&REG_UBACKREF) ? 1 : 0;
+ v->eflags = flags;
+ if (v->g->cflags&REG_NOSUB) {
+ nmatch = 0; /* override client */
+ }
+ v->nmatch = nmatch;
+ if (backref) {
+ /*
+ * Need work area.
+ */
+
+ if (v->g->nsub + 1 <= LOCALMAT) {
+ v->pmatch = mat;
+ } else {
+ v->pmatch = (regmatch_t *)
+ MALLOC((v->g->nsub + 1) * sizeof(regmatch_t));
+ }
+ if (v->pmatch == NULL) {
+ FreeVars(v);
+ return REG_ESPACE;
+ }
+ v->nmatch = v->g->nsub + 1;
+ } else {
+ v->pmatch = pmatch;
+ }
+ v->details = details;
+ v->start = (chr *)string;
+ v->stop = (chr *)string + len;
+ v->err = 0;
+ if (backref) {
+ /*
+ * Need retry memory.
+ */
+
+ assert(v->g->ntree >= 0);
+ n = (size_t)v->g->ntree;
+ if (n <= LOCALMEM) {
+ v->mem = mem;
+ } else {
+ v->mem = (regoff_t *) MALLOC(n*sizeof(regoff_t));
+ }
+ if (v->mem == NULL) {
+ if (v->pmatch != pmatch && v->pmatch != mat) {
+ FREE(v->pmatch);
+ }
+ FreeVars(v);
+ return REG_ESPACE;
+ }
+ } else {
+ v->mem = NULL;
+ }
+
+ /*
+ * Do it.
+ */
+
+ assert(v->g->tree != NULL);
+ if (backref) {
+ st = cfind(v, &v->g->tree->cnfa, &v->g->cmap);
+ } else {
+ st = find(v, &v->g->tree->cnfa, &v->g->cmap);
+ }
+
+ /*
+ * Copy (portion of) match vector over if necessary.
+ */
+
+ if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
+ zapsubs(pmatch, nmatch);
+ n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
+ memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
+ }
+
+ /*
+ * Clean up.
+ */
+
+ if (v->pmatch != pmatch && v->pmatch != mat) {
+ FREE(v->pmatch);
+ }
+ if (v->mem != NULL && v->mem != mem) {
+ FREE(v->mem);
+ }
+ FreeVars(v);
+ return st;
+}
+
+/*
+ - find - find a match for the main NFA (no-complications case)
+ ^ static int find(struct vars *, struct cnfa *, struct colormap *);
+ */
+static int
+find(
+ struct vars *v,
+ struct cnfa *cnfa,
+ struct colormap *cm)
+{
+ struct dfa *s;
+ struct dfa *d;
+ chr *begin;
+ chr *end = NULL;
+ chr *cold;
+ chr *open; /* Open and close of range of possible
+ * starts */
+ chr *close;
+ int hitend;
+ int shorter = (v->g->tree->flags&SHORTER) ? 1 : 0;
+
+ /*
+ * First, a shot with the search RE.
+ */
+
+ s = newdfa(v, &v->g->search, cm, &v->dfa1);
+ assert(!(ISERR() && s != NULL));
+ NOERR();
+ MDEBUG(("\nsearch at %ld\n", LOFF(v->start)));
+ cold = NULL;
+ close = shortest(v, s, v->start, v->start, v->stop, &cold, NULL);
+ freedfa(s);
+ NOERR();
+ if (v->g->cflags&REG_EXPECT) {
+ assert(v->details != NULL);
+ if (cold != NULL) {
+ v->details->rm_extend.rm_so = OFF(cold);
+ } else {
+ v->details->rm_extend.rm_so = OFF(v->stop);
+ }
+ v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
+ }
+ if (close == NULL) { /* not found */
+ return REG_NOMATCH;
+ }
+ if (v->nmatch == 0) { /* found, don't need exact location */
+ return REG_OKAY;
+ }
+
+ /*
+ * Find starting point and match.
+ */
+
+ assert(cold != NULL);
+ open = cold;
+ cold = NULL;
+ MDEBUG(("between %ld and %ld\n", LOFF(open), LOFF(close)));
+ d = newdfa(v, cnfa, cm, &v->dfa1);
+ assert(!(ISERR() && d != NULL));
+ NOERR();
+ for (begin = open; begin <= close; begin++) {
+ MDEBUG(("\nfind trying at %ld\n", LOFF(begin)));
+ if (shorter) {
+ end = shortest(v, d, begin, begin, v->stop, NULL, &hitend);
+ } else {
+ end = longest(v, d, begin, v->stop, &hitend);
+ }
+ NOERR();
+ if (hitend && cold == NULL) {
+ cold = begin;
+ }
+ if (end != NULL) {
+ break; /* NOTE BREAK OUT */
+ }
+ }
+ assert(end != NULL); /* search RE succeeded so loop should */
+ freedfa(d);
+
+ /*
+ * And pin down details.
+ */
+
+ assert(v->nmatch > 0);
+ v->pmatch[0].rm_so = OFF(begin);
+ v->pmatch[0].rm_eo = OFF(end);
+ if (v->g->cflags&REG_EXPECT) {
+ if (cold != NULL) {
+ v->details->rm_extend.rm_so = OFF(cold);
+ } else {
+ v->details->rm_extend.rm_so = OFF(v->stop);
+ }
+ v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
+ }
+ if (v->nmatch == 1) { /* no need for submatches */
+ return REG_OKAY;
+ }
+
+ /*
+ * Submatches.
+ */
+
+ zapsubs(v->pmatch, v->nmatch);
+ return dissect(v, v->g->tree, begin, end);
+}
+
+/*
+ - cfind - find a match for the main NFA (with complications)
+ ^ static int cfind(struct vars *, struct cnfa *, struct colormap *);
+ */
+static int
+cfind(
+ struct vars *v,
+ struct cnfa *cnfa,
+ struct colormap *cm)
+{
+ struct dfa *s;
+ struct dfa *d;
+ chr *cold = NULL; /* silence gcc 4 warning */
+ int ret;
+
+ s = newdfa(v, &v->g->search, cm, &v->dfa1);
+ NOERR();
+ d = newdfa(v, cnfa, cm, &v->dfa2);
+ if (ISERR()) {
+ assert(d == NULL);
+ freedfa(s);
+ return v->err;
+ }
+
+ ret = cfindloop(v, cnfa, cm, d, s, &cold);
+
+ freedfa(d);
+ freedfa(s);
+ NOERR();
+ if (v->g->cflags&REG_EXPECT) {
+ assert(v->details != NULL);
+ if (cold != NULL) {
+ v->details->rm_extend.rm_so = OFF(cold);
+ } else {
+ v->details->rm_extend.rm_so = OFF(v->stop);
+ }
+ v->details->rm_extend.rm_eo = OFF(v->stop); /* unknown */
+ }
+ return ret;
+}
+
+/*
+ - cfindloop - the heart of cfind
+ ^ static int cfindloop(struct vars *, struct cnfa *, struct colormap *,
+ ^ struct dfa *, struct dfa *, chr **);
+ */
+static int
+cfindloop(
+ struct vars *v,
+ struct cnfa *cnfa,
+ struct colormap *cm,
+ struct dfa *d,
+ struct dfa *s,
+ chr **coldp) /* where to put coldstart pointer */
+{
+ chr *begin;
+ chr *end;
+ chr *cold;
+ chr *open; /* Open and close of range of possible
+ * starts */
+ chr *close;
+ chr *estart;
+ chr *estop;
+ int er;
+ int shorter = v->g->tree->flags&SHORTER;
+ int hitend;
+
+ assert(d != NULL && s != NULL);
+ cold = NULL;
+ close = v->start;
+ do {
+ MDEBUG(("\ncsearch at %ld\n", LOFF(close)));
+ close = shortest(v, s, close, close, v->stop, &cold, NULL);
+ if (close == NULL) {
+ break; /* NOTE BREAK */
+ }
+ assert(cold != NULL);
+ open = cold;
+ cold = NULL;
+ MDEBUG(("cbetween %ld and %ld\n", LOFF(open), LOFF(close)));
+ for (begin = open; begin <= close; begin++) {
+ MDEBUG(("\ncfind trying at %ld\n", LOFF(begin)));
+ estart = begin;
+ estop = v->stop;
+ for (;;) {
+ if (shorter) {
+ end = shortest(v, d, begin, estart, estop, NULL, &hitend);
+ } else {
+ end = longest(v, d, begin, estop, &hitend);
+ }
+ if (hitend && cold == NULL) {
+ cold = begin;
+ }
+ if (end == NULL) {
+ break; /* NOTE BREAK OUT */
+ }
+
+ MDEBUG(("tentative end %ld\n", LOFF(end)));
+ zapsubs(v->pmatch, v->nmatch);
+ zapmem(v, v->g->tree);
+ er = cdissect(v, v->g->tree, begin, end);
+ if (er == REG_OKAY) {
+ if (v->nmatch > 0) {
+ v->pmatch[0].rm_so = OFF(begin);
+ v->pmatch[0].rm_eo = OFF(end);
+ }
+ *coldp = cold;
+ return REG_OKAY;
+ }
+ if (er != REG_NOMATCH) {
+ ERR(er);
+ return er;
+ }
+ if ((shorter) ? end == estop : end == begin) {
+ /*
+ * No point in trying again.
+ */
+
+ *coldp = cold;
+ return REG_NOMATCH;
+ }
+
+ /*
+ * Go around and try again
+ */
+
+ if (shorter) {
+ estart = end + 1;
+ } else {
+ estop = end - 1;
+ }
+ }
+ }
+ } while (close < v->stop);
+
+ *coldp = cold;
+ return REG_NOMATCH;
+}
+
+/*
+ - zapsubs - initialize the subexpression matches to "no match"
+ ^ static void zapsubs(regmatch_t *, size_t);
+ */
+static void
+zapsubs(
+ regmatch_t *p,
+ size_t n)
+{
+ size_t i;
+
+ for (i = n-1; i > 0; i--) {
+ p[i].rm_so = -1;
+ p[i].rm_eo = -1;
+ }
+}
+
+/*
+ - zapmem - initialize the retry memory of a subtree to zeros
+ ^ static void zapmem(struct vars *, struct subre *);
+ */
+static void
+zapmem(
+ struct vars *v,
+ struct subre *t)
+{
+ if (t == NULL) {
+ return;
+ }
+
+ assert(v->mem != NULL);
+ v->mem[t->retry] = 0;
+ if (t->op == '(') {
+ assert(t->subno > 0);
+ v->pmatch[t->subno].rm_so = -1;
+ v->pmatch[t->subno].rm_eo = -1;
+ }
+
+ if (t->left != NULL) {
+ zapmem(v, t->left);
+ }
+ if (t->right != NULL) {
+ zapmem(v, t->right);
+ }
+}
+
+/*
+ - subset - set any subexpression relevant to a successful subre
+ ^ static void subset(struct vars *, struct subre *, chr *, chr *);
+ */
+static void
+subset(
+ struct vars *v,
+ struct subre *sub,
+ chr *begin,
+ chr *end)
+{
+ int n = sub->subno;
+
+ assert(n > 0);
+ if ((size_t)n >= v->nmatch) {
+ return;
+ }
+
+ MDEBUG(("setting %d\n", n));
+ v->pmatch[n].rm_so = OFF(begin);
+ v->pmatch[n].rm_eo = OFF(end);
+}
+
+/*
+ - dissect - determine subexpression matches (uncomplicated case)
+ ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+dissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ assert(t != NULL);
+ MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
+
+ switch (t->op) {
+ case '=': /* terminal node */
+ assert(t->left == NULL && t->right == NULL);
+ return REG_OKAY; /* no action, parent did the work */
+ break;
+ case '|': /* alternation */
+ assert(t->left != NULL);
+ return altdissect(v, t, begin, end);
+ break;
+ case 'b': /* back ref -- shouldn't be calling us! */
+ return REG_ASSERT;
+ break;
+ case '.': /* concatenation */
+ assert(t->left != NULL && t->right != NULL);
+ return condissect(v, t, begin, end);
+ break;
+ case '(': /* capturing */
+ assert(t->left != NULL && t->right == NULL);
+ assert(t->subno > 0);
+ subset(v, t, begin, end);
+ return dissect(v, t->left, begin, end);
+ break;
+ default:
+ return REG_ASSERT;
+ break;
+ }
+}
+
+/*
+ - condissect - determine concatenation subexpression matches (uncomplicated)
+ ^ static int condissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+condissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ struct dfa *d;
+ struct dfa *d2;
+ chr *mid;
+ int i;
+ int shorter = (t->left->flags&SHORTER) ? 1 : 0;
+ chr *stop = (shorter) ? end : begin;
+
+ assert(t->op == '.');
+ assert(t->left != NULL && t->left->cnfa.nstates > 0);
+ assert(t->right != NULL && t->right->cnfa.nstates > 0);
+
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
+ NOERR();
+ d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
+ if (ISERR()) {
+ assert(d2 == NULL);
+ freedfa(d);
+ return v->err;
+ }
+
+ /*
+ * Pick a tentative midpoint.
+ */
+
+ if (shorter) {
+ mid = shortest(v, d, begin, begin, end, NULL, NULL);
+ } else {
+ mid = longest(v, d, begin, end, NULL);
+ }
+ if (mid == NULL) {
+ freedfa(d);
+ freedfa(d2);
+ return REG_ASSERT;
+ }
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+
+ /*
+ * Iterate until satisfaction or failure.
+ */
+
+ while (longest(v, d2, mid, end, NULL) != end) {
+ /*
+ * That midpoint didn't work, find a new one.
+ */
+
+ if (mid == stop) {
+ /*
+ * All possibilities exhausted!
+ */
+
+ MDEBUG(("no midpoint!\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_ASSERT;
+ }
+ if (shorter) {
+ mid = shortest(v, d, begin, mid+1, end, NULL, NULL);
+ } else {
+ mid = longest(v, d, begin, mid-1, NULL);
+ }
+ if (mid == NULL) {
+ /*
+ * Failed to find a new one!
+ */
+
+ MDEBUG(("failed midpoint!\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_ASSERT;
+ }
+ MDEBUG(("new midpoint %ld\n", LOFF(mid)));
+ }
+
+ /*
+ * Satisfaction.
+ */
+
+ MDEBUG(("successful\n"));
+ freedfa(d);
+ freedfa(d2);
+ i = dissect(v, t->left, begin, mid);
+ if (i != REG_OKAY) {
+ return i;
+ }
+ return dissect(v, t->right, mid, end);
+}
+
+/*
+ - altdissect - determine alternative subexpression matches (uncomplicated)
+ ^ static int altdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+altdissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ struct dfa *d;
+ int i;
+
+ assert(t != NULL);
+ assert(t->op == '|');
+
+ for (i = 0; t != NULL; t = t->right, i++) {
+ MDEBUG(("trying %dth\n", i));
+ assert(t->left != NULL && t->left->cnfa.nstates > 0);
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
+ if (ISERR()) {
+ return v->err;
+ }
+ if (longest(v, d, begin, end, NULL) == end) {
+ MDEBUG(("success\n"));
+ freedfa(d);
+ return dissect(v, t->left, begin, end);
+ }
+ freedfa(d);
+ }
+ return REG_ASSERT; /* none of them matched?!? */
+}
+
+/*
+ - cdissect - determine subexpression matches (with complications)
+ * The retry memory stores the offset of the trial midpoint from begin, plus 1
+ * so that 0 uniquely means "clean slate".
+ ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+cdissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ int er;
+
+ assert(t != NULL);
+ MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
+
+ switch (t->op) {
+ case '=': /* terminal node */
+ assert(t->left == NULL && t->right == NULL);
+ return REG_OKAY; /* no action, parent did the work */
+ break;
+ case '|': /* alternation */
+ assert(t->left != NULL);
+ return caltdissect(v, t, begin, end);
+ break;
+ case 'b': /* back ref -- shouldn't be calling us! */
+ assert(t->left == NULL && t->right == NULL);
+ return cbrdissect(v, t, begin, end);
+ break;
+ case '.': /* concatenation */
+ assert(t->left != NULL && t->right != NULL);
+ return ccondissect(v, t, begin, end);
+ break;
+ case '(': /* capturing */
+ assert(t->left != NULL && t->right == NULL);
+ assert(t->subno > 0);
+ er = cdissect(v, t->left, begin, end);
+ if (er == REG_OKAY) {
+ subset(v, t, begin, end);
+ }
+ return er;
+ break;
+ default:
+ return REG_ASSERT;
+ break;
+ }
+}
+
+/*
+ - ccondissect - concatenation subexpression matches (with complications)
+ * The retry memory stores the offset of the trial midpoint from begin, plus 1
+ * so that 0 uniquely means "clean slate".
+ ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+ccondissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ struct dfa *d;
+ struct dfa *d2;
+ chr *mid;
+ int er;
+
+ assert(t->op == '.');
+ assert(t->left != NULL && t->left->cnfa.nstates > 0);
+ assert(t->right != NULL && t->right->cnfa.nstates > 0);
+
+ if (t->left->flags&SHORTER) { /* reverse scan */
+ return crevdissect(v, t, begin, end);
+ }
+
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+ if (ISERR()) {
+ return v->err;
+ }
+ d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
+ if (ISERR()) {
+ freedfa(d);
+ return v->err;
+ }
+ MDEBUG(("cconcat %d\n", t->retry));
+
+ /*
+ * Pick a tentative midpoint.
+ */
+
+ if (v->mem[t->retry] == 0) {
+ mid = longest(v, d, begin, end, NULL);
+ if (mid == NULL) {
+ freedfa(d);
+ freedfa(d2);
+ return REG_NOMATCH;
+ }
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+ v->mem[t->retry] = (mid - begin) + 1;
+ } else {
+ mid = begin + (v->mem[t->retry] - 1);
+ MDEBUG(("working midpoint %ld\n", LOFF(mid)));
+ }
+
+ /*
+ * Iterate until satisfaction or failure.
+ */
+
+ for (;;) {
+ /*
+ * Try this midpoint on for size.
+ */
+
+ er = cdissect(v, t->left, begin, mid);
+ if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
+ && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
+ break; /* NOTE BREAK OUT */
+ }
+ if ((er != REG_OKAY) && (er != REG_NOMATCH)) {
+ freedfa(d);
+ freedfa(d2);
+ return er;
+ }
+
+ /*
+ * That midpoint didn't work, find a new one.
+ */
+
+ if (mid == begin) {
+ /*
+ * All possibilities exhausted.
+ */
+
+ MDEBUG(("%d no midpoint\n", t->retry));
+ freedfa(d);
+ freedfa(d2);
+ return REG_NOMATCH;
+ }
+ mid = longest(v, d, begin, mid-1, NULL);
+ if (mid == NULL) {
+ /*
+ * Failed to find a new one.
+ */
+
+ MDEBUG(("%d failed midpoint\n", t->retry));
+ freedfa(d);
+ freedfa(d2);
+ return REG_NOMATCH;
+ }
+ MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
+ v->mem[t->retry] = (mid - begin) + 1;
+ zapmem(v, t->left);
+ zapmem(v, t->right);
+ }
+
+ /*
+ * Satisfaction.
+ */
+
+ MDEBUG(("successful\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_OKAY;
+}
+
+/*
+ - crevdissect - determine backref shortest-first subexpression matches
+ * The retry memory stores the offset of the trial midpoint from begin, plus 1
+ * so that 0 uniquely means "clean slate".
+ ^ static int crevdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+crevdissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ struct dfa *d;
+ struct dfa *d2;
+ chr *mid;
+ int er;
+
+ assert(t->op == '.');
+ assert(t->left != NULL && t->left->cnfa.nstates > 0);
+ assert(t->right != NULL && t->right->cnfa.nstates > 0);
+ assert(t->left->flags&SHORTER);
+
+ /*
+ * Concatenation -- need to split the substring between parts.
+ */
+
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+ if (ISERR()) {
+ return v->err;
+ }
+ d2 = newdfa(v, &t->right->cnfa, &v->g->cmap, DOMALLOC);
+ if (ISERR()) {
+ freedfa(d);
+ return v->err;
+ }
+ MDEBUG(("crev %d\n", t->retry));
+
+ /*
+ * Pick a tentative midpoint.
+ */
+
+ if (v->mem[t->retry] == 0) {
+ mid = shortest(v, d, begin, begin, end, NULL, NULL);
+ if (mid == NULL) {
+ freedfa(d);
+ freedfa(d2);
+ return REG_NOMATCH;
+ }
+ MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
+ v->mem[t->retry] = (mid - begin) + 1;
+ } else {
+ mid = begin + (v->mem[t->retry] - 1);
+ MDEBUG(("working midpoint %ld\n", LOFF(mid)));
+ }
+
+ /*
+ * Iterate until satisfaction or failure.
+ */
+
+ for (;;) {
+ /*
+ * Try this midpoint on for size.
+ */
+
+ er = cdissect(v, t->left, begin, mid);
+ if ((er == REG_OKAY) && (longest(v, d2, mid, end, NULL) == end)
+ && (er = cdissect(v, t->right, mid, end)) == REG_OKAY) {
+ break; /* NOTE BREAK OUT */
+ }
+ if (er != REG_OKAY && er != REG_NOMATCH) {
+ freedfa(d);
+ freedfa(d2);
+ return er;
+ }
+
+ /*
+ * That midpoint didn't work, find a new one.
+ */
+
+ if (mid == end) {
+ /*
+ * All possibilities exhausted.
+ */
+
+ MDEBUG(("%d no midpoint\n", t->retry));
+ freedfa(d);
+ freedfa(d2);
+ return REG_NOMATCH;
+ }
+ mid = shortest(v, d, begin, mid+1, end, NULL, NULL);
+ if (mid == NULL) {
+ /*
+ * Failed to find a new one.
+ */
+
+ MDEBUG(("%d failed midpoint\n", t->retry));
+ freedfa(d);
+ freedfa(d2);
+ return REG_NOMATCH;
+ }
+ MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
+ v->mem[t->retry] = (mid - begin) + 1;
+ zapmem(v, t->left);
+ zapmem(v, t->right);
+ }
+
+ /*
+ * Satisfaction.
+ */
+
+ MDEBUG(("successful\n"));
+ freedfa(d);
+ freedfa(d2);
+ return REG_OKAY;
+}
+
+/*
+ - cbrdissect - determine backref subexpression matches
+ ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+cbrdissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ int i;
+ int n = t->subno;
+ size_t len;
+ chr *paren;
+ chr *p;
+ chr *stop;
+ int min = t->min;
+ int max = t->max;
+
+ assert(t != NULL);
+ assert(t->op == 'b');
+ assert(n >= 0);
+ assert((size_t)n < v->nmatch);
+
+ MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
+
+ if (v->pmatch[n].rm_so == -1) {
+ return REG_NOMATCH;
+ }
+ paren = v->start + v->pmatch[n].rm_so;
+ len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
+
+ /*
+ * No room to maneuver -- retries are pointless.
+ */
+
+ if (v->mem[t->retry]) {
+ return REG_NOMATCH;
+ }
+ v->mem[t->retry] = 1;
+
+ /*
+ * Special-case zero-length string.
+ */
+
+ if (len == 0) {
+ if (begin == end) {
+ return REG_OKAY;
+ }
+ return REG_NOMATCH;
+ }
+
+ /*
+ * And too-short string.
+ */
+
+ assert(end >= begin);
+ if ((size_t)(end - begin) < len) {
+ return REG_NOMATCH;
+ }
+ stop = end - len;
+
+ /*
+ * Count occurrences.
+ */
+
+ i = 0;
+ for (p = begin; p <= stop && (i < max || max == INFINITY); p += len) {
+ if ((*v->g->compare)(paren, p, len) != 0) {
+ break;
+ }
+ i++;
+ }
+ MDEBUG(("cbackref found %d\n", i));
+
+ /*
+ * And sort it out.
+ */
+
+ if (p != end) { /* didn't consume all of it */
+ return REG_NOMATCH;
+ }
+ if (min <= i && (i <= max || max == INFINITY)) {
+ return REG_OKAY;
+ }
+ return REG_NOMATCH; /* out of range */
+}
+
+/*
+ - caltdissect - determine alternative subexpression matches (w. complications)
+ ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
+ */
+static int /* regexec return code */
+caltdissect(
+ struct vars *v,
+ struct subre *t,
+ chr *begin, /* beginning of relevant substring */
+ chr *end) /* end of same */
+{
+ struct dfa *d;
+ int er;
+#define UNTRIED 0 /* not yet tried at all */
+#define TRYING 1 /* top matched, trying submatches */
+#define TRIED 2 /* top didn't match or submatches exhausted */
+
+ if (t == NULL) {
+ return REG_NOMATCH;
+ }
+ assert(t->op == '|');
+ if (v->mem[t->retry] == TRIED) {
+ return caltdissect(v, t->right, begin, end);
+ }
+
+ MDEBUG(("calt n%d\n", t->retry));
+ assert(t->left != NULL);
+
+ if (v->mem[t->retry] == UNTRIED) {
+ d = newdfa(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
+ if (ISERR()) {
+ return v->err;
+ }
+ if (longest(v, d, begin, end, NULL) != end) {
+ freedfa(d);
+ v->mem[t->retry] = TRIED;
+ return caltdissect(v, t->right, begin, end);
+ }
+ freedfa(d);
+ MDEBUG(("calt matched\n"));
+ v->mem[t->retry] = TRYING;
+ }
+
+ er = cdissect(v, t->left, begin, end);
+ if (er != REG_NOMATCH) {
+ return er;
+ }
+
+ v->mem[t->retry] = TRIED;
+ return caltdissect(v, t->right, begin, end);
+}
+
+#include "rege_dfa.c"
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regfree.c b/contrib/hsrex/regfree.c
new file mode 100644
index 0000000..b0aaa70
--- /dev/null
+++ b/contrib/hsrex/regfree.c
@@ -0,0 +1,60 @@
+/*
+ * regfree - free an RE
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You might think that this could be incorporated into regcomp.c, and that
+ * would be a reasonable idea... except that this is a generic function (with
+ * a generic name), applicable to all compiled REs regardless of the size of
+ * their characters, whereas the stuff in regcomp.c gets compiled once per
+ * character size.
+ */
+
+#include "regguts.h"
+
+/*
+ - regfree - free an RE (generic function, punts to RE-specific function)
+ *
+ * Ignoring invocation with NULL is a convenience.
+ */
+void
+regfree(
+ regex_t *re)
+{
+ if (re == NULL) {
+ return;
+ }
+ (*((struct fns *)re->re_fns)->free)(re);
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/contrib/hsrex/regguts.h b/contrib/hsrex/regguts.h
new file mode 100644
index 0000000..67e3d03
--- /dev/null
+++ b/contrib/hsrex/regguts.h
@@ -0,0 +1,428 @@
+/*
+ * Internal interface definitions, etc., for the reg package
+ *
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms -- with or without
+ * modification -- are permitted for any purpose, provided that
+ * redistributions in source form retain this entire copyright notice and
+ * indicate the origin and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Environmental customization. It should not (I hope) be necessary to alter
+ * the file you are now reading -- regcustom.h should handle it all, given
+ * care here and elsewhere.
+ */
+#include "regcustom.h"
+
+/*
+ * Things that regcustom.h might override.
+ */
+
+/* standard header files (NULL is a reasonable indicator for them) */
+#ifndef NULL
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <string.h>
+#endif
+
+/* assertions */
+#ifndef assert
+#ifndef REG_DEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions */
+#endif
+#endif /* !REG_DEBUG */
+#include <assert.h>
+#endif
+
+/* voids */
+#ifndef VOID
+#define VOID void /* for function return values */
+#endif
+#ifndef DISCARD
+#define DISCARD void /* for throwing values away */
+#endif
+#ifndef PVOID
+#define PVOID void * /* generic pointer */
+#endif
+#ifndef VS
+#define VS(x) ((void*)(x)) /* cast something to generic ptr */
+#endif
+#ifndef NOPARMS
+#define NOPARMS void /* for empty parm lists */
+#endif
+
+/* const */
+#ifndef CONST
+#define CONST const /* for old compilers, might be empty */
+#endif
+
+/* function-pointer declarator */
+#ifndef FUNCPTR
+#if __STDC__ >= 1
+#define FUNCPTR(name, args) (*name)args
+#else
+#define FUNCPTR(name, args) (*name)()
+#endif
+#endif
+
+/* memory allocation */
+#ifndef MALLOC
+#define MALLOC(n) malloc(n)
+#endif
+#ifndef REALLOC
+#define REALLOC(p, n) realloc(VS(p), n)
+#endif
+#ifndef FREE
+#define FREE(p) free(VS(p))
+#endif
+
+/* want size of a char in bits, and max value in bounded quantifiers */
+#ifndef CHAR_BIT
+#include <limits.h>
+#endif
+#ifndef _POSIX2_RE_DUP_MAX
+#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
+#endif
+
+/*
+ * misc
+ */
+
+#define NOTREACHED 0
+#define xxx 1
+
+#define DUPMAX _POSIX2_RE_DUP_MAX
+#define INFINITY (DUPMAX+1)
+
+#define REMAGIC 0xfed7 /* magic number for main struct */
+
+/*
+ * debugging facilities
+ */
+#ifdef REG_DEBUG
+/* FDEBUG does finite-state tracing */
+#define FDEBUG(arglist) { if (v->eflags&REG_FTRACE) printf arglist; }
+/* MDEBUG does higher-level tracing */
+#define MDEBUG(arglist) { if (v->eflags&REG_MTRACE) printf arglist; }
+#else
+#define FDEBUG(arglist) {}
+#define MDEBUG(arglist) {}
+#endif
+
+/*
+ * bitmap manipulation
+ */
+#define UBITS (CHAR_BIT * sizeof(unsigned))
+#define BSET(uv, sn) ((uv)[(sn)/UBITS] |= (unsigned)1 << ((sn)%UBITS))
+#define ISBSET(uv, sn) ((uv)[(sn)/UBITS] & ((unsigned)1 << ((sn)%UBITS)))
+
+/*
+ * We dissect a chr into byts for colormap table indexing. Here we define a
+ * byt, which will be the same as a byte on most machines... The exact size of
+ * a byt is not critical, but about 8 bits is good, and extraction of 8-bit
+ * chunks is sometimes especially fast.
+ */
+
+#ifndef BYTBITS
+#define BYTBITS 8 /* bits in a byt */
+#endif
+#define BYTTAB (1<<BYTBITS) /* size of table with one entry per byt value */
+#define BYTMASK (BYTTAB-1) /* bit mask for byt */
+#define NBYTS ((CHRBITS+BYTBITS-1)/BYTBITS)
+/* the definition of GETCOLOR(), below, assumes NBYTS <= 4 */
+
+/*
+ * As soon as possible, we map chrs into equivalence classes -- "colors" --
+ * which are of much more manageable number.
+ */
+
+typedef short color; /* colors of characters */
+typedef int pcolor; /* what color promotes to */
+#define COLORLESS (-1) /* impossible color */
+#define WHITE 0 /* default color, parent of all others */
+
+/*
+ * A colormap is a tree -- more precisely, a DAG -- indexed at each level by a
+ * byt of the chr, to map the chr to a color efficiently. Because lower
+ * sections of the tree can be shared, it can exploit the usual sparseness of
+ * such a mapping table. The tree is always NBYTS levels deep (in the past it
+ * was shallower during construction but was "filled" to full depth at the end
+ * of that); areas that are unaltered as yet point to "fill blocks" which are
+ * entirely WHITE in color.
+ */
+
+/* the tree itself */
+struct colors {
+ color ccolor[BYTTAB];
+};
+struct ptrs {
+ union tree *pptr[BYTTAB];
+};
+union tree {
+ struct colors colors;
+ struct ptrs ptrs;
+};
+#define tcolor colors.ccolor
+#define tptr ptrs.pptr
+
+/* Internal per-color descriptor structure for the color machinery */
+struct colordesc {
+ uchr nchrs; /* number of chars of this color */
+ color sub; /* open subcolor (if any); free chain ptr */
+#define NOSUB COLORLESS
+ struct arc *arcs; /* color chain */
+ int flags;
+#define FREECOL 01 /* currently free */
+#define PSEUDO 02 /* pseudocolor, no real chars */
+#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
+ union tree *block; /* block of solid color, if any */
+};
+
+/* the color map itself */
+struct colormap {
+ int magic;
+#define CMMAGIC 0x876
+ struct vars *v; /* for compile error reporting */
+ size_t ncds; /* number of colordescs */
+ size_t max; /* highest in use */
+ color free; /* beginning of free chain (if non-0) */
+ struct colordesc *cd;
+#define CDEND(cm) (&(cm)->cd[(cm)->max + 1])
+#define NINLINECDS ((size_t)10)
+ struct colordesc cdspace[NINLINECDS];
+ union tree tree[NBYTS]; /* tree top, plus fill blocks */
+};
+
+/* optimization magic to do fast chr->color mapping */
+#define B0(c) ((c) & BYTMASK)
+#define B1(c) (((c)>>BYTBITS) & BYTMASK)
+#define B2(c) (((c)>>(2*BYTBITS)) & BYTMASK)
+#define B3(c) (((c)>>(3*BYTBITS)) & BYTMASK)
+#if NBYTS == 1
+#define GETCOLOR(cm, c) ((cm)->tree->tcolor[B0(c)])
+#endif
+/* beware, for NBYTS>1, GETCOLOR() is unsafe -- 2nd arg used repeatedly */
+#if NBYTS == 2
+#define GETCOLOR(cm, c) ((cm)->tree->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+#if NBYTS == 4
+#define GETCOLOR(cm, c) ((cm)->tree->tptr[B3(c)]->tptr[B2(c)]->tptr[B1(c)]->tcolor[B0(c)])
+#endif
+
+/*
+ * Interface definitions for locale-interface functions in locale.c.
+ */
+
+/* Representation of a set of characters. */
+struct cvec {
+ int nchrs; /* number of chrs */
+ int chrspace; /* number of chrs possible */
+ chr *chrs; /* pointer to vector of chrs */
+ int nranges; /* number of ranges (chr pairs) */
+ int rangespace; /* number of chrs possible */
+ chr *ranges; /* pointer to vector of chr pairs */
+};
+
+/*
+ * definitions for non-deterministic finite autmaton (NFA) internal
+ * representation
+ *
+ * Having a "from" pointer within each arc may seem redundant, but it saves a
+ * lot of hassle.
+ */
+
+struct state;
+
+struct arc {
+ int type;
+#define ARCFREE '\0'
+ color co;
+ struct state *from; /* where it's from (and contained within) */
+ struct state *to; /* where it's to */
+ struct arc *outchain; /* *from's outs chain or free chain */
+#define freechain outchain
+ struct arc *inchain; /* *to's ins chain */
+ struct arc *colorchain; /* color's arc chain */
+ struct arc *colorchainRev; /* back-link in color's arc chain */
+};
+
+struct arcbatch { /* for bulk allocation of arcs */
+ struct arcbatch *next;
+#define ABSIZE 10
+ struct arc a[ABSIZE];
+};
+
+struct state {
+ int no;
+#define FREESTATE (-1)
+ char flag; /* marks special states */
+ int nins; /* number of inarcs */
+ struct arc *ins; /* chain of inarcs */
+ int nouts; /* number of outarcs */
+ struct arc *outs; /* chain of outarcs */
+ struct arc *free; /* chain of free arcs */
+ struct state *tmp; /* temporary for traversal algorithms */
+ struct state *next; /* chain for traversing all */
+ struct state *prev; /* back chain */
+ struct arcbatch oas; /* first arcbatch, avoid malloc in easy case */
+ int noas; /* number of arcs used in first arcbatch */
+};
+
+struct nfa {
+ struct state *pre; /* pre-initial state */
+ struct state *init; /* initial state */
+ struct state *final; /* final state */
+ struct state *post; /* post-final state */
+ int nstates; /* for numbering states */
+ struct state *states; /* state-chain header */
+ struct state *slast; /* tail of the chain */
+ struct state *free; /* free list */
+ struct colormap *cm; /* the color map */
+ color bos[2]; /* colors, if any, assigned to BOS and BOL */
+ color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ size_t size; /* Current NFA size; differs from nstates as
+ * it also counts the number of states created
+ * by children of this state. */
+ struct vars *v; /* simplifies compile error reporting */
+ struct nfa *parent; /* parent NFA, if any */
+};
+
+/*
+ * definitions for compacted NFA
+ */
+
+struct carc {
+ color co; /* COLORLESS is list terminator */
+ int to; /* state number */
+};
+
+struct cnfa {
+ int nstates; /* number of states */
+ int ncolors; /* number of colors */
+ int flags;
+#define HASLACONS 01 /* uses lookahead constraints */
+ int pre; /* setup state number */
+ int post; /* teardown state number */
+ color bos[2]; /* colors, if any, assigned to BOS and BOL */
+ color eos[2]; /* colors, if any, assigned to EOS and EOL */
+ struct carc **states; /* vector of pointers to outarc lists */
+ struct carc *arcs; /* the area for the lists */
+};
+#define ZAPCNFA(cnfa) ((cnfa).nstates = 0)
+#define NULLCNFA(cnfa) ((cnfa).nstates == 0)
+
+/*
+ * Used to limit the maximum NFA size to something sane. [Bug 1810264]
+ */
+
+#ifndef REG_MAX_STATES
+# define REG_MAX_STATES 100000
+#endif
+
+/*
+ * subexpression tree
+ */
+
+struct subre {
+ char op; /* '|', '.' (concat), 'b' (backref), '(',
+ * '=' */
+ char flags;
+#define LONGER 01 /* prefers longer match */
+#define SHORTER 02 /* prefers shorter match */
+#define MIXED 04 /* mixed preference below */
+#define CAP 010 /* capturing parens below */
+#define BACKR 020 /* back reference below */
+#define INUSE 0100 /* in use in final tree */
+#define LOCAL 03 /* bits which may not propagate up */
+#define LMIX(f) ((f)<<2) /* LONGER -> MIXED */
+#define SMIX(f) ((f)<<1) /* SHORTER -> MIXED */
+#define UP(f) (((f)&~LOCAL) | (LMIX(f) & SMIX(f) & MIXED))
+#define MESSY(f) ((f)&(MIXED|CAP|BACKR))
+#define PREF(f) ((f)&LOCAL)
+#define PREF2(f1, f2) ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
+#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
+ short retry; /* index into retry memory */
+ int subno; /* subexpression number (for 'b' and '(') */
+ short min; /* min repetitions, for backref only */
+ short max; /* max repetitions, for backref only */
+ struct subre *left; /* left child, if any (also freelist chain) */
+ struct subre *right; /* right child, if any */
+ struct state *begin; /* outarcs from here... */
+ struct state *end; /* ...ending in inarcs here */
+ struct cnfa cnfa; /* compacted NFA, if any */
+ struct subre *chain; /* for bookkeeping and error cleanup */
+};
+
+/*
+ * table of function pointers for generic manipulation functions. A regex_t's
+ * re_fns points to one of these.
+ */
+
+struct fns {
+ VOID FUNCPTR(free, (regex_t *));
+};
+
+/*
+ * the insides of a regex_t, hidden behind a void *
+ */
+
+struct guts {
+ int magic;
+#define GUTSMAGIC 0xfed9
+ int cflags; /* copy of compile flags */
+ long info; /* copy of re_info */
+ size_t nsub; /* copy of re_nsub */
+ struct subre *tree;
+ struct cnfa search; /* for fast preliminary search */
+ int ntree;
+ struct colormap cmap;
+ int FUNCPTR(compare, (CONST chr *, CONST chr *, size_t));
+ struct subre *lacons; /* lookahead-constraint vector */
+ int nlacons; /* size of lacons */
+};
+
+/*
+ * Magic for allocating a variable workspace. This default version is
+ * stack-hungry.
+ */
+
+#ifndef AllocVars
+#define AllocVars(vPtr) \
+ struct vars var; \
+ register struct vars *vPtr = &var
+#endif
+#ifndef FreeVars
+#define FreeVars(vPtr) ((void) 0)
+#endif
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */