aboutsummaryrefslogtreecommitdiffhomepage
path: root/contrib/hsrex/regcustom.h
diff options
context:
space:
mode:
author Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-14 19:00:01 +0200
committer Robin Haberkorn <robin.haberkorn@googlemail.com> 2024-09-14 19:00:01 +0200
commit ab0d97147d8c19eabc41b11698dff13cd04d67ae (patch)
tree b9f7cb00405b41fce77c5df3ac9a7ea0e2ebfc1c /contrib/hsrex/regcustom.h
parent 07d9cdfd3d1462f5f19cfa1422d9b5710c9e139d (diff)
download sciteco-ab0d97147d8c19eabc41b11698dff13cd04d67ae.tar.gz
imported Henry Spencer's regex implementation from Tcl
Source: github.com/garyhouston/hsrex
* This version should be a Thompson NFA, using backtracking only for backreferences, so it should be much safer than PCRE (GRegex). Search times should be linear and there should be no way to cause stack overflows (unless we would generate backreferences).
* Importing the lib makes sure we don't add another compile-time dependency. Also, we could implement our own regcomp() which translates directly from TECO patterns.
* This is still WIP and currently only works with the ASCII version. The widechar version does not define re_comp() and re_exec().
* Apparently we can't have an ASCII and widechar version at the same time, so we must build two libtool libraries and somehow mangle the names.
* Ideally the widechar version will also work with UTF-8 strings.
* An alternative might be to import the Gnulib regex module. How does it choose the encoding anyway?
* Or we could just use Oniguruma - but this would have to be a new external library dependency.
Diffstat (limited to 'contrib/hsrex/regcustom.h')
-rw-r--r-- contrib/hsrex/regcustom.h 185
1 files changed, 185 insertions, 0 deletions
diff --git a/contrib/hsrex/regcustom.h b/contrib/hsrex/regcustom.h
new file mode 100644
index 0000000..c341c23
--- /dev/null
+++ b/contrib/hsrex/regcustom.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
+ *
+ * Development of this software was funded, in part, by Cray Research Inc.,
+ * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
+ * Corporation, none of whom are responsible for the results. The author
+ * thanks all of them.
+ *
+ * Redistribution and use in source and binary forms - with or without
+ * modification - are permitted for any purpose, provided that redistributions
+ * in source form retain this entire copyright notice and indicate the origin
+ * and nature of any modifications.
+ *
+ * I'd appreciate being given credit for this package in the documentation of
+ * software which uses it, but that is not a requirement.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Headers if any.
+ */
+
+#ifdef REGEX_STANDALONE
+# include "regalone.h"
+#else
+# include "tclInt.h"
+#endif
+
+/*
+ * Overrides for regguts.h definitions, if any.
+ */
+
+#define FUNCPTR(name, args) (*name)args /* Pointer-to-function declarator. */
+#ifndef REGEX_STANDALONE /* Non-standalone build: route allocation via ck*(). */
+#define MALLOC(n) ckalloc(n)
+#define FREE(p) ckfree(VS(p)) /* VS() is defined elsewhere (not in this file). */
+#define REALLOC(p,n) ckrealloc(VS(p),n)
+#endif
+
+/*
+ * Do not insert anything extra between the "begin" and "end" marker lines:
+ * that chunk is automatically extracted to be fitted into regex.h.
+ */
+
+/* --- begin --- */
+/* Ensure certain things don't sneak in from system headers. */
+#ifdef __REG_WIDE_T
+#undef __REG_WIDE_T
+#endif
+#ifdef __REG_WIDE_COMPILE
+#undef __REG_WIDE_COMPILE
+#endif
+#ifdef __REG_WIDE_EXEC
+#undef __REG_WIDE_EXEC
+#endif
+#ifdef __REG_REGOFF_T
+#undef __REG_REGOFF_T
+#endif
+#ifdef __REG_VOID_T
+#undef __REG_VOID_T
+#endif
+#ifdef __REG_CONST
+#undef __REG_CONST
+#endif
+#ifdef __REG_NOFRONT
+#undef __REG_NOFRONT
+#endif
+#ifdef __REG_NOCHAR
+#undef __REG_NOCHAR
+#endif
+/* Interface types */
+#define __REG_WIDE_T Tcl_UniChar
+#define __REG_REGOFF_T long /* Not really right, but good enough... */
+#define __REG_VOID_T void
+#define __REG_CONST const
+/* Names and declarations */
+#define __REG_WIDE_COMPILE TclReComp
+#define __REG_WIDE_EXEC TclReExec
+#define __REG_NOFRONT /* Don't want regcomp() and regexec() */
+#define __REG_NOCHAR /* Or the char versions */
+#define regfree TclReFree
+#define regerror TclReError
+/* --- end --- */
+
+/*
+ * Internal character type and related.
+ */
+
+#ifndef REGEX_STANDALONE
+typedef Tcl_UniChar chr; /* The type itself (non-standalone builds only). */
+#endif
+typedef int pchr; /* What it promotes to. */
+typedef unsigned uchr; /* Unsigned type that will hold a chr. */
+typedef int celt; /* Type to hold chr, or NOCELT */
+#define NOCELT (-1) /* Celt value which is not valid chr */
+#define CHR(c) (UCHAR(c)) /* Turn char literal into chr literal */
+#define DIGITVAL(c) ((c)-'0') /* Turn chr digit into its value */
+#if TCL_UTF_MAX > 3 /* 32-bit chr. NOTE(review): CHR_MAX-CHR_MIN+1 is 2^32 here; confirm uchr can hold it. */
+#define CHRBITS 32 /* Bits in a chr; must not use sizeof */
+#define CHR_MIN 0x00000000 /* Smallest and largest chr; the value */
+#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#elif defined(REGEX_STANDALONE) && ! defined(REGEX_WCHAR) /* Standalone build without REGEX_WCHAR: 8-bit chr. */
+# define CHRBITS 8 /* Bits in a chr; must not use sizeof */
+# define CHR_MIN 0x00 /* Smallest and largest chr; the value */
+# define CHR_MAX 0xff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#else /* Every other configuration: 16-bit chr. */
+#define CHRBITS 16 /* Bits in a chr; must not use sizeof */
+#define CHR_MIN 0x0000 /* Smallest and largest chr; the value */
+#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
+#endif
+
+/*
+ * Classification predicates on chr; each forwards to a Tcl_UniCharIs*() name.
+ */
+
+#define iscalnum(x) Tcl_UniCharIsAlnum(x)
+#define iscalpha(x) Tcl_UniCharIsAlpha(x)
+#define iscdigit(x) Tcl_UniCharIsDigit(x)
+#define iscspace(x) Tcl_UniCharIsSpace(x)
+
+/*
+ * Name the external functions: `compile` and `exec` are renamed per build mode.
+ */
+
+#ifdef REGEX_STANDALONE
+# ifdef REGEX_WCHAR /* Standalone wide-character build. */
+# define compile re_wcomp
+# define exec re_wexec
+# define __REG_NOCHAR
+# else /* Standalone char build. */
+# define compile re_comp
+# define exec re_exec
+# undef __REG_NOCHAR /* Undoes the earlier "no char versions" request. */
+# endif
+#else /* Non-standalone build: use the TclRe* names. */
+#define compile TclReComp
+#define exec TclReExec
+#endif
+
+/*
+ * Enable/disable debugging code (by whether REG_DEBUG is defined or not).
+ */
+
+#if 0 /* No debug unless requested by makefile. */
+#define REG_DEBUG /* */
+#endif
+
+
+#ifndef REGEX_STANDALONE
+/*
+ * Method of allocating a local workspace. We use a thread-specific data
+ * space to store this because the regular expression engine is never
+ * reentered from the same thread; it doesn't make any callbacks.
+ */
+#define AllocVars(vPtr) \
+ static Tcl_ThreadDataKey varsKey; \
+ register struct vars *vPtr = (struct vars *) \
+ Tcl_GetThreadData(&varsKey, sizeof(struct vars))
+#elif 0 /* Disabled alternative; never compiled. */
+/*
+ * This strategy for allocating workspace is "more proper" in some sense, but
+ * quite a bit slower. Using TSD (as above) leads to code that is quite a bit
+ * faster in practice (measured!)
+ */
+#define AllocVars(vPtr) \
+ register struct vars *vPtr = (struct vars *) MALLOC(sizeof(struct vars))
+#define FreeVars(vPtr) \
+ FREE(vPtr)
+#endif /* NOTE(review): with REGEX_STANDALONE defined, nothing here defines AllocVars - presumably regalone.h does; confirm. */
+
+/*
+ * And pick up the standard header.
+ */
+
+#include "regex.h"