aboutsummaryrefslogtreecommitdiff
path: root/regtest_terex.sh
diff options
context:
space:
mode:
Diffstat (limited to 'regtest_terex.sh')
-rwxr-xr-xregtest_terex.sh311
1 files changed, 311 insertions, 0 deletions
diff --git a/regtest_terex.sh b/regtest_terex.sh
new file mode 100755
index 0000000..ca499d8
--- /dev/null
+++ b/regtest_terex.sh
@@ -0,0 +1,311 @@
+#!/bin/sh
+#
+# Copyright (c) 2002, Stooges & Cueless CO., All rights reserved.
+#
+# Module:
+# @(#)regtest_terex.sh 1.0 (Stooges & Clueless) 04/xx/02
+# Purpose:
+# @(#)Perform regression test to Henry Spencer's RE library.
+# Author:
+# Walter Waldo
+# History:
+# 04/xx/02 (ww) Version 1.0
+# 06/xx/26 (rh) Adaptions for terex
+#
+#set -x
+
+H=$HOME
+me=`basename $0`
+rgsrc=regtest_terex.c
+rgbin=regtest_terex
+datsrc=regtest_data.c
+datbin=regtest_data
+CC=gcc
+
+
+# cleanup_and_exit [status]
+# Remove the generated sources and binaries, then terminate the script.
+# $1 - exit status to terminate with. When omitted the status is 1.
+#      The argument used to be ignored, so every caller ended with status 1
+#      regardless of what it passed.
+cleanup_and_exit()
+{
+ rm -f "$rgsrc" "$rgbin" "$datsrc" "$datbin"
+ exit ${1:-1}
+}
+
+
+# usage [message]
+# Print the help text and terminate the script.
+# $1 - optional error message. When given it is printed first, prefixed with
+#      the script name, and the exit status is 1; otherwise the status is 0.
+usage ()
+{
+# "X$1" is quoted so that a message containing blanks is a single operand of
+# test; the message is passed as printf data, never as the format string.
+test "X$1" != X && printf '%s: %s\n' "$me" "$1"
+cat << EOF
+
+$me: Perform regression test to Henry Spencer's RE libary.
+Usage: $me [-h]
+
+Options:
+
+-h This stuff.
+
+EOF
+if test "X$1" != X; then
+ exit 1
+fi
+exit 0
+}
+
+. feedback_defs.sh
+
+while getopts h FLAG; do
+ case $FLAG in
+ h) usage
+ ;;
+ ?) quit "Unknown option"
+ ;;
+ esac
+done
+
+
+. regtest_util.sh
+trap "trap '' 0; cleanup_and_exit 1" HUP INT QUIT PIPE TERM
+trap "cleanup_and_exit $?" 0
+
+
+printf "\nTesting Henry Spencer's REs\n\n"
+#-----------------------------------
+# Generate the C source of the regex test driver.
+# Invocation:
+# $rgbin <number_of_groups> <RE> <string2scan>
+#
+# Behaviour of the driver:
+# - <RE> and <string2scan> may contain escapes made of a backslash, an x and
+#   two hex digits; each one is replaced by the corresponding byte.
+# - <number_of_groups> 0 compiles with REG_NOSUB; otherwise the number of
+#   groups found by the compiler must equal it.
+# - On success the captured groups are printed on one line, separated by ":".
+#   Nothing is printed when the RE has no groups.
+# - Errors are reported on stderr and the exit status is 1.
+#
+# The here-document delimiter is unquoted, so the shell collapses a doubled
+# backslash: the search string written below as "\\\x" reaches the C compiler
+# as "\\x" (a backslash followed by x). Do not put dollar signs or backquotes
+# in the C text.
+cat<<-EOF>$rgsrc
+ #include <ctype.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include "regalone.h"
+ #include "regex.h"
+ #ifdef REGEX_UTF8
+ # define re_comp re_ucomp
+ # define re_exec re_uexec
+ #endif
+ /*
+  * Copy src to t, replacing each escape (backslash, x, two hex digits) by
+  * the byte it denotes. An incomplete escape is copied unchanged.
+  * mxlen is the capacity of t in bytes, terminator included.
+  * Returns the number of bytes stored (terminator excluded), or (size_t)-1
+  * when src does not fit into t.
+  */
+ size_t hexescapes2bin(unsigned char *t, char *src, size_t mxlen)
+ {
+   char *s, *xs;
+   size_t len;
+   /* The result is never longer than src, so this check bounds every write. */
+   if ( strlen(src) >= mxlen )
+     return (size_t)-1;
+   s = xs = src;
+   len = 0;
+   while ( (s = strstr(s, "\\\x")) )
+   {
+     unsigned int cbin;
+     /* Skip an incomplete escape instead of stepping past the terminator. */
+     if ( !isxdigit((unsigned char)s[2]) || !isxdigit((unsigned char)s[3]) )
+     {
+       s += 2;
+       continue;
+     }
+     sscanf(&s[2], "%2x", &cbin);
+     memcpy(&t[len], xs, (size_t ) (s-xs));
+     len += (size_t ) (s-xs);
+     t[len++] = (unsigned char)cbin;
+     s += 4;
+     xs = s;
+   }
+   strcpy((char *)&t[len], xs);
+   len += strlen(xs);
+   return len;
+ }
+ int main(int argc, char *argv[])
+ {
+   unsigned char re[1024*4], dat[1024*8];
+   size_t relen, datlen;
+   regex_t cre;
+   regmatch_t pmatch[100];
+   int cflags, nmatch, rc;
+   char buf[1024*2];
+
+   if ( argc < 4 )
+   {
+     fprintf(stderr,
+             "Usage: %s <number_of_groups> <RE> <string2scan>\n", argv[0]);
+     return 1;
+   }
+   //memset(&cre, '\0', sizeof(cre));
+   nmatch = atoi(argv[1]);
+   relen = hexescapes2bin(re, argv[2], sizeof(re)/sizeof(char));
+   datlen = hexescapes2bin(dat, argv[3], sizeof(dat)/sizeof(char));
+   if ( relen == (size_t)-1 || datlen == (size_t)-1 )
+   {
+     fprintf(stderr, "RE or string to scan is too long.\n");
+     return 1;
+   }
+   cflags = REG_ADVANCED | (nmatch ? 0 : REG_NOSUB);
+   rc = re_comp(&cre, re, relen, cflags);
+   if ( rc != REG_OKAY )
+   {
+     regerror(rc, &cre, buf, sizeof(buf));
+     fprintf(stderr, "Compile error. %s\n", buf);
+     return 1;
+   }
+   if ( nmatch >= 0 && cre.re_nsub != (size_t)nmatch )
+   {
+     fprintf(stderr,
+             "Mismatch on number of group patterns. "
+             "Expected %d, compiled %zu\n",
+             nmatch, cre.re_nsub);
+     return 1;
+   }
+   rc = re_exec(&cre, dat, datlen, NULL, 100, pmatch, 0);
+   if ( rc != REG_OKAY )
+   {
+     regerror(rc, &cre, buf, sizeof(buf));
+     fprintf(stderr, "Execution error. %s\n", buf);
+     return 1;
+   }
+   if ( cre.re_nsub )
+   {
+     size_t i;
+
+     /*
+      * Groups are written straight from the scanned buffer: the offsets
+      * refer to dat, and writing piecewise needs no intermediate buffer.
+      * The index is also bounded by the size of pmatch.
+      */
+     for ( i=1;
+           i<=cre.re_nsub && i<sizeof(pmatch)/sizeof(pmatch[0])
+             && pmatch[i].rm_so>=0;
+           i++ )
+     {
+       if ( i>1 )
+         putchar(':');
+       fwrite(&dat[pmatch[i].rm_so], 1,
+              (size_t)(pmatch[i].rm_eo-pmatch[i].rm_so), stdout);
+     }
+     putchar('\n');
+   }
+   regfree(&cre);
+   return 0;
+ }
+EOF
+PATH=.:$PATH
+LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH
+export PATH LD_LIBRARY_PATH
+# Either this one
+#$CC -Wall -g -O0 -I. -I$H/inc -L. -lterex -o $rgbin $rgsrc # Test ascii ch
+# Or this one
+$CC -Wall -g -O0 -I. -I$H/inc -L. -lteurex -DREGEX_UTF8 -o $rgbin $rgsrc # Test wide ch
+#-----------------------------------
+resp=`$rgbin 0 "clavo" "Pablito clavo un clavito" 2>&1`
+msg="Simple match"
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 0 \
+ "(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" \
+ "1960-10-12" 2>&1`
+msg="yyyy-mm-dd between 1900-01-01 and 2099-12-31"
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 0 \
+ "(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])" \
+ "El arzobispo 1960-14-12 de Constantinopla" 2>&1`
+msg="yyyy-mm-dd out of 1900-01-01 and 2099-12-31"
+if echo "$resp"|grep "failed to match">/dev/null;
+then f_ok "$msg"; else f_no "$msg" "$resp"; fi
+#-----------------------------------
+resp=`$rgbin 0 "^([1-9]|[1-9][0-9]|[1-9][0-9][0-9])$" "432" 2>&1`
+msg="1..999"
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 0 "^([1-9]|[1-9][0-9]|[1-9][0-9][0-9])$" " 4321" 2>&1`
+msg="Bad 1..999"
+if echo "$resp"|grep "failed to match">/dev/null;
+then f_ok "$msg"; else f_no "$msg" "$resp"; fi
+#-----------------------------------
+resp=`$rgbin 0 "word1\W+(?:\w+\W+){1,3}?word2" \
+ "word1 clavo un clavito word2" 2>&1`
+msg="Quantifier: One to three words between 'word1' and 'word2'"
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 0 "a?a?a?a?a?aaaaaaaaaaaaaaa" \
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 2>&1`
+msg="Pathological: a?^6a^15 against aaaaaaaaaaaaaaaaaa..."
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 0 "(a|aa)*b" \
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab" 2>&1`
+msg="Pathological: (a|aa)*b against aaaaaaaaaaaaaaaaaa...b"
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+cat<<-EOF>$datsrc
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <ctype.h>
+ #ifdef WIN32
+ # include <process.h>
+ # define getpid _getpid
+ #elif defined(unix) || defined(__unix__)
+ # include <unistd.h>
+ #else
+ # error unknown platform
+ #endif
+ char nums[] = "0123456789";
+ char alph[] = "abcdefghijklmnopqrstuvwxyz";
+ int main(int argc, char *argv[])
+ {
+ char dat[16], *arr;
+ int arrsz, datsz, i;
+
+ if ( isdigit(argv[1][0]) )
+ { arr = nums; arrsz = sizeof(nums)-1; }
+ else if ( isalpha(argv[1][0]) )
+ { arr = alph; arrsz = sizeof(alph)-1; }
+ srand(getpid());
+ datsz = rand()%13+1;
+ for ( i=0; i<datsz; i++ ) dat[i] = arr[ rand()%arrsz ];
+ dat[datsz] = '\0';
+ printf("%s\n", dat);
+ return 0;
+ }
+EOF
+$CC -o $datbin $datsrc
+#-----------------------------------
+i=0
+totre="[a-zA-Z]+"
+totdat=`$datbin a`
+while test $i -lt 5; do
+ num=`$datbin 0`
+ alph=`$datbin a`
+ totre=$totre"([0-9]+)[a-zA-Z]+"
+ totdat=$totdat$num$alph
+ test $i -eq 0 && expectedresp=$num || expectedresp=$expectedresp:$num
+ i=`expr $i + 1`
+done
+resp=`$rgbin 5 "$totre" "$totdat" 2>&1`
+msg="5 group patterns taken with bracket ranges"
+test "$resp" = "$expectedresp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+i=0
+totre="[a-zA-Z]+"
+totdat=`$datbin a`
+while test $i -lt 10; do
+ num=`$datbin 0`
+ alph=`$datbin a`
+ totre=$totre"([0-9]+)[a-zA-Z]+"
+ totdat=$totdat$num$alph
+ test $i -eq 0 && expectedresp=$num || expectedresp=$expectedresp:$num
+ i=`expr $i + 1`
+done
+resp=`$rgbin 10 "$totre" "$totdat" 2>&1`
+msg="10 group patterns taken with bracket ranges"
+test "$resp" = "$expectedresp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+i=0
+totre="[a-zA-Z]+"
+totdat=`$datbin a`
+while test $i -lt 99; do
+ num=`$datbin 0`
+ alph=`$datbin a`
+ totre=$totre"([0-9]+)[a-zA-Z]+"
+ totdat=$totdat$num$alph
+ test $i -eq 0 && expectedresp=$num || expectedresp=$expectedresp:$num
+ i=`expr $i + 1`
+done
+resp=`$rgbin 99 "$totre" "$totdat" 2>&1`
+msg="99 group patterns taken with bracket ranges"
+test "$resp" = "$expectedresp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+i=0
+totre="[[:alpha:]]+"
+totdat=`$datbin a`
+while test $i -lt 99; do
+ num=`$datbin 0`
+ alph=`$datbin a`
+ totre=$totre"([[:digit:]]+)[[:alpha:]]+"
+ totdat=$totdat$num$alph
+ test $i -eq 0 && expectedresp=$num || expectedresp=$expectedresp:$num
+ i=`expr $i + 1`
+done
+resp=`$rgbin 99 "$totre" "$totdat" 2>&1`
+msg="99 group patterns taken with character classes"
+test "$resp" = "$expectedresp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 0 "clavo" "Pablito\00clavo un clavito" 2>&1`
+msg="Binary data"
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 0 "cl\xFFavo" "Pablito\x00cl\xFFavo un clavito" 2>&1`
+msg="Binary RE and data"
+test -z "$resp" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+resp=`$rgbin 1 "(?i)(clavo)" "Pablito ClAvO un clavito" 2>&1`
+msg="One group pattern with case-insensitive matching"
+test "$resp" = "ClAvO" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------
+# Will only work if REGEX_UTF8
+resp=`$rgbin 1 '([[:alpha:]]+)' 'абвгд' 2>&1`
+msg="Unicode character class"
+test "$resp" = "абвгд" && f_ok "$msg" || f_no "$msg" "$resp"
+#-----------------------------------