diff options
Diffstat (limited to 'regtest_hsrex.sh')
| -rwxr-xr-x | regtest_hsrex.sh | 66 |
1 file changed, 31 insertions, 35 deletions
diff --git a/regtest_hsrex.sh b/regtest_hsrex.sh index 0950c04..566a9f3 100755 --- a/regtest_hsrex.sh +++ b/regtest_hsrex.sh @@ -11,6 +11,8 @@ # History: # 04/xx/02 (ww) Version 1.0 # +#set -x + H=$HOME me=`basename $0` rgsrc=regtest_hsrex.c @@ -71,45 +73,33 @@ cat<<-EOF>$rgsrc #include <string.h> #include "regalone.h" #include "regex.h" - #ifdef REGEX_WCHAR - # define chr wchar_t - # define re_comp re_wcomp - # define re_exec re_wexec - #else - # define chr char + #ifdef REGEX_UTF8 + # define re_comp re_ucomp + # define re_exec re_uexec #endif - size_t hexescapes2bin(chr *t, char *src, size_t mxlen) + size_t hexescapes2bin(unsigned char *t, char *src, size_t mxlen) { char *s, *xs; size_t len; s = xs = src; len = 0; - while ( s = strstr(s, "\\\x") ) + while ( (s = strstr(s, "\\\x")) ) { int cbin; sscanf(&s[2], "%2x", &cbin); - # ifdef REGEX_WCHAR - *s = '\0'; - len += mbstowcs(&t[len], xs, mxlen-len); - # else - memcpy(&t[len], xs, (size_t ) (s-xs)); - len += (size_t ) (s-xs); - # endif + memcpy(&t[len], xs, (size_t ) (s-xs)); + len += (size_t ) (s-xs); t[len++] = cbin; s += 4; xs = s; } - # ifdef REGEX_WCHAR - len += mbstowcs(&t[len], xs, mxlen-len); - # else - strcpy(&t[len], xs); - len += strlen(xs); - # endif + strcpy((char *)&t[len], xs); + len += strlen(xs); return len; } - main(int argc, char *argv[]) + int main(int argc, char *argv[]) { - chr re[1024*4], dat[1024*8]; + unsigned char re[1024*4], dat[1024*8]; size_t relen, datlen; regex_t cre; regmatch_t pmatch[100]; @@ -118,30 +108,30 @@ cat<<-EOF>$rgsrc //memset(&cre, '\0', sizeof(cre)); nmatch = atoi(argv[1]); - relen = hexescapes2bin(re, argv[2], sizeof(re)/sizeof(chr)); - datlen = hexescapes2bin(dat, argv[3], sizeof(dat)/sizeof(chr)); + relen = hexescapes2bin(re, argv[2], sizeof(re)/sizeof(char)); + datlen = hexescapes2bin(dat, argv[3], sizeof(dat)/sizeof(char)); cflags = REG_ADVANCED | (nmatch ? 
0 : REG_NOSUB); rc = re_comp(&cre, re, relen, cflags); if ( rc != REG_OKAY ) { regerror(rc, &cre, buf, sizeof(buf)); fprintf(stderr, "Compile error. %s\n", buf); - exit(1); + return 1; } if ( nmatch >= 0 && cre.re_nsub != nmatch ) { fprintf(stderr, - "Mismatch on number of group patterns. ", - "Expected %d, compiled %d\n", + "Mismatch on number of group patterns. " + "Expected %d, compiled %zu\n", nmatch, cre.re_nsub); - exit(1); + return 1; } rc = re_exec(&cre, dat, datlen, NULL, 100, pmatch, 0); if ( rc != REG_OKAY ) { regerror(rc, &cre, buf, sizeof(buf)); fprintf(stderr, "Execution error. %s\n", buf); - exit(1); + return 1; } if ( cre.re_nsub ) { @@ -151,21 +141,21 @@ cat<<-EOF>$rgsrc for ( i=1; i<cre.re_nsub+1 && pmatch[i].rm_so>=0; i++ ) sprintf(&buf[strlen(buf)], "%s%.*s", i>1 ? ":" : "", - pmatch[i].rm_eo-pmatch[i].rm_so, + (int)(pmatch[i].rm_eo-pmatch[i].rm_so), argv[3]+pmatch[i].rm_so); printf("%s\n", buf); } regfree(&cre); - exit(0); + return 0; } EOF PATH=.:$PATH LD_LIBRARY_PATH=.:$LD_LIBRARY_PATH export PATH LD_LIBRARY_PATH # Either this one -$CC -I. -I$H/inc -L. -lhsrex -o $rgbin $rgsrc # Test ascii ch +#$CC -Wall -g -O0 -I. -I$H/inc -L. -lhsrex -o $rgbin $rgsrc # Test ascii ch # Or this one -#$CC -I. -I$H/inc -L. -lhswrex -DREGEX_WCHAR -o $rgbin $rgsrc # Test wide ch +$CC -Wall -g -O0 -I. -I$H/inc -L. 
-lhsurex -DREGEX_UTF8 -o $rgbin $rgsrc # Test wide ch #----------------------------------- resp=`$rgbin 0 "clavo" "Pablito clavo un clavito" 2>&1` msg="Simple match" @@ -222,7 +212,7 @@ cat<<-EOF>$datsrc #endif char nums[] = "0123456789"; char alph[] = "abcdefghijklmnopqrstuvwxyz"; - main(int argc, char *argv[]) + int main(int argc, char *argv[]) { char dat[16], *arr; int arrsz, datsz, i; @@ -236,6 +226,7 @@ cat<<-EOF>$datsrc for ( i=0; i<datsz; i++ ) dat[i] = arr[ rand()%arrsz ]; dat[datsz] = '\0'; printf("%s\n", dat); + return 0; } EOF $CC -o $datbin $datsrc @@ -312,3 +303,8 @@ resp=`$rgbin 1 "(?i)(clavo)" "Pablito ClAvO un clavito" 2>&1` msg="One group pattern with case-insensitive matching" test "$resp" = "ClAvO" && f_ok "$msg" || f_no "$msg" "$resp" #----------------------------------- +# Will only work if REGEX_UTF8 +resp=`$rgbin 1 '([[:alpha:]]+)' 'абвгд' 2>&1` +msg="Unicode character class" +test "$resp" = "абвгд" && f_ok "$msg" || f_no "$msg" "$resp" +#----------------------------------- |
