diff options
Diffstat (limited to 'src/RESearch.cxx')
-rw-r--r-- | src/RESearch.cxx | 120 |
1 files changed, 70 insertions, 50 deletions
diff --git a/src/RESearch.cxx b/src/RESearch.cxx index 82f08b6e5..e0f821d31 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -30,6 +30,14 @@ * Modification history: * * $Log$ + * Revision 1.6 2001/04/29 13:32:10 nyamatongwe + * Addition of new target methods - versions of ReplaceTarget that take counted + * strings to allow for nulls, SearchInTarget and Get/SetSearchFlags to use a + * series of calls rather than a structure. + * Handling of \000 in search and replace. + * Handling of /escapes within character ranges of regular expressions. + * Some handling of bare ^ and $ regular expressions. + * * Revision 1.5 2001/04/20 07:36:09 nyamatongwe * Removed DEBUG code that failed to compile on GTK+. * @@ -214,8 +222,6 @@ #include "RESearch.h" -#define EXTEND - #define OKP 1 #define NOP 0 @@ -310,8 +316,20 @@ void RESearch::ChSetWithCase(char c, bool caseSensitive) { } } -const char *RESearch::Compile(const char *pat, bool caseSensitive) { - const char *p; /* pattern pointer */ +const char escapeValue(char ch) { + switch (ch) { + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + } + return 0; +} + +const char *RESearch::Compile(const char *pat, int length, bool caseSensitive) { char *mp=nfa; /* nfa pointer */ char *lp; /* saved pointer.. */ char *sp=nfa; /* another one.. */ @@ -323,14 +341,15 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) { char mask; /* xor mask -CCL/NCL */ int c1, c2; - if (!pat || !*pat) + if (!pat || !length) if (sta) return 0; else return badpat("No previous regular expression"); sta = NOP; - for (p = pat; *p; p++) { + const char *p=pat; /* pattern pointer */ + for (int i=0; i<length; i++, p++) { lp = mp; switch(*p) { @@ -359,34 +378,46 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) { case '[': /* match char class..*/ *mp++ = CCL; + i++; if (*++p == '^') { mask = '\377'; + i++; p++; - } - else + } else mask = 0; - if (*p == '-') /* real dash */ + if (*p == '-') { /* real dash */ + i++; ChSet(*p++); - if (*p == ']') /* real brac */ + } + if (*p == ']') { /* real brace */ + i++; ChSet(*p++); + } while (*p && *p != ']') { if (*p == '-' && *(p+1) && *(p+1) != ']') { + i++; p++; c1 = *(p-2) + 1; + i++; c2 = *p++; while (c1 <= c2) { ChSetWithCase(static_cast<char>(c1++), caseSensitive); } - } -#ifdef EXTEND - else if (*p == '\\' && *(p+1)) { + } else if (*p == '\\' && *(p+1)) { + i++; p++; + char escape = escapeValue(*p); + if (escape) + ChSetWithCase(escape, caseSensitive); + else + ChSetWithCase(*p, caseSensitive); + i++; + p++; + } else { + i++; ChSetWithCase(*p++, caseSensitive); } -#endif - else - ChSetWithCase(*p++, caseSensitive); } if (!*p) return badpat("Missing ]"); @@ -430,6 +461,7 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) { break; case '\\': /* tags, backrefs .. */ + i++; switch(*++p) { case '(': @@ -478,36 +510,16 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) { else return badpat("Undetermined reference"); break; -#ifdef EXTEND case 'a': - *mp++ = CHR; - *mp++ = '\a'; - break; case 'b': - *mp++ = CHR; - *mp++ = '\b'; - break; case 'n': - *mp++ = CHR; - *mp++ = '\n'; - break; case 'f': - *mp++ = CHR; - *mp++ = '\f'; - break; case 'r': - *mp++ = CHR; - *mp++ = '\r'; - break; case 't': - *mp++ = CHR; - *mp++ = '\t'; - break; case 'v': *mp++ = CHR; - *mp++ = '\v'; + *mp++ = escapeValue(*p); break; -#endif default: *mp++ = CHR; *mp++ = *p; @@ -558,7 +570,7 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) { * */ -int RESearch::Execute(CharacterIndexer &ci, int lp) { +int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) { char c; int ep = NOTFOUND; char *ap = nfa; @@ -571,17 +583,25 @@ int RESearch::Execute(CharacterIndexer &ci, int lp) { switch(*ap) { case BOL: /* anchored: match from BOL only */ - ep = PMatch(ci, lp, ap); + ep = PMatch(ci, lp, endp, ap); break; + case EOL: /* just searching for end of line normal path doesn't work */ + if (*(ap+1) == END) { + lp = endp; + ep = lp; + break; + } else { + return 0; + } case CHR: /* ordinary char: locate it fast */ c = *(ap+1); - while (ci.CharAt(lp) && ci.CharAt(lp) != c) + while ((lp < endp) && (ci.CharAt(lp) != c)) lp++; - if (!ci.CharAt(lp)) /* if EOS, fail, else fall thru. */ + if (lp >= endp) /* if EOS, fail, else fall thru. */ return 0; default: /* regular matching all the way. */ - while (ci.CharAt(lp)) { - ep = PMatch(ci, lp, ap); + while (lp < endp) { + ep = PMatch(ci, lp, endp, ap); if (ep != NOTFOUND) break; lp++; @@ -667,7 +687,7 @@ static char chrtyp[MAXCHR] = { #define CHRSKIP 3 /* [CLO] CHR chr END ... */ #define CCLSKIP 18 /* [CLO] CCL 16bytes END ... */ -int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) { +int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) { int op, c, n; int e; /* extra pointer for CLO */ int bp; /* beginning of subpat.. */ @@ -682,7 +702,7 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) { return NOTFOUND; break; case ANY: - if (!ci.CharAt(lp++)) + if (lp++ >= endp) return NOTFOUND; break; case CCL: @@ -696,7 +716,7 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) { return NOTFOUND; break; case EOL: - if (ci.CharAt(lp)) + if (lp < endp) return NOTFOUND; break; case BOT: @@ -726,18 +746,18 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) { switch(*ap) { case ANY: - while (ci.CharAt(lp)) + while (lp < endp) lp++; n = ANYSKIP; break; case CHR: c = *(ap+1); - while (ci.CharAt(lp) && c == ci.CharAt(lp)) + while ((lp < endp) && (c == ci.CharAt(lp))) lp++; n = CHRSKIP; break; case CCL: - while (((c = ci.CharAt(lp)) != 0) && isinset(ap+1,c)) + while ((lp < endp) && isinset(ap+1,ci.CharAt(lp))) lp++; n = CCLSKIP; break; @@ -750,7 +770,7 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) { ap += n; while (lp >= are) { - if ((e = PMatch(ci, lp, ap)) != NOTFOUND) + if ((e = PMatch(ci, lp, endp, ap)) != NOTFOUND) return e; --lp; } |