diff options
Diffstat (limited to 'src/RESearch.cxx')
| -rw-r--r-- | src/RESearch.cxx | 120 | 
1 files changed, 70 insertions, 50 deletions
| diff --git a/src/RESearch.cxx b/src/RESearch.cxx index 82f08b6e5..e0f821d31 100644 --- a/src/RESearch.cxx +++ b/src/RESearch.cxx @@ -30,6 +30,14 @@   * Modification history:   *   * $Log$ + * Revision 1.6  2001/04/29 13:32:10  nyamatongwe + * Addition of new target methods - versions of ReplaceTarget that take counted + * strings to allow for nulls, SearchInTarget and Get/SetSearchFlags to use a + * series of calls rather than a structure. + * Handling of \000 in search and replace. + * Handling of /escapes within character ranges of regular expressions. + * Some handling of bare ^ and $ regular expressions. + *   * Revision 1.5  2001/04/20 07:36:09  nyamatongwe   * Removed DEBUG code that failed to compile on GTK+.   * @@ -214,8 +222,6 @@  #include "RESearch.h" -#define EXTEND -  #define OKP     1  #define NOP     0 @@ -310,8 +316,20 @@ void RESearch::ChSetWithCase(char c, bool caseSensitive) {  	}  } -const char *RESearch::Compile(const char *pat, bool caseSensitive) { -	const char *p;               /* pattern pointer   */ +const char escapeValue(char ch) { +	switch (ch) { +	case 'a':	return '\a'; +	case 'b':	return '\b'; +	case 'f':	return '\f'; +	case 'n':	return '\n'; +	case 'r':	return '\r'; +	case 't':	return '\t'; +	case 'v':	return '\v'; +	} +	return 0; +} + +const char *RESearch::Compile(const char *pat, int length, bool caseSensitive) {  	char *mp=nfa;          /* nfa pointer       */  	char *lp;              /* saved pointer..   */  	char *sp=nfa;          /* another one..     */ @@ -323,14 +341,15 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) {  	char mask;		/* xor mask -CCL/NCL */  	int c1, c2; -	if (!pat || !*pat) +	if (!pat || !length)  		if (sta)  			return 0;  		else  			return badpat("No previous regular expression");  	sta = NOP; -	for (p = pat; *p; p++) { +	const char *p=pat;               /* pattern pointer   */ +	for (int i=0; i<length; i++, p++) {  		lp = mp;  		switch(*p) { @@ -359,34 +378,46 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) {  		case '[':               /* match char class..*/  			*mp++ = CCL; +			i++;  			if (*++p == '^') {  				mask = '\377';	 +				i++;  				p++; -			} -			else +			} else  				mask = 0; -			if (*p == '-')		/* real dash */ +			if (*p == '-') {		/* real dash */ +				i++;  				ChSet(*p++); -			if (*p == ']')		/* real brac */ +			} +			if (*p == ']') {	/* real brace */ +				i++;  				ChSet(*p++); +			}  			while (*p && *p != ']') {  				if (*p == '-' && *(p+1) && *(p+1) != ']') { +					i++;  					p++;  					c1 = *(p-2) + 1; +					i++;  					c2 = *p++;  					while (c1 <= c2) {  						ChSetWithCase(static_cast<char>(c1++), caseSensitive);  					} -				} -#ifdef EXTEND -				else if (*p == '\\' && *(p+1)) { +				} else if (*p == '\\' && *(p+1)) { +					i++;  					p++; +					char escape = escapeValue(*p); +					if (escape) +						ChSetWithCase(escape, caseSensitive); +					else +						ChSetWithCase(*p, caseSensitive); +					i++; +					p++; +				} else { +					i++;  					ChSetWithCase(*p++, caseSensitive);  				} -#endif -				else -					ChSetWithCase(*p++, caseSensitive);  			}  			if (!*p)  				return badpat("Missing ]"); @@ -430,6 +461,7 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) {  			break;  		case '\\':              /* tags, backrefs .. */ +			i++;  			switch(*++p) {  			case '(': @@ -478,36 +510,16 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) {  				else  					return badpat("Undetermined reference");  				break; -#ifdef EXTEND  			case 'a': -				*mp++ = CHR; -				*mp++ = '\a'; -				break;  			case 'b': -				*mp++ = CHR; -				*mp++ = '\b'; -				break;  			case 'n': -				*mp++ = CHR; -				*mp++ = '\n'; -				break;  			case 'f': -				*mp++ = CHR; -				*mp++ = '\f'; -				break;  			case 'r': -				*mp++ = CHR; -				*mp++ = '\r'; -				break;  			case 't': -				*mp++ = CHR; -				*mp++ = '\t'; -				break;  			case 'v':  				*mp++ = CHR; -				*mp++ = '\v'; +				*mp++ = escapeValue(*p);  				break; -#endif  			default:  				*mp++ = CHR;  				*mp++ = *p; @@ -558,7 +570,7 @@ const char *RESearch::Compile(const char *pat, bool caseSensitive) {   *   */ -int RESearch::Execute(CharacterIndexer &ci, int lp) { +int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {  	char c;  	int ep = NOTFOUND;  	char *ap = nfa; @@ -571,17 +583,25 @@ int RESearch::Execute(CharacterIndexer &ci, int lp) {  	switch(*ap) {  	case BOL:			/* anchored: match from BOL only */ -		ep = PMatch(ci, lp, ap); +		ep = PMatch(ci, lp, endp, ap);  		break; +	case EOL:			/* just searching for end of line normal path doesn't work */ +		if (*(ap+1) == END) { +			lp = endp; +			ep = lp; +			break; +		} else { +			return 0; +		}  	case CHR:			/* ordinary char: locate it fast */  		c = *(ap+1); -		while (ci.CharAt(lp) && ci.CharAt(lp) != c) +		while ((lp < endp) && (ci.CharAt(lp) != c))  			lp++; -		if (!ci.CharAt(lp))		/* if EOS, fail, else fall thru. */ +		if (lp >= endp)		/* if EOS, fail, else fall thru. */  			return 0;  	default:			/* regular matching all the way. */ -		while (ci.CharAt(lp)) { -			ep = PMatch(ci, lp, ap); +		while (lp < endp) { +			ep = PMatch(ci, lp, endp, ap);  			if (ep != NOTFOUND)  				break;  			lp++; @@ -667,7 +687,7 @@ static char chrtyp[MAXCHR] = {  #define CHRSKIP	3	/* [CLO] CHR chr END ...     */  #define CCLSKIP 18	/* [CLO] CCL 16bytes END ... */ -int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) { +int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) {  	int op, c, n;  	int e;		/* extra pointer for CLO */  	int bp;		/* beginning of subpat.. */ @@ -682,7 +702,7 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) {  				return NOTFOUND;  			break;  		case ANY: -			if (!ci.CharAt(lp++)) +			if (lp++ >= endp)  				return NOTFOUND;  			break;  		case CCL: @@ -696,7 +716,7 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) {  				return NOTFOUND;  			break;  		case EOL: -			if (ci.CharAt(lp)) +			if (lp < endp)  				return NOTFOUND;  			break;  		case BOT: @@ -726,18 +746,18 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) {  			switch(*ap) {  			case ANY: -				while (ci.CharAt(lp)) +				while (lp < endp)  					lp++;  				n = ANYSKIP;  				break;  			case CHR:  				c = *(ap+1); -				while (ci.CharAt(lp) && c == ci.CharAt(lp)) +				while ((lp < endp) && (c == ci.CharAt(lp)))  					lp++;  				n = CHRSKIP;  				break;  			case CCL: -				while (((c = ci.CharAt(lp)) != 0) && isinset(ap+1,c)) +				while ((lp < endp) && isinset(ap+1,ci.CharAt(lp)))  					lp++;  				n = CCLSKIP;  				break; @@ -750,7 +770,7 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, char *ap) {  			ap += n;  			while (lp >= are) { -				if ((e = PMatch(ci, lp, ap)) != NOTFOUND) +				if ((e = PMatch(ci, lp, endp, ap)) != NOTFOUND)  					return e;  				--lp;  			} | 
