diff options
Diffstat (limited to 'src/Document.cxx')
| -rw-r--r-- | src/Document.cxx | 422 | 
1 files changed, 227 insertions, 195 deletions
| diff --git a/src/Document.cxx b/src/Document.cxx index dba827c8d..bded3a32a 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -73,8 +73,7 @@ Document::Document() {  	lenWatchers = 0;  	matchesValid = false; -	pre = 0; -	substituted = 0; +	regex = 0;  }  Document::~Document() { @@ -84,10 +83,8 @@ Document::~Document() {  	delete []watchers;  	watchers = 0;  	lenWatchers = 0; -	delete pre; -	pre = 0; -	delete []substituted; -	substituted = 0; +	delete regex; +	regex = 0;  }  // Increase reference count and return its previous value. @@ -1015,123 +1012,18 @@ static inline char MakeLowerCase(char ch) {  		return static_cast<char>(ch - 'A' + 'a');  } -// Define a way for the Regular Expression code to access the document -class DocumentIndexer : public CharacterIndexer { -	Document *pdoc; -	int end; -public: -	DocumentIndexer(Document *pdoc_, int end_) : -		pdoc(pdoc_), end(end_) { -	} - -	virtual ~DocumentIndexer() { -	} - -	virtual char CharAt(int index) { -		if (index < 0 || index >= end) -			return 0; -		else -			return pdoc->CharAt(index); -	} -}; -  /**   * Find text in document, supporting both forward and backward   * searches (just pass minPos > maxPos to do a backward search)   * Has not been tested with backwards DBCS searches yet.   */  long Document::FindText(int minPos, int maxPos, const char *s, -                        bool caseSensitive, bool word, bool wordStart, bool regExp, bool posix, +                        bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,                          int *length) {  	if (regExp) { -		if (!pre) -			pre = new RESearch(&charClass); -		if (!pre) -			return -1; - -		int increment = (minPos <= maxPos) ? 1 : -1; - -		int startPos = minPos; -		int endPos = maxPos; - -		// Range endpoints should not be inside DBCS characters, but just in case, move them. 
-		startPos = MovePositionOutsideChar(startPos, 1, false); -		endPos = MovePositionOutsideChar(endPos, 1, false); - -		const char *errmsg = pre->Compile(s, *length, caseSensitive, posix); -		if (errmsg) { -			return -1; -		} -		// Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) -		// Replace first '.' with '-' in each property file variable reference: -		//     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) -		//     Replace: $(\1-\2) -		int lineRangeStart = LineFromPosition(startPos); -		int lineRangeEnd = LineFromPosition(endPos); -		if ((increment == 1) && -			(startPos >= LineEnd(lineRangeStart)) && -			(lineRangeStart < lineRangeEnd)) { -			// the start position is at end of line or between line end characters. -			lineRangeStart++; -			startPos = LineStart(lineRangeStart); -		} -		int pos = -1; -		int lenRet = 0; -		char searchEnd = s[*length - 1]; -		int lineRangeBreak = lineRangeEnd + increment; -		for (int line = lineRangeStart; line != lineRangeBreak; line += increment) { -			int startOfLine = LineStart(line); -			int endOfLine = LineEnd(line); -			if (increment == 1) { -				if (line == lineRangeStart) { -					if ((startPos != startOfLine) && (s[0] == '^')) -						continue;	// Can't match start of line if start position after start of line -					startOfLine = startPos; -				} -				if (line == lineRangeEnd) { -					if ((endPos != endOfLine) && (searchEnd == '$')) -						continue;	// Can't match end of line if end position before end of line -					endOfLine = endPos; -				} -			} else { -				if (line == lineRangeEnd) { -					if ((endPos != startOfLine) && (s[0] == '^')) -						continue;	// Can't match start of line if end position after start of line -					startOfLine = endPos; -				} -				if (line == lineRangeStart) { -					if ((startPos != endOfLine) && (searchEnd == '$')) -						continue;	// Can't match end of line if start position before end of line -					endOfLine = startPos; -				} -			} - -			DocumentIndexer di(this, endOfLine); -			
int success = pre->Execute(di, startOfLine, endOfLine); -			if (success) { -				pos = pre->bopat[0]; -				lenRet = pre->eopat[0] - pre->bopat[0]; -				if (increment == -1) { -					// Check for the last match on this line. -					int repetitions = 1000;	// Break out of infinite loop -					while (success && (pre->eopat[0] <= endOfLine) && (repetitions--)) { -						success = pre->Execute(di, pos+1, endOfLine); -						if (success) { -							if (pre->eopat[0] <= minPos) { -								pos = pre->bopat[0]; -								lenRet = pre->eopat[0] - pre->bopat[0]; -							} else { -								success = 0; -							} -						} -					} -				} -				break; -			} -		} -		*length = lenRet; -		return pos; - +		if (!regex) +			regex = CreateRegexSearch(&charClass); +		return regex->FindText(this, minPos, maxPos, s, caseSensitive, word, wordStart, flags, length);  	} else {  		bool forward = minPos <= maxPos; @@ -1201,86 +1093,7 @@ long Document::FindText(int minPos, int maxPos, const char *s,  }  const char *Document::SubstituteByPosition(const char *text, int *length) { -	if (!pre) -		return 0; -	delete []substituted; -	substituted = 0; -	DocumentIndexer di(this, Length()); -	if (!pre->GrabMatches(di)) -		return 0; -	unsigned int lenResult = 0; -	for (int i = 0; i < *length; i++) { -		if (text[i] == '\\') { -			if (text[i + 1] >= '1' && text[i + 1] <= '9') { -				unsigned int patNum = text[i + 1] - '0'; -				lenResult += pre->eopat[patNum] - pre->bopat[patNum]; -				i++; -			} else { -				switch (text[i + 1]) { -				case 'a': -				case 'b': -				case 'f': -				case 'n': -				case 'r': -				case 't': -				case 'v': -					i++; -				} -				lenResult++; -			} -		} else { -			lenResult++; -		} -	} -	substituted = new char[lenResult + 1]; -	if (!substituted) -		return 0; -	char *o = substituted; -	for (int j = 0; j < *length; j++) { -		if (text[j] == '\\') { -			if (text[j + 1] >= '1' && text[j + 1] <= '9') { -				unsigned int patNum = text[j + 1] - '0'; -				unsigned int len = pre->eopat[patNum] - 
pre->bopat[patNum]; -				if (pre->pat[patNum])	// Will be null if try for a match that did not occur -					memcpy(o, pre->pat[patNum], len); -				o += len; -				j++; -			} else { -				j++; -				switch (text[j]) { -				case 'a': -					*o++ = '\a'; -					break; -				case 'b': -					*o++ = '\b'; -					break; -				case 'f': -					*o++ = '\f'; -					break; -				case 'n': -					*o++ = '\n'; -					break; -				case 'r': -					*o++ = '\r'; -					break; -				case 't': -					*o++ = '\t'; -					break; -				case 'v': -					*o++ = '\v'; -					break; -				default: -					*o++ = '\\'; -					j--; -				} -			} -		} else { -			*o++ = text[j]; -		} -	} -	*o = '\0'; -	*length = lenResult; -	return substituted; +	return regex ? regex->SubstituteByPosition(this, text, length) : 0;	// regex stays 0 until the first regExp FindText creates it  }  int Document::LinesTotal() const { @@ -1630,3 +1443,222 @@ int Document::BraceMatch(int position, int /*maxReStyle*/) {  	}  	return - 1;  } + +/** + * Implementation of RegexSearchBase for the default built-in regular expression engine + */ +class BuiltinRegex : public RegexSearchBase { +public: +	BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {} + +	virtual ~BuiltinRegex() { +		delete []substituted;	// buffer comes from new char[] in SubstituteByPosition +	} + +	virtual long FindText(Document *doc, int minPos, int maxPos, const char *s, +                        bool caseSensitive, bool word, bool wordStart, int flags, +                        int *length); + +	virtual const char *SubstituteByPosition(Document* doc, const char *text, int *length); + +private: +	RESearch search; +	char *substituted; +}; + +// Define a way for the Regular Expression code to access the document +class DocumentIndexer : public CharacterIndexer { +	Document *pdoc; +	int end; +public: +	DocumentIndexer(Document *pdoc_, int end_) : +		pdoc(pdoc_), end(end_) { +	} + +	virtual ~DocumentIndexer() { +	} + +	virtual char CharAt(int index) { +		if (index < 0 || index >= end) +			return 0; +		else +			return pdoc->CharAt(index); +	} +}; + +long 
BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s, +                        bool caseSensitive, bool, bool, int flags, +                        int *length) { +	bool posix = (flags & SCFIND_POSIX) != 0; +	int increment = (minPos <= maxPos) ? 1 : -1; + +	int startPos = minPos; +	int endPos = maxPos; + +	// Range endpoints should not be inside DBCS characters, but just in case, move them. +	startPos = doc->MovePositionOutsideChar(startPos, 1, false); +	endPos = doc->MovePositionOutsideChar(endPos, 1, false); + +	const char *errmsg = search.Compile(s, *length, caseSensitive, posix); +	if (errmsg) { +		return -1; +	} +	// Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) +	// Replace first '.' with '-' in each property file variable reference: +	//     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) +	//     Replace: $(\1-\2) +	int lineRangeStart = doc->LineFromPosition(startPos); +	int lineRangeEnd = doc->LineFromPosition(endPos); +	if ((increment == 1) && +		(startPos >= doc->LineEnd(lineRangeStart)) && +		(lineRangeStart < lineRangeEnd)) { +		// the start position is at end of line or between line end characters. 
+		lineRangeStart++; +		startPos = doc->LineStart(lineRangeStart); +	} +	int pos = -1; +	int lenRet = 0; +	char searchEnd = s[*length - 1]; +	int lineRangeBreak = lineRangeEnd + increment; +	for (int line = lineRangeStart; line != lineRangeBreak; line += increment) { +		int startOfLine = doc->LineStart(line); +		int endOfLine = doc->LineEnd(line); +		if (increment == 1) { +			if (line == lineRangeStart) { +				if ((startPos != startOfLine) && (s[0] == '^')) +					continue;	// Can't match start of line if start position after start of line +				startOfLine = startPos; +			} +			if (line == lineRangeEnd) { +				if ((endPos != endOfLine) && (searchEnd == '$')) +					continue;	// Can't match end of line if end position before end of line +				endOfLine = endPos; +			} +		} else { +			if (line == lineRangeEnd) { +				if ((endPos != startOfLine) && (s[0] == '^')) +					continue;	// Can't match start of line if end position after start of line +				startOfLine = endPos; +			} +			if (line == lineRangeStart) { +				if ((startPos != endOfLine) && (searchEnd == '$')) +					continue;	// Can't match end of line if start position before end of line +				endOfLine = startPos; +			} +		} + +		DocumentIndexer di(doc, endOfLine); +		int success = search.Execute(di, startOfLine, endOfLine); +		if (success) { +			pos = search.bopat[0]; +			lenRet = search.eopat[0] - search.bopat[0]; +			if (increment == -1) { +				// Check for the last match on this line. 
+				int repetitions = 1000;	// Break out of infinite loop +				while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) { +					success = search.Execute(di, pos+1, endOfLine); +					if (success) { +						if (search.eopat[0] <= minPos) { +							pos = search.bopat[0]; +							lenRet = search.eopat[0] - search.bopat[0]; +						} else { +							success = 0; +						} +					} +				} +			} +			break; +		} +	} +	*length = lenRet; +	return pos; +} + +const char *BuiltinRegex::SubstituteByPosition(Document* doc, const char *text, int *length) { +	delete []substituted; +	substituted = 0; +	DocumentIndexer di(doc, doc->Length()); +	if (!search.GrabMatches(di)) +		return 0; +	unsigned int lenResult = 0; +	for (int i = 0; i < *length; i++) { +		if (text[i] == '\\') { +			if (text[i + 1] >= '1' && text[i + 1] <= '9') { +				unsigned int patNum = text[i + 1] - '0'; +				lenResult += search.eopat[patNum] - search.bopat[patNum]; +				i++; +			} else { +				switch (text[i + 1]) { +				case 'a': +				case 'b': +				case 'f': +				case 'n': +				case 'r': +				case 't': +				case 'v': +					i++; +				} +				lenResult++; +			} +		} else { +			lenResult++; +		} +	} +	substituted = new char[lenResult + 1]; +	if (!substituted) +		return 0; +	char *o = substituted; +	for (int j = 0; j < *length; j++) { +		if (text[j] == '\\') { +			if (text[j + 1] >= '1' && text[j + 1] <= '9') { +				unsigned int patNum = text[j + 1] - '0'; +				unsigned int len = search.eopat[patNum] - search.bopat[patNum]; +				if (search.pat[patNum])	// Will be null if try for a match that did not occur +					memcpy(o, search.pat[patNum], len); +				o += len; +				j++; +			} else { +				j++; +				switch (text[j]) { +				case 'a': +					*o++ = '\a'; +					break; +				case 'b': +					*o++ = '\b'; +					break; +				case 'f': +					*o++ = '\f'; +					break; +				case 'n': +					*o++ = '\n'; +					break; +				case 'r': +					*o++ = '\r'; +					break; +				case 't': +					*o++ = '\t'; +					break; +				case 'v': 
+					*o++ = '\v'; +					break; +				default: +					*o++ = '\\'; +					j--; +				} +			} +		} else { +			*o++ = text[j]; +		} +	} +	*o = '\0'; +	*length = lenResult; +	return substituted; +} + +#ifndef SCI_OWNREGEX + +RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) { +	return new BuiltinRegex(charClassTable); +} + +#endif | 
