1 files changed, 227 insertions, 195 deletions
diff --git a/src/Document.cxx b/src/Document.cxx
index dba827c8d..bded3a32a 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -73,8 +73,7 @@ Document::Document() {
 	lenWatchers = 0;
 
 	matchesValid = false;
-	pre = 0;
-	substituted = 0;
+	regex = 0;
 }
 
 Document::~Document() {
@@ -84,10 +83,8 @@ Document::~Document() {
 	delete []watchers;
 	watchers = 0;
 	lenWatchers = 0;
-	delete pre;
-	pre = 0;
-	delete []substituted;
-	substituted = 0;
+	delete regex;
+	regex = 0;
 }
 
 // Increase reference count and return its previous value.
@@ -1015,123 +1012,18 @@ static inline char MakeLowerCase(char ch) {
 		return static_cast<char>(ch - 'A' + 'a');
 }
 
-// Define a way for the Regular Expression code to access the document
-class DocumentIndexer : public CharacterIndexer {
-	Document *pdoc;
-	int end;
-public:
-	DocumentIndexer(Document *pdoc_, int end_) :
-		pdoc(pdoc_), end(end_) {
-	}
-
-	virtual ~DocumentIndexer() {
-	}
-
-	virtual char CharAt(int index) {
-		if (index < 0 || index >= end)
-			return 0;
-		else
-			return pdoc->CharAt(index);
-	}
-};
-
 /**
  * Find text in document, supporting both forward and backward
  * searches (just pass minPos > maxPos to do a backward search)
  * Has not been tested with backwards DBCS searches yet.
  */
 long Document::FindText(int minPos, int maxPos, const char *s,
-                        bool caseSensitive, bool word, bool wordStart, bool regExp, bool posix,
+                        bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
                         int *length) {
 	if (regExp) {
-		if (!pre)
-			pre = new RESearch(&charClass);
-		if (!pre)
-			return -1;
-
-		int increment = (minPos <= maxPos) ? 1 : -1;
-
-		int startPos = minPos;
-		int endPos = maxPos;
-
-		// Range endpoints should not be inside DBCS characters, but just in case, move them.
-		startPos = MovePositionOutsideChar(startPos, 1, false);
-		endPos = MovePositionOutsideChar(endPos, 1, false);
-
-		const char *errmsg = pre->Compile(s, *length, caseSensitive, posix);
-		if (errmsg) {
-			return -1;
-		}
-		// Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
-		// Replace first '.' with '-' in each property file variable reference:
-		//     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
-		//     Replace: $(\1-\2)
-		int lineRangeStart = LineFromPosition(startPos);
-		int lineRangeEnd = LineFromPosition(endPos);
-		if ((increment == 1) &&
-			(startPos >= LineEnd(lineRangeStart)) &&
-			(lineRangeStart < lineRangeEnd)) {
-			// the start position is at end of line or between line end characters.
-			lineRangeStart++;
-			startPos = LineStart(lineRangeStart);
-		}
-		int pos = -1;
-		int lenRet = 0;
-		char searchEnd = s[*length - 1];
-		int lineRangeBreak = lineRangeEnd + increment;
-		for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
-			int startOfLine = LineStart(line);
-			int endOfLine = LineEnd(line);
-			if (increment == 1) {
-				if (line == lineRangeStart) {
-					if ((startPos != startOfLine) && (s[0] == '^'))
-						continue;	// Can't match start of line if start position after start of line
-					startOfLine = startPos;
-				}
-				if (line == lineRangeEnd) {
-					if ((endPos != endOfLine) && (searchEnd == '$'))
-						continue;	// Can't match end of line if end position before end of line
-					endOfLine = endPos;
-				}
-			} else {
-				if (line == lineRangeEnd) {
-					if ((endPos != startOfLine) && (s[0] == '^'))
-						continue;	// Can't match start of line if end position after start of line
-					startOfLine = endPos;
-				}
-				if (line == lineRangeStart) {
-					if ((startPos != endOfLine) && (searchEnd == '$'))
-						continue;	// Can't match end of line if start position before end of line
-					endOfLine = startPos;
-				}
-			}
-
-			DocumentIndexer di(this, endOfLine);
-			int success = pre->Execute(di, startOfLine, endOfLine);
-			if (success) {
-				pos = pre->bopat[0];
-				lenRet = pre->eopat[0] - pre->bopat[0];
-				if (increment == -1) {
-					// Check for the last match on this line.
-					int repetitions = 1000;	// Break out of infinite loop
-					while (success && (pre->eopat[0] <= endOfLine) && (repetitions--)) {
-						success = pre->Execute(di, pos+1, endOfLine);
-						if (success) {
-							if (pre->eopat[0] <= minPos) {
-								pos = pre->bopat[0];
-								lenRet = pre->eopat[0] - pre->bopat[0];
-							} else {
-								success = 0;
-							}
-						}
-					}
-				}
-				break;
-			}
-		}
-		*length = lenRet;
-		return pos;
-
+		if (!regex)
+			regex = CreateRegexSearch(&charClass);
+		return regex->FindText(this, minPos, maxPos, s, caseSensitive, word, wordStart, flags, length);
 	} else {
 
 		bool forward = minPos <= maxPos;
@@ -1201,86 +1093,7 @@ long Document::FindText(int minPos, int maxPos, const char *s,
 }
 
 const char *Document::SubstituteByPosition(const char *text, int *length) {
-	if (!pre)
-		return 0;
-	delete []substituted;
-	substituted = 0;
-	DocumentIndexer di(this, Length());
-	if (!pre->GrabMatches(di))
-		return 0;
-	unsigned int lenResult = 0;
-	for (int i = 0; i < *length; i++) {
-		if (text[i] == '\\') {
-			if (text[i + 1] >= '1' && text[i + 1] <= '9') {
-				unsigned int patNum = text[i + 1] - '0';
-				lenResult += pre->eopat[patNum] - pre->bopat[patNum];
-				i++;
-			} else {
-				switch (text[i + 1]) {
-				case 'a':
-				case 'b':
-				case 'f':
-				case 'n':
-				case 'r':
-				case 't':
-				case 'v':
-					i++;
-				}
-				lenResult++;
-			}
-		} else {
-			lenResult++;
-		}
-	}
-	substituted = new char[lenResult + 1];
-	if (!substituted)
-		return 0;
-	char *o = substituted;
-	for (int j = 0; j < *length; j++) {
-		if (text[j] == '\\') {
-			if (text[j + 1] >= '1' && text[j + 1] <= '9') {
-				unsigned int patNum = text[j + 1] - '0';
-				unsigned int len = pre->eopat[patNum] - pre->bopat[patNum];
-				if (pre->pat[patNum])	// Will be null if try for a match that did not occur
-					memcpy(o, pre->pat[patNum], len);
-				o += len;
-				j++;
-			} else {
-				j++;
-				switch (text[j]) {
-				case 'a':
-					*o++ = '\a';
-					break;
-				case 'b':
-					*o++ = '\b';
-					break;
-				case 'f':
-					*o++ = '\f';
-					break;
-				case 'n':
-					*o++ = '\n';
-					break;
-				case 'r':
-					*o++ = '\r';
-					break;
-				case 't':
-					*o++ = '\t';
-					break;
-				case 'v':
-					*o++ = '\v';
-					break;
-				default:
-					*o++ = '\\';
-					j--;
-				}
-			}
-		} else {
-			*o++ = text[j];
-		}
-	}
-	*o = '\0';
-	*length = lenResult;
-	return substituted;
+	return regex ? regex->SubstituteByPosition(this, text, length) : 0;	// regex stays 0 until a regular expression FindText creates it; return 0 (as the old !pre guard did) instead of dereferencing null
 }
 
 int Document::LinesTotal() const {
@@ -1630,3 +1443,222 @@ int Document::BraceMatch(int position, int /*maxReStyle*/) {
 	}
 	return - 1;
 }
+
+/**
+ * Implementation of RegexSearchBase for the default built-in regular expression engine (RESearch); owns the buffer returned by SubstituteByPosition.
+ */
+class BuiltinRegex : public RegexSearchBase {
+public:
+	BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {}
+
+	virtual ~BuiltinRegex() {
+		delete []substituted;	// Allocated with new char[] in SubstituteByPosition, so the array form of delete is required
+	}
+
+	virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,	// Returns the position of the match, or -1 if none
+                        bool caseSensitive, bool word, bool wordStart, int flags,
+                        int *length);	// in: length of s; out: length of the match
+
+	virtual const char *SubstituteByPosition(Document* doc, const char *text, int *length);	// Result stays valid until the next call or destruction
+
+private:
+	RESearch search;	// Compiled pattern and the begin/end offsets of the most recent match
+	char *substituted;	// Most recent substitution result, or null; owned by this object
+};
+
+// Define a way for the Regular Expression code to access the document, one character at a time, up to a fixed end position
+class DocumentIndexer : public CharacterIndexer {
+	Document *pdoc;	// Document whose characters are exposed
+	int end;	// Exclusive upper bound of the readable range
+public:
+	DocumentIndexer(Document *pdoc_, int end_) :
+		pdoc(pdoc_), end(end_) {
+	}
+
+	virtual ~DocumentIndexer() {
+	}
+
+	virtual char CharAt(int index) {	// Character at index, or 0 for any index outside [0, end)
+		if (index < 0 || index >= end)
+			return 0;	// Out-of-range reads yield NUL instead of touching the document
+		else
+			return pdoc->CharAt(index);
+	}
+};
+
+long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,	// Line-by-line regular expression search; returns match position or -1
+                        bool caseSensitive, bool, bool, int flags,	// The two unnamed bools (word, wordStart in the class declaration) are not used here
+                        int *length) {	// in: length of s; out: match length (0 if none); left unchanged if the pattern fails to compile
+	bool posix = (flags & SCFIND_POSIX) != 0;
+	int increment = (minPos <= maxPos) ? 1 : -1;	// +1 searches forward, -1 (minPos > maxPos) searches backward
+
+	int startPos = minPos;
+	int endPos = maxPos;
+
+	// Range endpoints should not be inside DBCS characters, but just in case, move them.
+	startPos = doc->MovePositionOutsideChar(startPos, 1, false);
+	endPos = doc->MovePositionOutsideChar(endPos, 1, false);
+
+	const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
+	if (errmsg) {
+		return -1;	// Pattern did not compile
+	}
+	// Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
+	// Replace first '.' with '-' in each property file variable reference:
+	//     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
+	//     Replace: $(\1-\2)
+	int lineRangeStart = doc->LineFromPosition(startPos);
+	int lineRangeEnd = doc->LineFromPosition(endPos);
+	if ((increment == 1) &&
+		(startPos >= doc->LineEnd(lineRangeStart)) &&
+		(lineRangeStart < lineRangeEnd)) {
+		// the start position is at end of line or between line end characters.
+		lineRangeStart++;
+		startPos = doc->LineStart(lineRangeStart);
+	}
+	int pos = -1;	// Stays -1 unless some line produces a match
+	int lenRet = 0;
+	char searchEnd = (*length > 0) ? s[*length - 1] : '\0';	// Last pattern character; guarded so a zero length never reads s[-1]
+	int lineRangeBreak = lineRangeEnd + increment;	// One step past the final line in the direction of travel
+	for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
+		int startOfLine = doc->LineStart(line);
+		int endOfLine = doc->LineEnd(line);
+		if (increment == 1) {
+			if (line == lineRangeStart) {
+				if ((startPos != startOfLine) && (s[0] == '^'))
+					continue;	// Can't match start of line if start position after start of line
+				startOfLine = startPos;
+			}
+			if (line == lineRangeEnd) {
+				if ((endPos != endOfLine) && (searchEnd == '$'))
+					continue;	// Can't match end of line if end position before end of line
+				endOfLine = endPos;
+			}
+		} else {
+			if (line == lineRangeEnd) {
+				if ((endPos != startOfLine) && (s[0] == '^'))
+					continue;	// Can't match start of line if end position after start of line
+				startOfLine = endPos;
+			}
+			if (line == lineRangeStart) {
+				if ((startPos != endOfLine) && (searchEnd == '$'))
+					continue;	// Can't match end of line if start position before end of line
+				endOfLine = startPos;
+			}
+		}
+
+		DocumentIndexer di(doc, endOfLine);	// Characters at or beyond endOfLine read as NUL
+		int success = search.Execute(di, startOfLine, endOfLine);
+		if (success) {
+			pos = search.bopat[0];
+			lenRet = search.eopat[0] - search.bopat[0];
+			if (increment == -1) {
+				// Check for the last match on this line.
+				int repetitions = 1000;	// Break out of infinite loop
+				while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
+					success = search.Execute(di, pos+1, endOfLine);	// Retry from just after the start of the last accepted match
+					if (success) {
+						if (search.eopat[0] <= minPos) {	// In a backward search minPos is the upper end of the range
+							pos = search.bopat[0];
+							lenRet = search.eopat[0] - search.bopat[0];
+						} else {
+							success = 0;	// Match runs past the range: keep the previous one and stop
+						}
+					}
+				}
+			}
+			break;	// The first line that yields a match ends the search
+		}
+	}
+	*length = lenRet;
+	return pos;
+}
+
+const char *BuiltinRegex::SubstituteByPosition(Document* doc, const char *text, int *length) {	// Expands \1..\9 and \a \b \f \n \r \t \v in text; returns a NUL-terminated buffer owned by this object, or 0 on failure
+	delete []substituted;	// Discard the result of any previous call
+	substituted = 0;
+	DocumentIndexer di(doc, doc->Length());
+	if (!search.GrabMatches(di))	// NOTE(review): presumably fills search.pat[] with the matched group text - confirm in RESearch
+		return 0;
+	unsigned int lenResult = 0;	// First pass: measure the expanded length
+	for (int i = 0; i < *length; i++) {
+		if (text[i] == '\\') {
+			if (text[i + 1] >= '1' && text[i + 1] <= '9') {
+				unsigned int patNum = text[i + 1] - '0';
+				lenResult += search.eopat[patNum] - search.bopat[patNum];	// Length of the referenced group
+				i++;
+			} else {
+				switch (text[i + 1]) {
+				case 'a':
+				case 'b':
+				case 'f':
+				case 'n':
+				case 'r':
+				case 't':
+				case 'v':
+					i++;	// A recognised escape turns two input characters into one output character
+				}
+				lenResult++;
+			}
+		} else {
+			lenResult++;
+		}
+	}
+	substituted = new char[lenResult + 1];	// +1 for the terminating NUL
+	if (!substituted)	// NOTE(review): plain new normally throws rather than returning null, so this check likely never fires - confirm build settings
+		return 0;
+	char *o = substituted;	// Second pass: write the expanded text
+	for (int j = 0; j < *length; j++) {
+		if (text[j] == '\\') {
+			if (text[j + 1] >= '1' && text[j + 1] <= '9') {
+				unsigned int patNum = text[j + 1] - '0';
+				unsigned int len = search.eopat[patNum] - search.bopat[patNum];
+				if (search.pat[patNum])	// Will be null if try for a match that did not occur
+					memcpy(o, search.pat[patNum], len);
+				o += len;
+				j++;
+			} else {
+				j++;
+				switch (text[j]) {
+				case 'a':
+					*o++ = '\a';
+					break;
+				case 'b':
+					*o++ = '\b';
+					break;
+				case 'f':
+					*o++ = '\f';
+					break;
+				case 'n':
+					*o++ = '\n';
+					break;
+				case 'r':
+					*o++ = '\r';
+					break;
+				case 't':
+					*o++ = '\t';
+					break;
+				case 'v':
+					*o++ = '\v';
+					break;
+				default:
+					*o++ = '\\';	// Unknown escape: keep the backslash literally
+					j--;	// Step back so the following character is copied by the next iteration
+				}
+			}
+		} else {
+			*o++ = text[j];
+		}
+	}
+	*o = '\0';
+	*length = lenResult;	// Report the expanded length back to the caller
+	return substituted;
+}
+
+#ifndef SCI_OWNREGEX
+
+RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {	// Default factory; compiled out when SCI_OWNREGEX is defined
+	return new BuiltinRegex(charClassTable);	// Heap-allocated; Document stores it in regex and deletes it in its destructor
+}
+
+#endif