diff options
Diffstat (limited to 'src/Document.cxx')
-rw-r--r-- | src/Document.cxx | 422 |
1 files changed, 227 insertions, 195 deletions
diff --git a/src/Document.cxx b/src/Document.cxx index dba827c8d..bded3a32a 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -73,8 +73,7 @@ Document::Document() { lenWatchers = 0; matchesValid = false; - pre = 0; - substituted = 0; + regex = 0; } Document::~Document() { @@ -84,10 +83,8 @@ Document::~Document() { delete []watchers; watchers = 0; lenWatchers = 0; - delete pre; - pre = 0; - delete []substituted; - substituted = 0; + delete regex; + regex = 0; } // Increase reference count and return its previous value. @@ -1015,123 +1012,18 @@ static inline char MakeLowerCase(char ch) { return static_cast<char>(ch - 'A' + 'a'); } -// Define a way for the Regular Expression code to access the document -class DocumentIndexer : public CharacterIndexer { - Document *pdoc; - int end; -public: - DocumentIndexer(Document *pdoc_, int end_) : - pdoc(pdoc_), end(end_) { - } - - virtual ~DocumentIndexer() { - } - - virtual char CharAt(int index) { - if (index < 0 || index >= end) - return 0; - else - return pdoc->CharAt(index); - } -}; - /** * Find text in document, supporting both forward and backward * searches (just pass minPos > maxPos to do a backward search) * Has not been tested with backwards DBCS searches yet. */ long Document::FindText(int minPos, int maxPos, const char *s, - bool caseSensitive, bool word, bool wordStart, bool regExp, bool posix, + bool caseSensitive, bool word, bool wordStart, bool regExp, int flags, int *length) { if (regExp) { - if (!pre) - pre = new RESearch(&charClass); - if (!pre) - return -1; - - int increment = (minPos <= maxPos) ? 1 : -1; - - int startPos = minPos; - int endPos = maxPos; - - // Range endpoints should not be inside DBCS characters, but just in case, move them. - startPos = MovePositionOutsideChar(startPos, 1, false); - endPos = MovePositionOutsideChar(endPos, 1, false); - - const char *errmsg = pre->Compile(s, *length, caseSensitive, posix); - if (errmsg) { - return -1; - } - // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) - // Replace first '.' with '-' in each property file variable reference: - // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) - // Replace: $(\1-\2) - int lineRangeStart = LineFromPosition(startPos); - int lineRangeEnd = LineFromPosition(endPos); - if ((increment == 1) && - (startPos >= LineEnd(lineRangeStart)) && - (lineRangeStart < lineRangeEnd)) { - // the start position is at end of line or between line end characters. - lineRangeStart++; - startPos = LineStart(lineRangeStart); - } - int pos = -1; - int lenRet = 0; - char searchEnd = s[*length - 1]; - int lineRangeBreak = lineRangeEnd + increment; - for (int line = lineRangeStart; line != lineRangeBreak; line += increment) { - int startOfLine = LineStart(line); - int endOfLine = LineEnd(line); - if (increment == 1) { - if (line == lineRangeStart) { - if ((startPos != startOfLine) && (s[0] == '^')) - continue; // Can't match start of line if start position after start of line - startOfLine = startPos; - } - if (line == lineRangeEnd) { - if ((endPos != endOfLine) && (searchEnd == '$')) - continue; // Can't match end of line if end position before end of line - endOfLine = endPos; - } - } else { - if (line == lineRangeEnd) { - if ((endPos != startOfLine) && (s[0] == '^')) - continue; // Can't match start of line if end position after start of line - startOfLine = endPos; - } - if (line == lineRangeStart) { - if ((startPos != endOfLine) && (searchEnd == '$')) - continue; // Can't match end of line if start position before end of line - endOfLine = startPos; - } - } - - DocumentIndexer di(this, endOfLine); - int success = pre->Execute(di, startOfLine, endOfLine); - if (success) { - pos = pre->bopat[0]; - lenRet = pre->eopat[0] - pre->bopat[0]; - if (increment == -1) { - // Check for the last match on this line. - int repetitions = 1000; // Break out of infinite loop - while (success && (pre->eopat[0] <= endOfLine) && (repetitions--)) { - success = pre->Execute(di, pos+1, endOfLine); - if (success) { - if (pre->eopat[0] <= minPos) { - pos = pre->bopat[0]; - lenRet = pre->eopat[0] - pre->bopat[0]; - } else { - success = 0; - } - } - } - } - break; - } - } - *length = lenRet; - return pos; - + if (!regex) + regex = CreateRegexSearch(&charClass); + return regex->FindText(this, minPos, maxPos, s, caseSensitive, word, wordStart, flags, length); } else { bool forward = minPos <= maxPos; @@ -1201,86 +1093,7 @@ long Document::FindText(int minPos, int maxPos, const char *s, } const char *Document::SubstituteByPosition(const char *text, int *length) { - if (!pre) - return 0; - delete []substituted; - substituted = 0; - DocumentIndexer di(this, Length()); - if (!pre->GrabMatches(di)) - return 0; - unsigned int lenResult = 0; - for (int i = 0; i < *length; i++) { - if (text[i] == '\\') { - if (text[i + 1] >= '1' && text[i + 1] <= '9') { - unsigned int patNum = text[i + 1] - '0'; - lenResult += pre->eopat[patNum] - pre->bopat[patNum]; - i++; - } else { - switch (text[i + 1]) { - case 'a': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - i++; - } - lenResult++; - } - } else { - lenResult++; - } - } - substituted = new char[lenResult + 1]; - if (!substituted) - return 0; - char *o = substituted; - for (int j = 0; j < *length; j++) { - if (text[j] == '\\') { - if (text[j + 1] >= '1' && text[j + 1] <= '9') { - unsigned int patNum = text[j + 1] - '0'; - unsigned int len = pre->eopat[patNum] - pre->bopat[patNum]; - if (pre->pat[patNum]) // Will be null if try for a match that did not occur - memcpy(o, pre->pat[patNum], len); - o += len; - j++; - } else { - j++; - switch (text[j]) { - case 'a': - *o++ = '\a'; - break; - case 'b': - *o++ = '\b'; - break; - case 'f': - *o++ = '\f'; - break; - case 'n': - *o++ = '\n'; - break; - case 'r': - *o++ = '\r'; - break; - case 't': - *o++ = '\t'; - break; - case 'v': - *o++ = '\v'; - break; - default: - *o++ = '\\'; - j--; - } - } - } else { - *o++ = text[j]; - } - } - *o = '\0'; - *length = lenResult; - return substituted; + return regex->SubstituteByPosition(this, text, length); } int Document::LinesTotal() const { @@ -1630,3 +1443,222 @@ int Document::BraceMatch(int position, int /*maxReStyle*/) { } return - 1; } + +/** + * Implementation of RegexSearchBase for the default built-in regular expression engine + */ +class BuiltinRegex : public RegexSearchBase { +public: + BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {} + + virtual ~BuiltinRegex() { + delete substituted; + } + + virtual long FindText(Document *doc, int minPos, int maxPos, const char *s, + bool caseSensitive, bool word, bool wordStart, int flags, + int *length); + + virtual const char *SubstituteByPosition(Document* doc, const char *text, int *length); + +private: + RESearch search; + char *substituted; +}; + +// Define a way for the Regular Expression code to access the document +class DocumentIndexer : public CharacterIndexer { + Document *pdoc; + int end; +public: + DocumentIndexer(Document *pdoc_, int end_) : + pdoc(pdoc_), end(end_) { + } + + virtual ~DocumentIndexer() { + } + + virtual char CharAt(int index) { + if (index < 0 || index >= end) + return 0; + else + return pdoc->CharAt(index); + } +}; + +long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s, + bool caseSensitive, bool, bool, int flags, + int *length) { + bool posix = (flags & SCFIND_POSIX) != 0; + int increment = (minPos <= maxPos) ? 1 : -1; + + int startPos = minPos; + int endPos = maxPos; + + // Range endpoints should not be inside DBCS characters, but just in case, move them. + startPos = doc->MovePositionOutsideChar(startPos, 1, false); + endPos = doc->MovePositionOutsideChar(endPos, 1, false); + + const char *errmsg = search.Compile(s, *length, caseSensitive, posix); + if (errmsg) { + return -1; + } + // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\)) + // Replace first '.' with '-' in each property file variable reference: + // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\)) + // Replace: $(\1-\2) + int lineRangeStart = doc->LineFromPosition(startPos); + int lineRangeEnd = doc->LineFromPosition(endPos); + if ((increment == 1) && + (startPos >= doc->LineEnd(lineRangeStart)) && + (lineRangeStart < lineRangeEnd)) { + // the start position is at end of line or between line end characters. + lineRangeStart++; + startPos = doc->LineStart(lineRangeStart); + } + int pos = -1; + int lenRet = 0; + char searchEnd = s[*length - 1]; + int lineRangeBreak = lineRangeEnd + increment; + for (int line = lineRangeStart; line != lineRangeBreak; line += increment) { + int startOfLine = doc->LineStart(line); + int endOfLine = doc->LineEnd(line); + if (increment == 1) { + if (line == lineRangeStart) { + if ((startPos != startOfLine) && (s[0] == '^')) + continue; // Can't match start of line if start position after start of line + startOfLine = startPos; + } + if (line == lineRangeEnd) { + if ((endPos != endOfLine) && (searchEnd == '$')) + continue; // Can't match end of line if end position before end of line + endOfLine = endPos; + } + } else { + if (line == lineRangeEnd) { + if ((endPos != startOfLine) && (s[0] == '^')) + continue; // Can't match start of line if end position after start of line + startOfLine = endPos; + } + if (line == lineRangeStart) { + if ((startPos != endOfLine) && (searchEnd == '$')) + continue; // Can't match end of line if start position before end of line + endOfLine = startPos; + } + } + + DocumentIndexer di(doc, endOfLine); + int success = search.Execute(di, startOfLine, endOfLine); + if (success) { + pos = search.bopat[0]; + lenRet = search.eopat[0] - search.bopat[0]; + if (increment == -1) { + // Check for the last match on this line. + int repetitions = 1000; // Break out of infinite loop + while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) { + success = search.Execute(di, pos+1, endOfLine); + if (success) { + if (search.eopat[0] <= minPos) { + pos = search.bopat[0]; + lenRet = search.eopat[0] - search.bopat[0]; + } else { + success = 0; + } + } + } + } + break; + } + } + *length = lenRet; + return pos; +} + +const char *BuiltinRegex::SubstituteByPosition(Document* doc, const char *text, int *length) { + delete []substituted; + substituted = 0; + DocumentIndexer di(doc, doc->Length()); + if (!search.GrabMatches(di)) + return 0; + unsigned int lenResult = 0; + for (int i = 0; i < *length; i++) { + if (text[i] == '\\') { + if (text[i + 1] >= '1' && text[i + 1] <= '9') { + unsigned int patNum = text[i + 1] - '0'; + lenResult += search.eopat[patNum] - search.bopat[patNum]; + i++; + } else { + switch (text[i + 1]) { + case 'a': + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': + i++; + } + lenResult++; + } + } else { + lenResult++; + } + } + substituted = new char[lenResult + 1]; + if (!substituted) + return 0; + char *o = substituted; + for (int j = 0; j < *length; j++) { + if (text[j] == '\\') { + if (text[j + 1] >= '1' && text[j + 1] <= '9') { + unsigned int patNum = text[j + 1] - '0'; + unsigned int len = search.eopat[patNum] - search.bopat[patNum]; + if (search.pat[patNum]) // Will be null if try for a match that did not occur + memcpy(o, search.pat[patNum], len); + o += len; + j++; + } else { + j++; + switch (text[j]) { + case 'a': + *o++ = '\a'; + break; + case 'b': + *o++ = '\b'; + break; + case 'f': + *o++ = '\f'; + break; + case 'n': + *o++ = '\n'; + break; + case 'r': + *o++ = '\r'; + break; + case 't': + *o++ = '\t'; + break; + case 'v': + *o++ = '\v'; + break; + default: + *o++ = '\\'; + j--; + } + } + } else { + *o++ = text[j]; + } + } + *o = '\0'; + *length = lenResult; + return substituted; +} + +#ifndef SCI_OWNREGEX + +RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) { + return new BuiltinRegex(charClassTable); +} + +#endif |