aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2003-09-22 12:48:54 +0000
committer nyamatongwe <unknown> 2003-09-22 12:48:54 +0000
commit b427ff4fabf40f186fdb8f65e41094e1559663ec (patch)
tree 2362a83d0be56ccfdce723bd90f419244e123ee5
parent c0db98d7ff8b71cb5a8cf6f596d99f919fb1889d (diff)
download scintilla-mirror-b427ff4fabf40f186fdb8f65e41094e1559663ec.tar.gz
Removed modification history.
-rw-r--r-- src/RESearch.cxx 146
1 files changed, 41 insertions, 105 deletions
diff --git a/src/RESearch.cxx b/src/RESearch.cxx
index fcb9dedea..b7ea71bfb 100644
--- a/src/RESearch.cxx
+++ b/src/RESearch.cxx
@@ -10,11 +10,11 @@
* Dept. of Computer Science
* York University
*
- * Original code available from http://www.cs.yorku.ca/~oz/
+ * Original code available from http://www.cs.yorku.ca/~oz/
* Translation to C++ by Neil Hodgson neilh@scintilla.org
* Removed all use of register.
* Converted to modern function prototypes.
- * Put all global/static variables into an object so this code can be
+ * Put all global/static variables into an object so this code can be
* used from multiple threads etc.
*
* These routines are the PUBLIC DOMAIN equivalents of regex
@@ -27,72 +27,8 @@
* see Henry Spencer's regexp routines, or GNU Emacs pattern
* matching module.
*
- * Modification history:
- *
- * $Log$
- * Revision 1.10 2003/08/26 11:45:22 nyamatongwe
- * Fixed bug that ignored high bit of characters in comparisons.
- *
- * Revision 1.9 2003/03/21 10:36:08 nyamatongwe
- * Detect patterns too long in regular expression search.
- *
- * Revision 1.8 2003/03/04 10:53:59 nyamatongwe
- * Patch from Jakub to optionally implement more POSIX compatible regular
- * expressions. \(..\) changes to (..)
- * Fixes problem where find previous would not find earlier matches on same
- * line.
- *
- * Revision 1.8 2003/03/03 20:12:56 vrana
- * Added posix syntax.
- *
- * Revision 1.7 2002/09/28 00:33:28 nyamatongwe
- * Fixed problem with character ranges caused by expansion to 8 bits.
- *
- * Revision 1.6 2001/04/29 13:32:10 nyamatongwe
- * Addition of new target methods - versions of ReplaceTarget that take counted
- * strings to allow for nulls, SearchInTarget and Get/SetSearchFlags to use a
- * series of calls rather than a structure.
- * Handling of \000 in search and replace.
- * Handling of /escapes within character ranges of regular expressions.
- * Some handling of bare ^ and $ regular expressions.
- *
- * Revision 1.5 2001/04/20 07:36:09 nyamatongwe
- * Removed DEBUG code that failed to compile on GTK+.
- *
- * Revision 1.4 2001/04/13 03:52:13 nyamatongwe
- * Added URL to find original code to comments.
- *
- * Revision 1.3 2001/04/06 12:24:21 nyamatongwe
- * Made regular expression searching work on a line by line basis, made ^ and
- * $ work, made [set] work, and added a case insensitive option.
- *
- * Revision 1.2 2001/04/05 01:58:04 nyamatongwe
- * Replace target functionality to make find and replace operations faster
- * by diminishing screen updates and allow for \d patterns in the replacement
- * text.
- *
- * Revision 1.1 2001/04/04 12:52:44 nyamatongwe
- * Moved to public domain regular expresion implementation.
- *
- * Revision 1.4 1991/10/17 03:56:42 oz
- * miscellaneous changes, small cleanups etc.
- *
- * Revision 1.3 1989/04/01 14:18:09 oz
- * Change all references to a dfa: this is actually an nfa.
- *
- * Revision 1.2 88/08/28 15:36:04 oz
- * Use a complement bitmap to represent NCL.
- * This removes the need to have seperate
- * code in the PMatch case block - it is
- * just CCL code now.
- *
- * Use the actual CCL code in the CLO
- * section of PMatch. No need for a recursive
- * PMatch call.
- *
- * Use a bitmap table to set char bits in an
- * 8-bit chunk.
- *
+ * Modification history removed.
+ *
* Interfaces:
* RESearch::Compile: compile a regular expression into a NFA.
*
@@ -122,7 +58,7 @@
* void re_fail(msg, op)
* char *msg;
* char op;
- *
+ *
* Regular Expressions:
*
* [1] char matches itself, unless it is a special
@@ -132,20 +68,20 @@
*
* [3] \ matches the character following it, except
* when followed by a left or right round bracket,
- * a digit 1 to 9 or a left or right angle bracket.
+ * a digit 1 to 9 or a left or right angle bracket.
* (see [7], [8] and [9])
- * It is used as an escape character for all
+ * It is used as an escape character for all
* other meta-characters, and itself. When used
* in a set ([4]), it is treated as an ordinary
* character.
*
* [4] [set] matches one of the characters in the set.
* If the first character in the set is "^",
- * it matches a character NOT in the set, i.e.
- * complements the set. A shorthand S-E is
- * used to specify a set of characters S upto
- * E, inclusive. The special characters "]" and
- * "-" have no special meaning if they appear
+ * it matches a character NOT in the set, i.e.
+ * complements the set. A shorthand S-E is
+ * used to specify a set of characters S upto
+ * E, inclusive. The special characters "]" and
+ * "-" have no special meaning if they appear
* as the first chars in the set.
* examples: match:
*
@@ -210,8 +146,8 @@
* Notes:
*
* This implementation uses a bit-set representation for character
- * classes for speed and compactness. Each character is represented
- * by one bit in a 128-bit block. Thus, CCL always takes a
+ * classes for speed and compactness. Each character is represented
+ * by one bit in a 128-bit block. Thus, CCL always takes a
* constant 16 bytes in the internal nfa, and RESearch::Execute does a single
* bit comparison to locate the character in the set.
*
@@ -221,7 +157,7 @@
* compile: CHR f CHR o CLO CHR o END CLO ANY END END
* matches: fo foo fooo foobar fobar foxx ...
*
- * pattern: fo[ob]a[rz]
+ * pattern: fo[ob]a[rz]
* compile: CHR f CHR o CCL bitset CHR a CCL bitset END
* matches: fobar fooar fobaz fooaz
*
@@ -269,7 +205,7 @@
const char bitarr[] = {1,2,4,8,16,32,64,'\200'};
#define badpat(x) (*nfa = END, x)
-
+
RESearch::RESearch() {
Init();
}
@@ -359,7 +295,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b
int n;
char mask; /* xor mask -CCL/NCL */
int c1, c2;
-
+
if (!pat || !length)
if (sta)
return 0;
@@ -401,7 +337,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b
i++;
if (*++p == '^') {
- mask = '\377';
+ mask = '\377';
i++;
p++;
} else
@@ -445,7 +381,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b
for (n = 0; n < BITBLK; bittab[n++] = (char) 0)
*mp++ = static_cast<char>(mask ^ bittab[n]);
-
+
break;
case '*': /* match 0 or more.. */
@@ -590,7 +526,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b
* RESearch::Execute:
* execute nfa to find a match.
*
- * special cases: (nfa[0])
+ * special cases: (nfa[0])
* BOL
* Match only once, starting from the
* beginning.
@@ -615,7 +551,7 @@ int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {
bol = lp;
failure = 0;
-
+
Clear();
switch(*ap) {
@@ -656,7 +592,7 @@ int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {
return 1;
}
-/*
+/*
* PMatch: internal routine for the hard part
*
* This code is partly snarfed from an early grep written by
@@ -682,7 +618,7 @@ int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {
*
* At the end of a successful match, bopat[n] and eopat[n]
* are set to the beginning and end of subpatterns matched
- * by tagged expressions (n = 1 to 9).
+ * by tagged expressions (n = 1 to 9).
*
*/
@@ -693,23 +629,23 @@ extern void re_fail(char *,char);
* and EOW. the reason for not using ctype macros is that we can
* let the user add into our own table. see RESearch::ModifyWord. This table
* is not in the bitset form, since we may wish to extend it in the
- * future for other character classifications.
+ * future for other character classifications.
*
* TRUE for 0-9 A-Z a-z _
*/
static char chrtyp[MAXCHR] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
- 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
+ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0
};
@@ -831,10 +767,10 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) {
* the compact bitset representation for the default table]
*/
-static char deftab[16] = {
- 0, 0, 0, 0, 0, 0, '\377', 003, '\376', '\377', '\377', '\207',
- '\376', '\377', '\377', 007
-};
+static char deftab[16] = {
+ 0, 0, 0, 0, 0, 0, '\377', 003, '\376', '\377', '\377', '\207',
+ '\376', '\377', '\377', 007
+};
void RESearch::ModifyWord(char *s) {
int i;
@@ -881,7 +817,7 @@ int RESearch::Substitute(CharacterIndexer &ci, char *src, char *dst) {
pin = c - '0';
break;
}
-
+
default:
*dst++ = c;
continue;