aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--src/RESearch.cxx109
-rw-r--r--src/RESearch.h2
2 files changed, 48 insertions, 63 deletions
diff --git a/src/RESearch.cxx b/src/RESearch.cxx
index ac41138c8..86f2d0e9e 100644
--- a/src/RESearch.cxx
+++ b/src/RESearch.cxx
@@ -333,14 +333,11 @@ constexpr int isinset(const char *ap, unsigned char c) noexcept {
* @return the char if it resolves to a simple char,
* or -1 for a char class. In this case, bittab is changed.
*/
-int RESearch::GetBackslashExpression(
- const char *pattern,
- int &incr) noexcept {
+int RESearch::GetBackslashExpression(const char *pattern, int &incr) noexcept {
// Since error reporting is primitive and messages are not used anyway,
// I choose to interpret unexpected syntax in a logical way instead
// of reporting errors. Otherwise, we can stick on, eg., PCRE behaviour.
incr = 0; // Most of the time, will skip the char "naturally".
- int c = 0;
int result = -1;
const unsigned char bsc = *pattern;
if (!bsc) {
@@ -372,12 +369,12 @@ int RESearch::GetBackslashExpression(
}
break;
case 'd':
- for (c = '0'; c <= '9'; c++) {
+ for (int c = '0'; c <= '9'; c++) {
ChSet(static_cast<unsigned char>(c));
}
break;
case 'D':
- for (c = 0; c < MAXCHR; c++) {
+ for (int c = 0; c < MAXCHR; c++) {
if (c < '0' || c > '9') {
ChSet(static_cast<unsigned char>(c));
}
@@ -392,21 +389,21 @@ int RESearch::GetBackslashExpression(
ChSet('\v');
break;
case 'S':
- for (c = 0; c < MAXCHR; c++) {
+ for (int c = 0; c < MAXCHR; c++) {
if (c != ' ' && !(c >= 0x09 && c <= 0x0D)) {
ChSet(static_cast<unsigned char>(c));
}
}
break;
case 'w':
- for (c = 0; c < MAXCHR; c++) {
+ for (int c = 0; c < MAXCHR; c++) {
if (iswordc(static_cast<unsigned char>(c))) {
ChSet(static_cast<unsigned char>(c));
}
}
break;
case 'W':
- for (c = 0; c < MAXCHR; c++) {
+ for (int c = 0; c < MAXCHR; c++) {
if (!iswordc(static_cast<unsigned char>(c))) {
ChSet(static_cast<unsigned char>(c));
}
@@ -431,19 +428,14 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
nfa[0] = END;
char *mp=nfa; /* nfa pointer */
- char *lp=nullptr; /* saved pointer */
- char *sp=nfa; /* another one */
- const char * mpMax = mp + MAXNFA - BITBLK - 10;
+ char *sp=nfa; /* another saved pointer */
+ const char * const mpMax = mp + MAXNFA - BITBLK - 10;
int tagstk[MAXTAG]{}; /* subpat tag stack */
int tagi = 0; /* tag stack index */
int tagc = 1; /* actual tag count */
- int n = 0;
char mask = 0; /* xor mask -CCL/NCL */
- int c1 = 0;
- int c2 = 0;
- int prevChar = 0;
sta = NOP;
@@ -451,7 +443,7 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
for (int i=0; i<length; i++, p++) {
if (mp > mpMax)
return badpat("Pattern too long");
- lp = mp;
+ char *lp = mp; /* saved pointer */
switch (*p) {
case '.': /* match any char */
@@ -468,7 +460,7 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
break;
case '$': /* match endofline */
- if (!*(p+1)) {
+ if (!p[1]) {
*mp++ = EOL;
} else {
*mp++ = CHR;
@@ -476,9 +468,9 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
}
break;
- case '[': /* match char class */
+ case '[': { /* match char class */
*mp++ = CCL;
- prevChar = 0;
+ int prevChar = 0;
i++;
if (*++p == '^') {
@@ -505,13 +497,13 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
// Previous def. was a char class like \d, take dash literally
prevChar = *p;
ChSet(*p);
- } else if (*(p+1)) {
- if (*(p+1) != ']') {
- c1 = prevChar + 1;
+ } else if (p[1]) {
+ if (p[1] != ']') {
+ int c1 = prevChar + 1;
i++;
- c2 = static_cast<unsigned char>(*++p);
+ int c2 = static_cast<unsigned char>(*++p);
if (c2 == '\\') {
- if (!*(p+1)) { // End of RE
+ if (!p[1]) { // End of RE
return badpat("Missing ]");
} else {
i++;
@@ -548,7 +540,7 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
} else {
return badpat("Missing ]");
}
- } else if (*p == '\\' && *(p+1)) {
+ } else if (*p == '\\' && p[1]) {
i++;
p++;
int incr;
@@ -573,10 +565,10 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
if (!*p)
return badpat("Missing ]");
- for (n = 0; n < BITBLK; bittab[n++] = 0)
+ for (int n = 0; n < BITBLK; bittab[n++] = 0)
*mp++ = static_cast<char>(mask ^ bittab[n]);
- break;
+ } break;
case '*': /* match 0 or more... */
case '+': /* match 1 or more... */
@@ -610,7 +602,7 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
while (--mp > lp)
*mp = mp[-1];
if (*p == '?') *mp = CLQ;
- else if (*(p+1) == '?') *mp = LCLO;
+ else if (p[1] == '?') *mp = LCLO;
else *mp = CLO;
mp = sp;
@@ -635,8 +627,8 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
case '6':
case '7':
case '8':
- case '9':
- n = *p-'0';
+ case '9': {
+ const int n = *p-'0';
if (tagi > 0 && tagstk[tagi] == n)
return badpat("Cyclical reference");
if (tagc > n) {
@@ -645,7 +637,7 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
} else {
return badpat("Undetermined reference");
}
- break;
+ } break;
default:
if (!posix && *p == '(') {
if (tagc < MAXTAG) {
@@ -675,7 +667,7 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
} else {
*mp++ = CCL;
mask = 0;
- for (n = 0; n < BITBLK; bittab[n++] = 0)
+ for (int n = 0; n < BITBLK; bittab[n++] = 0)
*mp++ = static_cast<char>(mask ^ bittab[n]);
}
}
@@ -711,7 +703,7 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
*mp++ = CCL;
mask = 0;
ChSetWithCase(c, false);
- for (n = 0; n < BITBLK; bittab[n++] = 0)
+ for (int n = 0; n < BITBLK; bittab[n++] = 0)
*mp++ = static_cast<char>(mask ^ bittab[n]);
}
}
@@ -748,7 +740,6 @@ const char *RESearch::Compile(const char *pattern, Sci::Position length, bool ca
*
*/
int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp) {
- unsigned char c = 0;
Sci::Position ep = NOTFOUND;
const char * const ap = nfa;
@@ -770,13 +761,14 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
} else {
return 0;
}
- case CHR: /* ordinary char: locate it fast */
- c = ap[1];
+ case CHR: { /* ordinary char: locate it fast */
+ const unsigned char c = ap[1];
while ((lp < endp) && (static_cast<unsigned char>(ci.CharAt(lp)) != c))
lp++;
if (lp >= endp) /* if EOS, fail, else fall through. */
return 0;
- [[fallthrough]];
+ }
+ [[fallthrough]];
default: /* regular matching all the way. */
while (lp < endp) {
ep = PMatch(ci, lp, endp, ap);
@@ -836,14 +828,7 @@ int RESearch::Execute(const CharacterIndexer &ci, Sci::Position lp, Sci::Positio
#define CCLSKIP 34 /* [CLO] CCL 32 bytes END */
Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci::Position endp, const char *ap) {
- int op = 0;
- int c = 0;
- int n = 0;
- Sci::Position e = 0; /* extra pointer for CLO */
- Sci::Position bp = 0; /* beginning of subpat... */
- Sci::Position ep = 0; /* ending of subpat... */
- Sci::Position are = 0; /* to save the line ptr. */
- Sci::Position llp = 0; /* lazy lp for LCLO */
+ unsigned char op = 0;
while ((op = *ap++) != END)
switch (op) {
@@ -872,10 +857,10 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
return NOTFOUND;
break;
case BOT:
- bopat[static_cast<int>(*ap++)] = lp;
+ bopat[static_cast<unsigned char>(*ap++)] = lp;
break;
case EOT:
- eopat[static_cast<int>(*ap++)] = lp;
+ eopat[static_cast<unsigned char>(*ap++)] = lp;
break;
case BOW:
if ((lp!=bol && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp)))
@@ -885,18 +870,19 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
if (lp==bol || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp)))
return NOTFOUND;
break;
- case REF:
- n = *ap++;
- bp = bopat[n];
- ep = eopat[n];
+ case REF: {
+ const int n = static_cast<unsigned char>(*ap++);
+ Sci::Position bp = bopat[n]; /* beginning of subpat... */
+ const Sci::Position ep = eopat[n]; /* ending of subpat... */
while (bp < ep)
if (ci.CharAt(bp++) != ci.CharAt(lp++))
return NOTFOUND;
- break;
+ } break;
case LCLO:
case CLQ:
- case CLO:
- are = lp;
+ case CLO: {
+ int n = 0;
+ const Sci::Position are = lp; /* to save the line ptr. */
switch (*ap) {
case ANY:
@@ -908,15 +894,15 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
n = ANYSKIP;
break;
- case CHR:
- c = ap[1];
+ case CHR: {
+ const char c = ap[1];
if (op == CLO || op == LCLO)
while ((lp < endp) && (c == ci.CharAt(lp)))
lp++;
else if ((lp < endp) && (c == ci.CharAt(lp)))
lp++;
n = CHRSKIP;
- break;
+ } break;
case CCL:
while ((lp < endp) && isinset(ap+1, ci.CharAt(lp)))
lp++;
@@ -929,8 +915,8 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
}
ap += n;
- llp = lp;
- e = NOTFOUND;
+ Sci::Position llp = lp; /* lazy lp for LCLO */
+ Sci::Position e = NOTFOUND; /* extra pointer for CLO */
while (llp >= are) {
Sci::Position q;
if ((q = PMatch(ci, llp, endp, ap)) != NOTFOUND) {
@@ -944,11 +930,10 @@ Sci::Position RESearch::PMatch(const CharacterIndexer &ci, Sci::Position lp, Sci
if (*ap == EOT)
PMatch(ci, lp, endp, ap);
return e;
+ }
default:
//re_fail("RESearch::Execute: bad nfa.", static_cast<char>(op));
return NOTFOUND;
}
return lp;
}
-
-
diff --git a/src/RESearch.h b/src/RESearch.h
index 5ac3f00dc..4dee666a0 100644
--- a/src/RESearch.h
+++ b/src/RESearch.h
@@ -50,8 +50,8 @@ private:
Sci::Position bol;
char nfa[MAXNFA]; /* automaton */
int sta;
- unsigned char bittab[BITBLK]; /* bit table for CCL pre-set bits */
int failure;
+ unsigned char bittab[BITBLK]; /* bit table for CCL pre-set bits */
CharClassify *charClass;
bool iswordc(unsigned char x) const noexcept {
return charClass->IsWord(x);