diff options
Diffstat (limited to 'src/PosRegExp.cxx')
-rw-r--r-- | src/PosRegExp.cxx | 1192 |
1 files changed, 0 insertions, 1192 deletions
diff --git a/src/PosRegExp.cxx b/src/PosRegExp.cxx deleted file mode 100644 index eb240e2fb..000000000 --- a/src/PosRegExp.cxx +++ /dev/null @@ -1,1192 +0,0 @@ -#include <string.h> -#include <stdio.h> -#include <ctype.h> -#include <malloc.h> - -#include "PosRegExp.h" - -#ifdef _MSC_VER -#pragma warning(disable: 4244) -#endif -#ifdef __BORLANDC__ -// Too much effort to to cean this code up so just ignore badly -// bracketed initialisers, conversions losing significant digits, -// and values assigned but not used. -#pragma warn -pin -#pragma warn -sig -#pragma warn -aus -#endif -//Up: /[A-Z \x80-\x9f \xf0 ]/x -//Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x -//Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x -//* // Dos866 -SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000}, - LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff}, - WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff}, - DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; -/*/ // cp1251 -SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff, 0x0}, - LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff}, - WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x0, 0xffffffff, 0xffffffff}, - DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; -//*/ - -/////////////////////////////////////////////// - -int GetNumber(int *str,int s,int e) { - int r = 1, num = 0; - if (e < s) return -1; - for(int i = e-1; i >= s; i--) { - if (str[i] > '9' || str[i] < '0') return -1; - num += (str[i] - 0x30)*r; - r *= 10; - }; - return num; - /* - char tmp[20]; - double Res; - if (e == s) return -1; - for (int i = s;i < e;i++) - tmp[i-s] = (char)Str[i]; - tmp[e-s] = 0; - GetNumber(tmp,&Res); - return (int)Res; - */ -}; - -bool IsDigit(char Symb) { - return DigData.GetBit(Symb); -}; -bool IsWord(char Symb) { - return WdData.GetBit(Symb); -}; -bool IsUpperCase(char Symb) { - return UCData.GetBit(Symb); -}; -bool IsLowerCase(char Symb) { - return LCData.GetBit(Symb); -}; -char LowCase(char Chr) { - if (UCData.GetBit(Chr)) - return Chr+0x20; - return Chr; -}; - -/////////////////////////////////////////////// - -SRegInfo::SRegInfo() { - Next = Parent = 0; - un.Param = 0; - Op = ReEmpty; -}; -SRegInfo::~SRegInfo() { - if (Next) delete Next; - if (un.Param) - switch(Op) { - case ReEnum: - case ReNEnum: - delete un.ChrClass; - break; - default: - if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets) - delete un.Param; - break; - }; -}; - -/////////////////////////////////////////////// - -void SCharData::SetBit(unsigned char Bit) { - int p = Bit/8; - CArr[p] |= (1 << Bit%8); -}; -void SCharData::ClearBit(unsigned char Bit) { - int p = Bit/8; - CArr[p] &= ~(1 << Bit%8); -}; -bool SCharData::GetBit(unsigned char Bit) { - int p = (unsigned char)Bit/8; - return (CArr[p] & (1 << Bit%8))!=0; -}; - -///////////////////////////////////////////////////////////////// -////////////////////// RegExp Class /////////////////////////// -///////////////////////////////////////////////////////////////// - -PosRegExp::PosRegExp() { - Info = 0; - Exprn = 0; - NoMoves = false; - Error = true; - FirstChar = 0; - CurMatch = 0; -}; -PosRegExp::~PosRegExp() { - if (Info) delete Info; -}; - -bool PosRegExp::SetExpr(const char *Expr) { - if (!this) return false; - Error = true; - CurMatch = 0; - if (SetExprLow(Expr)) Error = false; - return !Error; -}; -bool PosRegExp::isok() { - return !Error; -}; - - -bool PosRegExp::SetExprLow(const char *Expr) { - int Len = strlen(Expr); - bool Ok = false; - int i,j,s = 0,pos,tmp; - int EnterBr = 0,EnterGr = 0,EnterFg = 0; - - if (Info) delete Info; - Info = new SRegInfo; - Exprn = new int[Len]; - - NoCase = false; - Extend = false; - if (Expr[0] == '/') s++; - else return false; - - for (i = Len; i > 0 && !Ok;i--) - if (Expr[i] == '/') { - Len = i-s; - Ok = true; - for (int j = i+1; Expr[j]; j++) { - if (Expr[j] == 'i') NoCase = true; - if (Expr[j] == 'x') Extend = true; - }; - }; - if (!Ok) return false; - - //////////////////////////////// - for (j = 0,pos = 0; j < Len; j++,pos++) { - if (Extend && Expr[j+s] == ' ') { - pos--; - continue; - }; - - Exprn[pos] = (int)(unsigned char)Expr[j+s]; - - if (Expr[j+s] == BackSlash) { - switch (Expr[j+s+1]) { - case 'd': - Exprn[pos] = ReDigit; - break; - case 'D': - Exprn[pos] = ReNDigit; - break; - case 'w': - Exprn[pos] = ReWordSymb; - break; - case 'W': - Exprn[pos] = ReNWordSymb; - break; - case 's': - Exprn[pos] = ReWSpace; - break; - case 'S': - Exprn[pos] = ReNWSpace; - break; - case 'u': - Exprn[pos] = ReUCase; - break; - case 'l': - Exprn[pos] = ReNUCase; - break; - case 't': - Exprn[pos] = '\t'; - break; - case 'n': - Exprn[pos] = '\n'; - break; - case 'r': - Exprn[pos] = '\r'; - break; - case 'b': - Exprn[pos] = ReWBound; - break; - case 'B': - Exprn[pos] = ReNWBound; - break; - case 'c': - Exprn[pos] = RePreNW; - break; - case 'm': - Exprn[pos] = ReStart; - break; - case 'M': - Exprn[pos] = ReEnd; - break; - case 'x': - tmp = toupper(Expr[j+s+2])-0x30; - tmp = (tmp>9?tmp-7:tmp)<<4; - tmp += (toupper(Expr[j+s+3])-0x30)>9?toupper(Expr[j+s+3])-0x37:(toupper(Expr[j+s+3])-0x30); - Exprn[pos] = tmp; - j+=2; - break; - case 'y': - tmp = Expr[j+s+2] - 0x30; - if (tmp >= 0 && tmp <= 9) { - if (tmp == 1) { - tmp = 10 + Expr[j+s+3] - 0x30; - if (tmp >= 10 && tmp <= 19) j++; - else tmp = 1; - }; - Exprn[pos] = ReBkTrace + tmp; - j++; - break; - }; - default: - tmp = Expr[j+s+1] - 0x30; - if (tmp >= 0 && tmp <= 9) { - if (tmp == 1) { - tmp = 10 + Expr[j+s+2] - 0x30; - if (tmp >= 10 && tmp <= 19) j++; - else tmp = 1; - }; - Exprn[pos] = ReBkBrack + tmp; - break; - } else - Exprn[pos] = Expr[j+s+1]; - break; - }; - j++; - continue; - }; - if (Expr[j+s] == ']') { - Exprn[pos] = ReEnumE; - if (EnterFg || !EnterGr) return false; - EnterGr--; - }; - if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum; - - if (EnterGr) continue; - - if (Expr[j+s] == '[' && Expr[j+s+1] == '^') { - Exprn[pos] = ReNEnumS; - if (EnterFg) return false; - EnterGr++; - j++; - continue; - }; - if (Expr[j+s] == '*' && Expr[j+s+1] == '?') { - Exprn[pos] = ReNGMul; - j++; - continue; - }; - if (Expr[j+s] == '+' && Expr[j+s+1] == '?') { - Exprn[pos] = ReNGPlus; - j++; - continue; - }; - if (Expr[j+s] == '?' && Expr[j+s+1] == '?') { - Exprn[pos] = ReNGQuest; - j++; - continue; - }; - if (Expr[j+s] == '?' && Expr[j+s+1] == '#' && - Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { - Exprn[pos] = ReBehind+Expr[j+s+2]-0x30; - j+=2; - continue; - }; - if (Expr[j+s] == '?' && Expr[j+s+1] == '~' && - Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { - Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30; - j+=2; - continue; - }; - if (Expr[j+s] == '?' && Expr[j+s+1] == '=') { - Exprn[pos] = ReAhead; - j++; - continue; - }; - if (Expr[j+s] == '?' && Expr[j+s+1] == '!') { - Exprn[pos] = ReNAhead; - j++; - continue; - }; - - if (Expr[j+s] == '(') { - Exprn[pos] = ReLBrack; - if (EnterFg) return false; - EnterBr++; - }; - if (Expr[j+s] == ')') { - Exprn[pos] = ReRBrack; - if (!EnterBr || EnterFg) return false; - EnterBr--; - }; - if (Expr[j+s] == '[') { - Exprn[pos] = ReEnumS; - if (EnterFg) return false; - EnterGr++; - }; - if (Expr[j+s] == '{') { - Exprn[pos] = ReRangeS; - if (EnterFg) return false; - EnterFg++; - }; - if (Expr[j+s] == '}' && Expr[j+s+1] == '?') { - Exprn[pos] = ReNGRangeE; - if (!EnterFg) return false; - EnterFg--; - j++; - continue; - }; - if (Expr[j+s] == '}') { - Exprn[pos] = ReRangeE; - if (!EnterFg) return false; - EnterFg--; - }; - - if (Expr[j+s] == '^') Exprn[pos] = ReSoL; - if (Expr[j+s] == '$') Exprn[pos] = ReEoL; - if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr; - if (Expr[j+s] == '*') Exprn[pos] = ReMul; - if (Expr[j+s] == '+') Exprn[pos] = RePlus; - if (Expr[j+s] == '?') Exprn[pos] = ReQuest; - if (Expr[j+s] == '|') Exprn[pos] = ReOr; - }; - if (EnterGr || EnterBr || EnterFg) return false; - - Info->Op = ReBrackets; - Info->un.Param = new SRegInfo; - Info->s = CurMatch++; - - if (!SetStructs(Info->un.Param,0,pos)) return false; - Optimize(); - delete Exprn; - return true; -}; - -void PosRegExp::Optimize() { - PRegInfo Next = Info; - FirstChar = 0; - while(Next) { - if (Next->Op == ReBrackets || Next->Op == RePlus || Next->Op == ReNGPlus) { - Next = Next->un.Param; - continue; - }; - if (Next->Op == ReSymb) { - if (Next->un.Symb & 0xFF00 && Next->un.Symb != ReSoL && Next->un.Symb != ReWBound) - break; - FirstChar = Next->un.Symb; - break; - }; - break; - }; -}; - -bool PosRegExp::SetStructs(PRegInfo &re,int start,int end) { - PRegInfo Next,Prev,Prev2; - int comma,st,en,ng,i, j,k; - int EnterBr; - bool Add; - - if (end - start < 0) return false; - Next = re; - for (i = start; i < end; i++) { - Add = false; - // Ops - if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps) { - Next->un.Param = 0; - Next->Op = (EOps)Exprn[i]; - Add = true; - }; - // {n,m} - if (Exprn[i] == ReRangeS) { - st = i; - en = -1; - comma = -1; - ng = 0; - for (j = i;j < end;j++) { - if (Exprn[j] == ReNGRangeE) { - en = j; - ng = 1; - break; - }; - if (Exprn[j] == ReRangeE) { - en = j; - break; - }; - if ((char)Exprn[j] == ',') - comma = j; - }; - if (en == -1) return false; - if (comma == -1) comma = en; - Next->s = (char)GetNumber(Exprn,st+1,comma); - if (comma != en) - Next->e = (char)GetNumber(Exprn,comma+1,en); - else - Next->e = Next->s; - Next->un.Param = 0; - Next->Op = ng?ReNGRangeNM:ReRangeNM; - if (en-comma == 1) { - Next->e = -1; - Next->Op = ng?ReNGRangeN:ReRangeN; - }; - i=j; - Add = true; - }; - // [] [^] - if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS) { - Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum; - for (j = i+1;j < end;j++) { - if (Exprn[j] == ReEnumE) - break; - }; - if (j == end) return false; - Next->un.ChrClass = new SCharData; - memset(Next->un.ChrClass, 0, 32); - for (j = i+1;Exprn[j] != ReEnumE;j++) { - if (Exprn[j+1] == ReFrToEnum) { - for (i = (Exprn[j]&0xFF); i < (Exprn[j+2]&0xFF);i++) - Next->un.ChrClass->SetBit(i&0xFF); - j++; - continue; - }; - switch(Exprn[j]) { - case ReDigit: - for (k = 0x30;k < 0x40;k++) - if (IsDigit((char)k)) - Next->un.ChrClass->SetBit(k); - break; - case ReNDigit: - for (k = 0x30;k < 0x40;k++) - if (!IsDigit((char)k)) - Next->un.ChrClass->SetBit(k); - Next->un.ChrClass->ClearBit(0x0a); - Next->un.ChrClass->ClearBit(0x0d); - break; - case ReWordSymb: - for (k = 0;k < 256;k++) - if (IsWord((char)k)) - Next->un.ChrClass->SetBit(k); - break; - case ReNWordSymb: - for (k = 0;k < 256;k++) - if (!IsWord((char)k)) - Next->un.ChrClass->SetBit(k); - Next->un.ChrClass->ClearBit(0x0a); - Next->un.ChrClass->ClearBit(0x0d); - break; - case ReWSpace: - Next->un.ChrClass->SetBit(0x20); - Next->un.ChrClass->SetBit(0x09); - break; - case ReNWSpace: - memset(Next->un.ChrClass->IArr, 0xFF, 32); - Next->un.ChrClass->ClearBit(0x20); - Next->un.ChrClass->ClearBit(0x09); - Next->un.ChrClass->ClearBit(0x0a); - Next->un.ChrClass->ClearBit(0x0d); - break; - default: - if (!(Exprn[j]&0xFF00)) - Next->un.ChrClass->SetBit(Exprn[j]&0xFF); - break; - }; - }; - Add = true; - i=j; - }; - // ( ... ) - if (Exprn[i] == ReLBrack) { - EnterBr = 1; - for (j = i+1;j < end;j++) { - if (Exprn[j] == ReLBrack) EnterBr++; - if (Exprn[j] == ReRBrack) EnterBr--; - if (!EnterBr) break; - }; - if (EnterBr) return false; - Next->Op = ReBrackets; - Next->un.Param = new SRegInfo; - Next->un.Param->Parent = Next; - Next->s = CurMatch++; - if (CurMatch > MatchesNum) CurMatch = MatchesNum; - if (!SetStructs(Next->un.Param,i+1,j)) return false; - Add = true; - i=j; - }; - if ((Exprn[i]&0xFF00) == ReBkTrace) { - Next->Op = ReBkTrace; - Next->un.Symb = Exprn[i]&0xFF; - Add = true; - }; - if ((Exprn[i]&0xFF00) == ReBkBrack) { - Next->Op = ReBkBrack; - Next->un.Symb = Exprn[i]&0xFF; - Add = true; - }; - if ((Exprn[i]&0xFF00) == ReBehind) { - Next->Op = ReBehind; - Next->s = Exprn[i]&0xFF; - Add = true; - }; - if ((Exprn[i]&0xFF00) == ReNBehind) { - Next->Op = ReNBehind; - Next->s = Exprn[i]&0xFF; - Add = true; - }; - // Chars - if (Exprn[i] >= ReAnyChr && Exprn[i] < ReTemp || Exprn[i] < 0x100) { - Next->Op = ReSymb; - Next->un.Symb = Exprn[i]; - Add = true; - }; - // Next - if (Add && i != end-1) { - Next->Next = new SRegInfo; - Next->Next->Parent = Next->Parent; - Next = Next->Next; - }; - }; - Next = re; - Prev = Prev2 = 0; - while(Next) { - if (Next->Op > ReBlockOps && Next->Op < ReSymbolOps) { - if (!Prev) return false; - if (!Prev2) re = Next; - else Prev2->Next = Next; - //if (Prev->Op > ReBlockOps && Prev->Op < ReSymbolOps) return false; - Prev->Parent = Next; - Prev->Next = 0; - Next->un.Param = Prev; - Prev = Prev2; - }; - Prev2 = Prev; - Prev = Next; - Next = Next->Next; - }; - - return true; -}; - -///////////////////////////////////////////////////////////////// -///////////////////////// Parsing ///////////////////////////// -///////////////////////////////////////////////////////////////// - -bool PosRegExp::CheckSymb(int Symb,bool Inc) { - bool Res; - char ch; - switch(Symb) { - case ReAnyChr: - if (posParse >= posEnd) return false; - ch = CharAt(posParse,param); - Res = ch != '\r' && ch != '\n'; - if (Res && Inc) posParse++; - return Res; - case ReSoL: - if (posStart == posParse) - return true; - ch = CharAt(posParse-1,param); - return ch == '\n' || ch == '\r'; - case ReEoL: - if (posEnd == posParse) - return true; - ch = CharAt(posParse,param); - return ch == '\n' || ch == '\r'; - case ReDigit: - if (posParse >= posEnd) return false; - ch = CharAt(posParse,param); - Res = (ch >= 0x30 && ch <= 0x39); - if (Res && Inc) posParse++; - return Res; - case ReNDigit: - if (posParse >= posEnd) return false; - ch = CharAt(posParse,param); - Res = !(ch >= 0x30 && ch <= 0x39) && ch != '\r' && ch != '\n'; - if (Res && Inc) posParse++; - return Res; - case ReWordSymb: - if (posParse >= posEnd) return false; - Res = IsWord(CharAt(posParse,param)); - if (Res && Inc) posParse++; - return Res; - case ReNWordSymb: - if (posParse >= posEnd) return false; - ch = CharAt(posParse,param); - Res = !IsWord(ch) && ch != '\r' && ch != '\n'; - if (Res && Inc) posParse++; - return Res; - case ReWSpace: - if (posParse >= posEnd) return false; - ch = CharAt(posParse,param); - Res = (ch == 0x20 || ch == '\t'); - if (Res && Inc) posParse++; - return Res; - case ReNWSpace: - if (posParse >= posEnd) return false; - ch = CharAt(posParse,param); - Res = !(ch == 0x20 || ch == '\t') && ch != '\r' && ch != '\n'; - if (Res && Inc) posParse++; - return Res; - case ReUCase: - if (posParse >= posEnd) return false; - Res = IsUpperCase(CharAt(posParse,param)); - if (Res && Inc) posParse++; - return Res; - case ReNUCase: - if (posParse >= posEnd) return false; - Res = IsLowerCase(CharAt(posParse,param)); - if (Res && Inc) posParse++; - return Res; - case ReWBound: - if (posParse >= posEnd) return true; - ch = CharAt(posParse,param); - return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param))); - case ReNWBound: - if (posParse >= posEnd) return true; - return !IsWord(CharAt(posParse,param)) && IsWord(CharAt(posParse-1,param)); - case RePreNW: - if (posParse >= posEnd) return true; - return (posParse == posStart || !IsWord(CharAt(posParse-1,param))); - case ReStart: - Matches->s[0] = (posParse-posStart); - return true; - case ReEnd: - Matches->e[0] = (posParse-posStart); - return true; - default: - if ((Symb & 0xFF00) || posParse >= posEnd) return false; - if (NoCase) { - if (LowCase(CharAt(posParse,param)) != LowCase((char)Symb&0xFF)) return false; - } else - if (CharAt(posParse,param) != (char)(Symb&0xFF)) return false; - if (Inc) posParse++; - return true; - }; -} - -bool PosRegExp::LowParseRe(PRegInfo &Next) { - PRegInfo OrNext; - int i,match,sv; - int posStr; - - switch(Next->Op) { - case ReSymb: - if (!CheckSymb(Next->un.Symb,true)) return false; - break; - case ReEmpty: - break; - case ReBkTrace: - if (!posBkStr | !BkTrace) return false; - sv = Next->un.Symb; - posStr = posParse; - for (i = BkTrace->s[sv]; i < BkTrace->e[sv]; i++) { - if (CharAt(posStr,param) != CharAt(posBkStr+i,param) || posEnd == posStr) return false; - posStr++; - }; - posParse = posStr; - break; - case ReBkBrack: - sv = Next->un.Symb; - posStr = posParse; - if (Matches->s[sv] == -1 || Matches->e[sv] == -1) return false; - for (i = Matches->s[sv]; i < Matches->e[sv]; i++) { - if (CharAt(posStr,param) != CharAt(posStart+i,param) || posEnd == posStr) return false; - posStr++; - }; - posParse = posStr; - break; - case ReBehind: - sv = Next->s; - posStr = posParse; - posParse -= sv; - if (!LowParse(Next->un.Param)) return false; - posParse = posStr; - break; - case ReNBehind: - sv = Next->s; - posStr = posParse; - posParse -= sv; - if (LowParse(Next->un.Param)) return false; - posParse = posStr; - break; - case ReAhead: - posStr = posParse; - if (!LowParse(Next->un.Param)) return false; - posParse = posStr; - break; - case ReNAhead: - posStr = posParse; - if (LowParse(Next->un.Param)) return false; - posParse = posStr; - break; - case ReEnum: - if (posParse >= posEnd) return false; - if (!Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false; - posParse++; - break; - case ReNEnum: - if (posParse >= posEnd) return false; - if (Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false; - posParse++; - break; - case ReBrackets: - match = Next->s; - sv = posParse-posStart; - posStr = posParse; - if (LowParse(Next->un.Param)) { - if (match || (Matches->s[match] == -1)) - Matches->s[match] = sv; - if (match || (Matches->e[match] == -1)) - Matches->e[match] = posParse-posStart; - return true; - }; - posParse = posStr; - return false; - case ReMul: - posStr = posParse; - while (LowParse(Next->un.Param)); - while(!LowCheckNext(Next) && posStr < posParse) posParse--; - break; - case ReNGMul: - do { - if (LowCheckNext(Next)) break; - } while (LowParse(Next->un.Param)); - break; - case RePlus: - posStr = posParse; - match = false; - while (LowParse(Next->un.Param)) - match = true; - if (!match) return false; - while(!LowCheckNext(Next) && posStr < posParse) posParse--; - break; - case ReNGPlus: - if (!LowParse(Next->un.Param)) return false; - do { - if (LowCheckNext(Next)) break; - } while (LowParse(Next->un.Param)); - break; - case ReQuest: - LowParse(Next->un.Param); - break; - case ReNGQuest: - if (LowCheckNext(Next)) break; - if (!LowParse(Next->un.Param)) return false; - break; - case ReOr: - OrNext = Next; - // posStr = posParse; - if (LowParse(Next->un.Param)) { - while (OrNext && OrNext->Op == ReOr) - OrNext = OrNext->Next; - /*if (!LowCheckNext(OrNext)){ - posParse = posStr; - OrNext = Next; - };*/ - }; - Next = OrNext; - break; - case ReRangeN: - posStr = posParse; - i = 0; - while (LowParse(Next->un.Param)) i++; // ??? - do { - if (i < Next->s) { - posParse = posStr; - return false; - }; - i--; - } while(!LowCheckNext(Next) && posStr < posParse--); - break; - case ReNGRangeN: - posStr = posParse; - i = 0; - while (LowParse(Next->un.Param)) { - i++; - if (i >= Next->s && LowCheckNext(Next)) // ??? - break; - }; - if (i < Next->s) { - posParse = posStr; - return false; - }; - break; - case ReRangeNM: - posStr = posParse; - i = 0; - while (i < Next->s && LowParse(Next->un.Param)) // ??? - i++; - if (i < Next->s) { - posParse = posStr; - return false; - }; - while (i < Next->e && LowParse(Next->un.Param)) // ??? - i++; - - while(!LowCheckNext(Next)) { - i--; - posParse--; - if (i < Next->s) { - posParse = posStr; - return false; - }; - }; - break; - case ReNGRangeNM: - posStr = posParse; - i = 0; - while (i < Next->s && LowParse(Next->un.Param)) // ??? - i++; - if (i < Next->s) { - posParse = posStr; - return false; - }; - while(!LowCheckNext(Next)) { - i++; - if (!LowParse(Next->un.Param) || i > Next->e) { // ??? - posParse = posStr; - return false; - }; - }; - break; - }; - return true; -}; - -bool PosRegExp::LowCheckNext(PRegInfo Re) { - PRegInfo Next; - int tmp = posParse; - Next = Re; - do { - if (Next && Next->Op == ReOr) - while (Next && Next->Op == ReOr) - Next = Next->Next; - if (Next->Next && !LowParse(Next->Next)) { - posParse = tmp; - Ok = false; - return false; - }; - Next = Next->Parent; - } while(Next); - posParse = tmp; - if (Ok != false) Ok = true; - return true; -}; - -bool PosRegExp::LowParse(PRegInfo Re) { - while(Re && posParse <= posEnd) { - if (!LowParseRe(Re)) return false; - if (Re) Re = Re->Next; - }; - return true; -}; - -bool PosRegExp::QuickCheck() { - if (!NoMoves || !FirstChar) - return true; - switch(FirstChar) { - case ReSoL: - if (posParse != posStart) return false; - return true; - case ReWBound: - return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param))); - default: - if (NoCase && LowCase(CharAt(posParse,param)) != LowCase(FirstChar)) return false; - if (!NoCase && CharAt(posParse,param) != (char)FirstChar) return false; - return true; - }; -}; - -bool PosRegExp::ParseRe(int posStr) { - if (Error) return false; - - posParse = posStr; - if (!QuickCheck()) return false; - - for (int i = 0; i < MatchesNum; i++) - Matches->s[i] = Matches->e[i] = -1; - Matches->CurMatch = CurMatch; - - Ok = -1; - //try{ - do { - if (!LowParse(Info)) { - if (NoMoves) return false; - } else - return true; - posParse = ++posStr; - } while(posParse != posEnd+1); - return false; - //}__except(){ - // return true; - //}; -} -; - -bool PosRegExp::Parse(int posStr,int posSol, int posEol, PMatches Mtch, int Moves) { - if (!this) return false; - - bool s = NoMoves; - if (Moves != -1) NoMoves = Moves!=0; - posStart = posSol; - posEnd = posEol; - Matches = Mtch; - bool r = ParseRe(posStr); - NoMoves = s; - return r; -}; - -bool PosRegExp::Parse(int posStr, int posStop, PMatches Mtch) { - if (!this) return false; - posStart = posStr; - posEnd = posStop; - Matches = Mtch; - return ParseRe(posStr); -}; - -bool PosRegExp::SetNoMoves(bool Moves) { - NoMoves = Moves; - return true; -}; - -bool PosRegExp::SetBkTrace(int posStr,PMatches Trace) { - BkTrace = Trace; - posBkStr = posStr; - return true; -}; - -#define EVAL_MATCHES 16 -#define EVAL_CHUNKSIZE 256 - -#define EVAL_LOWERCASE 1 -#define EVAL_UPPERCASE 2 -#define EVAL_LOWERCASE_NEXT 4 -#define EVAL_UPPERCASE_NEXT 8 - -bool PosRegExp::Evaluate(char *Expr, int posStr, PMatches Mtch, char **Res) { - int length, - newlength, - chunklength, - value, - size, - src, - end; - unsigned flag; - char ch, - *dest, - *pool; - - size = EVAL_CHUNKSIZE; - pool = (char*) malloc (size); - dest = pool; - length = 0; - flag = 0; - while (*Expr) { - switch (ch = *Expr++) { - case '\\': - switch (ch = *Expr++) { - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - ch -= ('A' - '0'); - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - value = ch - '0'; - if (Mtch->s[value] != -1 && value < EVAL_MATCHES) { - chunklength = Mtch->e[value] - Mtch->s[value]; - if (chunklength) { - newlength = chunklength + length; - if (newlength > size) { - do - size += EVAL_CHUNKSIZE; - while (size < newlength); - pool = (char*) realloc (pool, size); - dest = pool + length; - } - length = newlength; - src = posStr + Mtch->s[value]; - end = posStr + Mtch->e[value]; - if (flag & EVAL_UPPERCASE) { - if (flag & EVAL_LOWERCASE_NEXT) { - *dest++ = tolower (CharAt(src++,param)); - flag &= ~EVAL_LOWERCASE_NEXT; - } - while (src < end) - *dest++ = toupper (CharAt(src++,param)); - } else if (flag & EVAL_LOWERCASE) { - if (flag & EVAL_UPPERCASE_NEXT) { - *dest++ = toupper (CharAt(src++,param)); - flag &= ~EVAL_UPPERCASE_NEXT; - } - while (src < end) - *dest++ = tolower (CharAt(src++,param)); - } else { - if (flag & EVAL_LOWERCASE_NEXT) { - *dest++ = tolower (CharAt(src++,param)); - flag &= ~EVAL_LOWERCASE_NEXT; - } else if (flag & EVAL_UPPERCASE_NEXT) { - *dest++ = toupper (CharAt(src++,param)); - flag &= ~EVAL_UPPERCASE_NEXT; - } - while (src < end) - *dest++ = CharAt(src++,param); - } - } - } else - goto error; - continue; - case '\0': - goto error; - case 'r': - ch = '\r'; - break; - case 'n': - ch = '\n'; - break; - case 'b': - ch = '\b'; - break; - case 'a': - ch = '\a'; - break; - case 't': - ch = '\t'; - break; - case 'U': - flag |= EVAL_UPPERCASE; - continue; - case 'u': - flag |= EVAL_UPPERCASE_NEXT; - continue; - case 'L': - flag |= EVAL_LOWERCASE; - continue; - case 'l': - flag |= EVAL_LOWERCASE_NEXT; - continue; - case 'Q': - case 'q': - flag &= ~(EVAL_UPPERCASE | EVAL_LOWERCASE); - continue; - case 'x': - { - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - value = value + '0' - 'A' + 10; - if (value > 15) - goto error; - ch = value << 4; - Expr++; - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - value = value + '0' - 'A' + 10; - if (value > 15) - goto error; - Expr++; - ch |= value; - break; - } - case 'd': - { - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - goto error; - ch = value * 100; - Expr++; - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - goto error; - ch += value * 10; - Expr++; - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - goto error; - ch += value; - Expr++; - break; - } - case 'o': - { - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - goto error; - ch = value << 6; - Expr++; - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - goto error; - ch += value << 3; - Expr++; - if (!*Expr) - goto error; - value = toupper (*Expr) - '0'; - if (value > 9) - goto error; - ch |= value; - Expr++; - /* break; */ - } - /* default: - break; */ - } - default: - if (++length > size) { - do - size += EVAL_CHUNKSIZE; - while (size < length); - pool = (char*) realloc (pool, size); - dest = pool + length - 1; - } - if (flag & EVAL_LOWERCASE_NEXT) { - *dest++ = tolower (ch); - flag &= ~EVAL_LOWERCASE_NEXT; - } else if (flag & EVAL_UPPERCASE_NEXT) { - *dest++ = toupper (ch); - flag &= ~EVAL_UPPERCASE_NEXT; - } else if (flag & EVAL_UPPERCASE) - *dest++ = toupper (ch); - else if (flag & EVAL_LOWERCASE) - *dest++ = tolower (ch); - else - *dest++ = ch; - } - } - if (++length > size) { - do - size += EVAL_CHUNKSIZE; - while (size < length); - pool = (char*) realloc (pool, size); - dest = pool + length - 1; - } - *dest = '\0'; - *Res = pool; - return true; -error: - free (pool); - return false; -} |