#include #include #include #include #include "PosRegExp.h" //Up: /[A-Z \x80-\x9f \xf0 ]/x //Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x //Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x //* // Dos866 SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000}, LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff}, WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff}, DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /*/ // cp1251 SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff, 0x0}, LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff}, WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x0, 0xffffffff, 0xffffffff}, DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; //*/ /////////////////////////////////////////////// int GetNumber(int *str,int s,int e) { int r = 1, num = 0; if (e < s) return -1; for(int i = e-1; i >= s; i--) { if (str[i] > '9' || str[i] < '0') return -1; num += (str[i] - 0x30)*r; r *= 10; }; return num; /* char tmp[20]; double Res; if (e == s) return -1; for (int i = s;i < e;i++) tmp[i-s] = (char)Str[i]; tmp[e-s] = 0; GetNumber(tmp,&Res); return (int)Res; */ }; bool IsDigit(char Symb) { return DigData.GetBit(Symb); }; bool IsWord(char Symb) { return WdData.GetBit(Symb); }; bool IsUpperCase(char Symb) { return UCData.GetBit(Symb); }; bool IsLowerCase(char Symb) { return LCData.GetBit(Symb); }; char LowCase(char Chr) { if (UCData.GetBit(Chr)) return Chr+0x20; return Chr; }; /////////////////////////////////////////////// SRegInfo::SRegInfo() { Next = Parent = 0; un.Param = 0; Op = ReEmpty; }; SRegInfo::~SRegInfo() { if (Next) delete Next; if (un.Param) switch(Op) { case ReEnum: case ReNEnum: delete un.ChrClass; break; default: if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets) delete un.Param; break; }; }; /////////////////////////////////////////////// void SCharData::SetBit(unsigned char Bit) { int p = Bit/8; CArr[p] |= (1 << Bit%8); }; void SCharData::ClearBit(unsigned char Bit) { int p = Bit/8; CArr[p] &= ~(1 << Bit%8); }; bool SCharData::GetBit(unsigned char Bit) { int p = (unsigned char)Bit/8; return (CArr[p] & (1 << Bit%8))!=0; }; ///////////////////////////////////////////////////////////////// ////////////////////// RegExp Class /////////////////////////// ///////////////////////////////////////////////////////////////// PosRegExp::PosRegExp() { Info = 0; Exprn = 0; NoMoves = false; Error = true; FirstChar = 0; CurMatch = 0; }; PosRegExp::~PosRegExp() { if (Info) delete Info; }; bool PosRegExp::SetExpr(const char *Expr) { if (!this) return false; Error = true; CurMatch = 0; if (SetExprLow(Expr)) Error = false; return !Error; }; bool PosRegExp::isok() { return !Error; }; bool PosRegExp::SetExprLow(const char *Expr) { int Len = strlen(Expr); bool Ok = false; int i,j,s = 0,pos,tmp; int EnterBr = 0,EnterGr = 0,EnterFg = 0; if (Info) delete Info; Info = new SRegInfo; Exprn = new int[Len]; NoCase = false; Extend = false; if (Expr[0] == '/') s++; else return false; for (i = Len; i > 0 && !Ok;i--) if (Expr[i] == '/') { Len = i-s; Ok = true; for (int j = i+1; Expr[j]; j++) { if (Expr[j] == 'i') NoCase = true; if (Expr[j] == 'x') Extend = true; }; }; if (!Ok) return false; //////////////////////////////// for (j = 0,pos = 0; j < Len; j++,pos++) { if (Extend && Expr[j+s] == ' ') { pos--; continue; }; Exprn[pos] = (int)(unsigned char)Expr[j+s]; if (Expr[j+s] == BackSlash) { switch (Expr[j+s+1]) { case 'd': Exprn[pos] = ReDigit; break; case 'D': Exprn[pos] = ReNDigit; break; case 'w': Exprn[pos] = ReWordSymb; break; case 'W': Exprn[pos] = ReNWordSymb; break; case 's': Exprn[pos] = ReWSpace; break; case 'S': Exprn[pos] = ReNWSpace; break; case 'u': Exprn[pos] = ReUCase; break; case 'l': Exprn[pos] = ReNUCase; break; case 't': Exprn[pos] = '\t'; break; case 'n': Exprn[pos] = '\n'; break; case 'r': Exprn[pos] = '\r'; break; case 'b': Exprn[pos] = ReWBound; break; case 'B': Exprn[pos] = ReNWBound; break; case 'c': Exprn[pos] = RePreNW; break; case 'm': Exprn[pos] = ReStart; break; case 'M': Exprn[pos] = ReEnd; break; case 'x': tmp = toupper(Expr[j+s+2])-0x30; tmp = (tmp>9?tmp-7:tmp)<<4; tmp += (toupper(Expr[j+s+3])-0x30)>9?toupper(Expr[j+s+3])-0x37:(toupper(Expr[j+s+3])-0x30); Exprn[pos] = tmp; j+=2; break; case 'y': tmp = Expr[j+s+2] - 0x30; if (tmp >= 0 && tmp <= 9) { if (tmp == 1) { tmp = 10 + Expr[j+s+3] - 0x30; if (tmp >= 10 && tmp <= 19) j++; else tmp = 1; }; Exprn[pos] = ReBkTrace + tmp; j++; break; }; default: tmp = Expr[j+s+1] - 0x30; if (tmp >= 0 && tmp <= 9) { if (tmp == 1) { tmp = 10 + Expr[j+s+2] - 0x30; if (tmp >= 10 && tmp <= 19) j++; else tmp = 1; }; Exprn[pos] = ReBkBrack + tmp; break; } else Exprn[pos] = Expr[j+s+1]; break; }; j++; continue; }; if (Expr[j+s] == ']') { Exprn[pos] = ReEnumE; if (EnterFg || !EnterGr) return false; EnterGr--; }; if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum; if (EnterGr) continue; if (Expr[j+s] == '[' && Expr[j+s+1] == '^') { Exprn[pos] = ReNEnumS; if (EnterFg) return false; EnterGr++; j++; continue; }; if (Expr[j+s] == '*' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGMul; j++; continue; }; if (Expr[j+s] == '+' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGPlus; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGQuest; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '#' && Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { Exprn[pos] = ReBehind+Expr[j+s+2]-0x30; j+=2; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '~' && Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30; j+=2; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '=') { Exprn[pos] = ReAhead; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '!') { Exprn[pos] = ReNAhead; j++; continue; }; if (Expr[j+s] == '(') { Exprn[pos] = ReLBrack; if (EnterFg) return false; EnterBr++; }; if (Expr[j+s] == ')') { Exprn[pos] = ReRBrack; if (!EnterBr || EnterFg) return false; EnterBr--; }; if (Expr[j+s] == '[') { Exprn[pos] = ReEnumS; if (EnterFg) return false; EnterGr++; }; if (Expr[j+s] == '{') { Exprn[pos] = ReRangeS; if (EnterFg) return false; EnterFg++; }; if (Expr[j+s] == '}' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGRangeE; if (!EnterFg) return false; EnterFg--; j++; continue; }; if (Expr[j+s] == '}') { Exprn[pos] = ReRangeE; if (!EnterFg) return false; EnterFg--; }; if (Expr[j+s] == '^') Exprn[pos] = ReSoL; if (Expr[j+s] == '$') Exprn[pos] = ReEoL; if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr; if (Expr[j+s] == '*') Exprn[pos] = ReMul; if (Expr[j+s] == '+') Exprn[pos] = RePlus; if (Expr[j+s] == '?') Exprn[pos] = ReQuest; if (Expr[j+s] == '|') Exprn[pos] = ReOr; }; if (EnterGr || EnterBr || EnterFg) return false; Info->Op = ReBrackets; Info->un.Param = new SRegInfo; Info->s = CurMatch++; if (!SetStructs(Info->un.Param,0,pos)) return false; Optimize(); delete Exprn; return true; }; void PosRegExp::Optimize() { PRegInfo Next = Info; FirstChar = 0; while(Next) { if (Next->Op == ReBrackets || Next->Op == RePlus || Next->Op == ReNGPlus) { Next = Next->un.Param; continue; }; if (Next->Op == ReSymb) { if (Next->un.Symb & 0xFF00 && Next->un.Symb != ReSoL && Next->un.Symb != ReWBound) break; FirstChar = Next->un.Symb; break; }; break; }; }; bool PosRegExp::SetStructs(PRegInfo &re,int start,int end) { PRegInfo Next,Prev,Prev2; int comma,st,en,ng,i, j,k; int EnterBr; bool Add; if (end - start < 0) return false; Next = re; for (i = start; i < end; i++) { Add = false; // Ops if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps) { Next->un.Param = 0; Next->Op = (EOps)Exprn[i]; Add = true; }; // {n,m} if (Exprn[i] == ReRangeS) { st = i; en = -1; comma = -1; ng = 0; for (j = i;j < end;j++) { if (Exprn[j] == ReNGRangeE) { en = j; ng = 1; break; }; if (Exprn[j] == ReRangeE) { en = j; break; }; if ((char)Exprn[j] == ',') comma = j; }; if (en == -1) return false; if (comma == -1) comma = en; Next->s = (char)GetNumber(Exprn,st+1,comma); if (comma != en) Next->e = (char)GetNumber(Exprn,comma+1,en); else Next->e = Next->s; Next->un.Param = 0; Next->Op = ng?ReNGRangeNM:ReRangeNM; if (en-comma == 1) { Next->e = -1; Next->Op = ng?ReNGRangeN:ReRangeN; }; i=j; Add = true; }; // [] [^] if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS) { Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum; for (j = i+1;j < end;j++) { if (Exprn[j] == ReEnumE) break; }; if (j == end) return false; Next->un.ChrClass = new SCharData; memset(Next->un.ChrClass, 0, 32); for (j = i+1;Exprn[j] != ReEnumE;j++) { if (Exprn[j+1] == ReFrToEnum) { for (i = (Exprn[j]&0xFF); i < (Exprn[j+2]&0xFF);i++) Next->un.ChrClass->SetBit(i&0xFF); j++; continue; }; switch(Exprn[j]) { case ReDigit: for (k = 0x30;k < 0x40;k++) if (IsDigit((char)k)) Next->un.ChrClass->SetBit(k); break; case ReNDigit: for (k = 0x30;k < 0x40;k++) if (!IsDigit((char)k)) Next->un.ChrClass->SetBit(k); Next->un.ChrClass->ClearBit(0x0a); Next->un.ChrClass->ClearBit(0x0d); break; case ReWordSymb: for (k = 0;k < 256;k++) if (IsWord((char)k)) Next->un.ChrClass->SetBit(k); break; case ReNWordSymb: for (k = 0;k < 256;k++) if (!IsWord((char)k)) Next->un.ChrClass->SetBit(k); Next->un.ChrClass->ClearBit(0x0a); Next->un.ChrClass->ClearBit(0x0d); break; case ReWSpace: Next->un.ChrClass->SetBit(0x20); Next->un.ChrClass->SetBit(0x09); break; case ReNWSpace: memset(Next->un.ChrClass->IArr, 0xFF, 32); Next->un.ChrClass->ClearBit(0x20); Next->un.ChrClass->ClearBit(0x09); Next->un.ChrClass->ClearBit(0x0a); Next->un.ChrClass->ClearBit(0x0d); break; default: if (!(Exprn[j]&0xFF00)) Next->un.ChrClass->SetBit(Exprn[j]&0xFF); break; }; }; Add = true; i=j; }; // ( ... ) if (Exprn[i] == ReLBrack) { EnterBr = 1; for (j = i+1;j < end;j++) { if (Exprn[j] == ReLBrack) EnterBr++; if (Exprn[j] == ReRBrack) EnterBr--; if (!EnterBr) break; }; if (EnterBr) return false; Next->Op = ReBrackets; Next->un.Param = new SRegInfo; Next->un.Param->Parent = Next; Next->s = CurMatch++; if (CurMatch > MatchesNum) CurMatch = MatchesNum; if (!SetStructs(Next->un.Param,i+1,j)) return false; Add = true; i=j; }; if ((Exprn[i]&0xFF00) == ReBkTrace) { Next->Op = ReBkTrace; Next->un.Symb = Exprn[i]&0xFF; Add = true; }; if ((Exprn[i]&0xFF00) == ReBkBrack) { Next->Op = ReBkBrack; Next->un.Symb = Exprn[i]&0xFF; Add = true; }; if ((Exprn[i]&0xFF00) == ReBehind) { Next->Op = ReBehind; Next->s = Exprn[i]&0xFF; Add = true; }; if ((Exprn[i]&0xFF00) == ReNBehind) { Next->Op = ReNBehind; Next->s = Exprn[i]&0xFF; Add = true; }; // Chars if (Exprn[i] >= ReAnyChr && Exprn[i] < ReTemp || Exprn[i] < 0x100) { Next->Op = ReSymb; Next->un.Symb = Exprn[i]; Add = true; }; // Next if (Add && i != end-1) { Next->Next = new SRegInfo; Next->Next->Parent = Next->Parent; Next = Next->Next; }; }; Next = re; Prev = Prev2 = 0; while(Next) { if (Next->Op > ReBlockOps && Next->Op < ReSymbolOps) { if (!Prev) return false; if (!Prev2) re = Next; else Prev2->Next = Next; //if (Prev->Op > ReBlockOps && Prev->Op < ReSymbolOps) return false; Prev->Parent = Next; Prev->Next = 0; Next->un.Param = Prev; Prev = Prev2; }; Prev2 = Prev; Prev = Next; Next = Next->Next; }; return true; }; ///////////////////////////////////////////////////////////////// ///////////////////////// Parsing ///////////////////////////// ///////////////////////////////////////////////////////////////// bool PosRegExp::CheckSymb(int Symb,bool Inc) { bool Res; char ch; switch(Symb) { case ReAnyChr: if (posParse >= posEnd) return false; ch = CharAt(posParse,param); Res = ch != '\r' && ch != '\n'; if (Res && Inc) posParse++; return Res; case ReSoL: if (posStart == posParse) return true; ch = CharAt(posParse-1,param); return ch == '\n' || ch == '\r'; case ReEoL: if (posEnd == posParse) return true; ch = CharAt(posParse,param); return ch == '\n' || ch == '\r'; case ReDigit: if (posParse >= posEnd) return false; ch = CharAt(posParse,param); Res = (ch >= 0x30 && ch <= 0x39); if (Res && Inc) posParse++; return Res; case ReNDigit: if (posParse >= posEnd) return false; ch = CharAt(posParse,param); Res = !(ch >= 0x30 && ch <= 0x39) && ch != '\r' && ch != '\n'; if (Res && Inc) posParse++; return Res; case ReWordSymb: if (posParse >= posEnd) return false; Res = IsWord(CharAt(posParse,param)); if (Res && Inc) posParse++; return Res; case ReNWordSymb: if (posParse >= posEnd) return false; ch = CharAt(posParse,param); Res = !IsWord(ch) && ch != '\r' && ch != '\n'; if (Res && Inc) posParse++; return Res; case ReWSpace: if (posParse >= posEnd) return false; ch = CharAt(posParse,param); Res = (ch == 0x20 || ch == '\t'); if (Res && Inc) posParse++; return Res; case ReNWSpace: if (posParse >= posEnd) return false; ch = CharAt(posParse,param); Res = !(ch == 0x20 || ch == '\t') && ch != '\r' && ch != '\n'; if (Res && Inc) posParse++; return Res; case ReUCase: if (posParse >= posEnd) return false; Res = IsUpperCase(CharAt(posParse,param)); if (Res && Inc) posParse++; return Res; case ReNUCase: if (posParse >= posEnd) return false; Res = IsLowerCase(CharAt(posParse,param)); if (Res && Inc) posParse++; return Res; case ReWBound: if (posParse >= posEnd) return true; ch = CharAt(posParse,param); return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param))); case ReNWBound: if (posParse >= posEnd) return true; return !IsWord(CharAt(posParse,param)) && IsWord(CharAt(posParse-1,param)); case RePreNW: if (posParse >= posEnd) return true; return (posParse == posStart || !IsWord(CharAt(posParse-1,param))); case ReStart: Matches->s[0] = (posParse-posStart); return true; case ReEnd: Matches->e[0] = (posParse-posStart); return true; default: if ((Symb & 0xFF00) || posParse >= posEnd) return false; if (NoCase) { if (LowCase(CharAt(posParse,param)) != LowCase((char)Symb&0xFF)) return false; } else if (CharAt(posParse,param) != (char)(Symb&0xFF)) return false; if (Inc) posParse++; return true; }; } bool PosRegExp::LowParseRe(PRegInfo &Next) { PRegInfo OrNext; int i,match,sv; int posStr; switch(Next->Op) { case ReSymb: if (!CheckSymb(Next->un.Symb,true)) return false; break; case ReEmpty: break; case ReBkTrace: if (!posBkStr | !BkTrace) return false; sv = Next->un.Symb; posStr = posParse; for (i = BkTrace->s[sv]; i < BkTrace->e[sv]; i++) { if (CharAt(posStr,param) != CharAt(posBkStr+i,param) || posEnd == posStr) return false; posStr++; }; posParse = posStr; break; case ReBkBrack: sv = Next->un.Symb; posStr = posParse; if (Matches->s[sv] == -1 || Matches->e[sv] == -1) return false; for (i = Matches->s[sv]; i < Matches->e[sv]; i++) { if (CharAt(posStr,param) != CharAt(posStart+i,param) || posEnd == posStr) return false; posStr++; }; posParse = posStr; break; caseHTTP/1.1 200 OK Connection: keep-alive Connection: keep-alive Content-Disposition: inline; filename="PosRegExp.cxx" Content-Disposition: inline; filename="PosRegExp.cxx" Content-Length: 26769 Content-Length: 26769 Content-Security-Policy: default-src 'none' Content-Security-Policy: default-src 'none' Content-Type: text/plain; charset=UTF-8 Content-Type: text/plain; charset=UTF-8 Date: Sun, 19 Oct 2025 13:24:40 UTC ETag: "ea719b7c288f5e0c0460686ddae0c765cd3b7b1d" ETag: "ea719b7c288f5e0c0460686ddae0c765cd3b7b1d" Expires: Wed, 17 Oct 2035 13:24:40 GMT Expires: Wed, 17 Oct 2035 13:24:40 GMT Last-Modified: Sun, 19 Oct 2025 13:24:40 GMT Last-Modified: Sun, 19 Oct 2025 13:24:40 GMT Server: OpenBSD httpd Server: OpenBSD httpd X-Content-Type-Options: nosniff X-Content-Type-Options: nosniff #include #include #include #include #include "PosRegExp.h" //Up: /[A-Z \x80-\x9f \xf0 ]/x //Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x //Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x //* // Dos866 SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000}, LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff}, WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff}, DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /*/ // cp1251 SCharData UCData = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff, 0x0}, LCData = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff}, WdData = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x0, 0xffffffff, 0xffffffff}, DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; //*/ /////////////////////////////////////////////// int GetNumber(int *str,int s,int e) { int r = 1, num = 0; if (e < s) return -1; for(int i = e-1; i >= s; i--) { if (str[i] > '9' || str[i] < '0') return -1; num += (str[i] - 0x30)*r; r *= 10; }; return num; /* char tmp[20]; double Res; if (e == s) return -1; for (int i = s;i < e;i++) tmp[i-s] = (char)Str[i]; tmp[e-s] = 0; GetNumber(tmp,&Res); return (int)Res; */ }; bool IsDigit(char Symb) { return DigData.GetBit(Symb); }; bool IsWord(char Symb) { return WdData.GetBit(Symb); }; bool IsUpperCase(char Symb) { return UCData.GetBit(Symb); }; bool IsLowerCase(char Symb) { return LCData.GetBit(Symb); }; char LowCase(char Chr) { if (UCData.GetBit(Chr)) return Chr+0x20; return Chr; }; /////////////////////////////////////////////// SRegInfo::SRegInfo() { Next = Parent = 0; un.Param = 0; Op = ReEmpty; }; SRegInfo::~SRegInfo() { if (Next) delete Next; if (un.Param) switch(Op) { case ReEnum: case ReNEnum: delete un.ChrClass; break; default: if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets) delete un.Param; break; }; }; /////////////////////////////////////////////// void SCharData::SetBit(unsigned char Bit) { int p = Bit/8; CArr[p] |= (1 << Bit%8); }; void SCharData::ClearBit(unsigned char Bit) { int p = Bit/8; CArr[p] &= ~(1 << Bit%8); }; bool SCharData::GetBit(unsigned char Bit) { int p = (unsigned char)Bit/8; return (CArr[p] & (1 << Bit%8))!=0; }; ///////////////////////////////////////////////////////////////// ////////////////////// RegExp Class /////////////////////////// ///////////////////////////////////////////////////////////////// PosRegExp::PosRegExp() { Info = 0; Exprn = 0; NoMoves = false; Error = true; FirstChar = 0; CurMatch = 0; }; PosRegExp::~PosRegExp() { if (Info) delete Info; }; bool PosRegExp::SetExpr(const char *Expr) { if (!this) return false; Error = true; CurMatch = 0; if (SetExprLow(Expr)) Error = false; return !Error; }; bool PosRegExp::isok() { return !Error; }; bool PosRegExp::SetExprLow(const char *Expr) { int Len = strlen(Expr); bool Ok = false; int i,j,s = 0,pos,tmp; int EnterBr = 0,EnterGr = 0,EnterFg = 0; if (Info) delete Info; Info = new SRegInfo; Exprn = new int[Len]; NoCase = false; Extend = false; if (Expr[0] == '/') s++; else return false; for (i = Len; i > 0 && !Ok;i--) if (Expr[i] == '/') { Len = i-s; Ok = true; for (int j = i+1; Expr[j]; j++) { if (Expr[j] == 'i') NoCase = true; if (Expr[j] == 'x') Extend = true; }; }; if (!Ok) return false; //////////////////////////////// for (j = 0,pos = 0; j < Len; j++,pos++) { if (Extend && Expr[j+s] == ' ') { pos--; continue; }; Exprn[pos] = (int)(unsigned char)Expr[j+s]; if (Expr[j+s] == BackSlash) { switch (Expr[j+s+1]) { case 'd': Exprn[pos] = ReDigit; break; case 'D': Exprn[pos] = ReNDigit; break; case 'w': Exprn[pos] = ReWordSymb; break; case 'W': Exprn[pos] = ReNWordSymb; break; case 's': Exprn[pos] = ReWSpace; break; case 'S': Exprn[pos] = ReNWSpace; break; case 'u': Exprn[pos] = ReUCase; break; case 'l': Exprn[pos] = ReNUCase; break; case 't': Exprn[pos] = '\t'; break; case 'n': Exprn[pos] = '\n'; break; case 'r': Exprn[pos] = '\r'; break; case 'b': Exprn[pos] = ReWBound; break; case 'B': Exprn[pos] = ReNWBound; break; case 'c': Exprn[pos] = RePreNW; break; case 'm': Exprn[pos] = ReStart; break; case 'M': Exprn[pos] = ReEnd; break; case 'x': tmp = toupper(Expr[j+s+2])-0x30; tmp = (tmp>9?tmp-7:tmp)<<4; tmp += (toupper(Expr[j+s+3])-0x30)>9?toupper(Expr[j+s+3])-0x37:(toupper(Expr[j+s+3])-0x30); Exprn[pos] = tmp; j+=2; break; case 'y': tmp = Expr[j+s+2] - 0x30; if (tmp >= 0 && tmp <= 9) { if (tmp == 1) { tmp = 10 + Expr[j+s+3] - 0x30; if (tmp >= 10 && tmp <= 19) j++; else tmp = 1; }; Exprn[pos] = ReBkTrace + tmp; j++; break; }; default: tmp = Expr[j+s+1] - 0x30; if (tmp >= 0 && tmp <= 9) { if (tmp == 1) { tmp = 10 + Expr[j+s+2] - 0x30; if (tmp >= 10 && tmp <= 19) j++; else tmp = 1; }; Exprn[pos] = ReBkBrack + tmp; break; } else Exprn[pos] = Expr[j+s+1]; break; }; j++; continue; }; if (Expr[j+s] == ']') { Exprn[pos] = ReEnumE; if (EnterFg || !EnterGr) return false; EnterGr--; }; if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum; if (EnterGr) continue; if (Expr[j+s] == '[' && Expr[j+s+1] == '^') { Exprn[pos] = ReNEnumS; if (EnterFg) return false; EnterGr++; j++; continue; }; if (Expr[j+s] == '*' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGMul; j++; continue; }; if (Expr[j+s] == '+' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGPlus; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGQuest; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '#' && Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { Exprn[pos] = ReBehind+Expr[j+s+2]-0x30; j+=2; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '~' && Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30; j+=2; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '=') { Exprn[pos] = ReAhead; j++; continue; }; if (Expr[j+s] == '?' && Expr[j+s+1] == '!') { Exprn[pos] = ReNAhead; j++; continue; }; if (Expr[j+s] == '(') { Exprn[pos] = ReLBrack; if (EnterFg) return false; EnterBr++; }; if (Expr[j+s] == ')') { Exprn[pos] = ReRBrack; if (!EnterBr || EnterFg) return false; EnterBr--; }; if (Expr[j+s] == '[') { Exprn[pos] = ReEnumS; if (EnterFg) return false; EnterGr++; }; if (Expr[j+s] == '{') { Exprn[pos] = ReRangeS; if (EnterFg) return false; EnterFg++; }; if (Expr[j+s] == '}' && Expr[j+s+1] == '?') { Exprn[pos] = ReNGRangeE; if (!EnterFg) return false; EnterFg--; j++; continue; }; if (Expr[j+s] == '}') { Exprn[pos] = ReRangeE; if (!EnterFg) return false; EnterFg--; }; if (Expr[j+s] == '^') Exprn[pos] = ReSoL; if (Expr[j+s] == '$') Exprn[pos] = ReEoL; if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr; if (Expr[j+s] == '*') Exprn[pos] = ReMul; if (Expr[j+s] == '+') Exprn[pos] = RePlus; if (Expr[j+s] == '?') Exprn[pos] = ReQuest; if (Expr[j+s] == '|') Exprn[pos] = ReOr; }; if (EnterGr || EnterBr || EnterFg) return false; Info->Op = ReBrackets; Info->un.Param = new SRegInfo; Info->s = CurMatch++; if (!SetStructs(Info->un.Param,0,pos)) return false; Optimize(); delete Exprn; return true; }; void PosRegExp::Optimize() { PRegInfo Next = Info; FirstChar = 0; while(Next) { if (Next->Op == ReBrackets || Next->Op == RePlus || Next->Op == ReNGPlus) { Next = Next->un.Param; continue; }; if (Next->Op == ReSymb) { if (Next->un.Symb & 0xFF00 && Next->un.Symb != ReSoL && Next->un.Symb != ReWBound) break; FirstChar = Next->un.Symb; break; }; break; }; }; bool PosRegExp::SetStructs(PRegInfo &re,int start,int end) { PRegInfo Next,Prev,Prev2; int comma,st,en,ng,i, j,k; int EnterBr; bool Add; if (end - start < 0) return false; Next = re; for (i = start; i < end; i++) { Add = false; // Ops if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps) { Next->un.Param = 0; Next->Op = (EOps)Exprn[i]; Add = true; }; // {n,m} if (Exprn[i] == ReRangeS) { st = i; en = -1; comma = -1; ng = 0; for (j = i;j < end;j++) { if (Exprn[j] == ReNGRangeE) { en = j; ng = 1; break; }; if (Exprn[j] == ReRangeE) { en = j; break; }; if ((char)Exprn[j] == ',') comma = j; }; if (en == -1) return false; if (comma == -1) comma = en; Next->s = (char)GetNumber(Exprn,st+1,comma); if (comma != en) Next->e = (char)GetNumber(Exprn,comma+1,en); else Next->e = Next->s; Next->un.Param = 0; Next->Op = ng?ReNGRangeNM:ReRangeNM; if (en-comma == 1) { Next->e = -1; Next->Op = ng?ReNGRangeN:ReRangeN; }; i=j; Add = true; }; // [] [^] if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS) { Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum; for (j = i+1;j < end;j++) { if (Exprn[j] == ReEnumE) break; }; if (j == end) return false; Next->un.ChrClass = new SCharData; memset(Next->u