diff options
| -rw-r--r-- | include/PosRegExp.h | 138 | ||||
| -rw-r--r-- | src/PosRegExp.cxx | 1181 | 
2 files changed, 1319 insertions, 0 deletions
| diff --git a/include/PosRegExp.h b/include/PosRegExp.h new file mode 100644 index 000000000..b915b1ed4 --- /dev/null +++ b/include/PosRegExp.h @@ -0,0 +1,138 @@ +#ifndef POSREGEXP_H +#define POSREGEXP_H + +#define MatchesNum 0x10 + +enum EOps +{ +  ReBlockOps = 0x1000, +  ReMul,              // * +  RePlus,             // + +  ReQuest,            // ? +  ReNGMul,            // *? +  ReNGPlus,           // +? +  ReNGQuest,          // ?? +  ReRangeN,           // {n,} +  ReRangeNM,          // {n,m} +  ReNGRangeN,         // {n,}? +  ReNGRangeNM,        // {n,m}? +  ReOr,               // | +  ReBehind  = 0x1100, // ?#n +  ReNBehind = 0x1200, // ?~n +  ReAhead   = 0x1300, // ?= +  ReNAhead  = 0x1400, // ?! + +  ReSymbolOps = 0x2000, +  ReEmpty, +  ReSymb,             // a b \W \s ... +  ReEnum,             // [] +  ReNEnum,            // [^] +  ReBrackets,         // (...) +  ReBkTrace = 0x2100, // \yN +  ReBkBrack = 0x2200 // \N +}; + +enum ESymbols +{ +  ReAnyChr = 0x4000,  // . +  ReSoL,              // ^ +  ReEoL,              // $ +  ReDigit,            // \d +  ReNDigit,           // \D +  ReWordSymb,         // \w +  ReNWordSymb,        // \W +  ReWSpace,           // \s +  ReNWSpace,          // \S +  ReUCase,            // \u +  ReNUCase ,          // \l +  ReWBound,           // \b +  ReNWBound,          // \B +  RePreNW,            // \c +  ReStart,            // \m +  ReEnd,              // \M + +  ReChr    = 0x0      // Char in Lower Byte +}; +enum ETempSymb +{ +  ReTemp = 0x7000, +  ReLBrack, ReRBrack, +  ReEnumS, ReEnumE, ReNEnumS, +  ReRangeS, ReRangeE, ReNGRangeE, ReFrToEnum +}; + +#define BackSlash '\\' + +typedef union SCharData +{ +  int  IArr[8]; +  char CArr[32]; +  void SetBit(unsigned char Bit); +  void ClearBit(unsigned char Bit); +  bool GetBit(unsigned char Bit); +} *PCharData; + +typedef struct SRegInfo +{ +  SRegInfo(); +  ~SRegInfo(); + +  EOps   Op; +  union{ +    SRegInfo *Param; +    int Symb; +    PCharData ChrClass; +  }un; +  int s,e; +  SRegInfo *Parent; +  SRegInfo *Next; +} *PRegInfo; + +typedef struct SMatches +{ +  int s[MatchesNum]; +  int e[MatchesNum]; +  int CurMatch; +} *PMatches; + +typedef class PosRegExp +{ +  PRegInfo Info; +  PMatches BkTrace; +  bool NoCase,Extend,NoMoves; +  bool Error; +  int  *Exprn; +  int  posParse; +  int  posEnd,posStart; +  int  posBkStr; +  int  FirstChar; + +  bool SetExprLow(const char *Expr); +  bool SetStructs(PRegInfo &Info,int st,int end); +  void Optimize(); +  bool CheckSymb(int Symb,bool Inc); +  bool LowParse(PRegInfo Re); +  bool LowParseRe(PRegInfo &Next); +  bool LowCheckNext(PRegInfo Re); +  bool ParseRe(int posStr); +  bool QuickCheck(); +public: +  PMatches Matches; +  int Ok, CurMatch; + +  void *param; +  char (*CharAt)(int pos, void *param); + +  PosRegExp(); +  ~PosRegExp(); + +  bool isok(); +  bool SetNoMoves(bool Moves); +  bool SetBkTrace(int posStr,PMatches Trace); +  bool SetExpr(const char *Expr); +  bool Parse(int posStr, int posStop, PMatches Mtch); +  bool Parse(int posStr,int posSol, int posEol, PMatches Mtch, int Moves = -1); +  bool Evaluate(char *Expr, int posStr, PMatches Mtch, char **Res); +} *PPosRegExp; + +#endif /* POSREGEXP_H */ diff --git a/src/PosRegExp.cxx b/src/PosRegExp.cxx new file mode 100644 index 000000000..ea719b7c2 --- /dev/null +++ b/src/PosRegExp.cxx @@ -0,0 +1,1181 @@ +#include <string.h> +#include <stdio.h> +#include <ctype.h> +#include <malloc.h> + +#include "PosRegExp.h" + +//Up: /[A-Z \x80-\x9f \xf0 ]/x +//Lo: /[a-z \xa0-\xaf \xe0-\xef \xf1 ]/x +//Wd: /[\d _ A-Z a-z \xa0-\xaf \xe0-\xf1 \x80-\x9f]/x +//*   // Dos866 +SCharData UCData  = {0x0, 0x0, 0x7fffffe, 0x0, 0xffffffff, 0x0, 0x0, 0x10000}, +                     LCData  = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0xffff, 0x0, 0x2ffff}, +                     WdData  = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0xffffffff, 0xffff, 0x0, 0x3ffff}, +                     DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; +/*/   // cp1251 +SCharData UCData  = {0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff, 0x0}, +          LCData  = {0x0, 0x0, 0x0, 0x7fffffe, 0x0, 0x0, 0x0, 0xffffffff}, +          WdData  = {0x0, 0x3ff0000, 0x87fffffe, 0x7fffffe, 0x0, 0x0, 0xffffffff, 0xffffffff}, +          DigData = {0x0, 0x3ff0000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; +//*/ + +/////////////////////////////////////////////// + +int GetNumber(int *str,int s,int e) { +	int r = 1, num = 0; +	if (e < s) return -1; +	for(int i = e-1; i >= s; i--) { +		if (str[i] > '9' || str[i] < '0') return -1; +		num += (str[i] - 0x30)*r; +		r *= 10; +	}; +	return num; +	/* +	char tmp[20]; +	double Res; +	  if (e == s) return -1; +	  for (int i = s;i < e;i++) +	    tmp[i-s] = (char)Str[i]; +	  tmp[e-s] = 0; +	  GetNumber(tmp,&Res); +	  return (int)Res; +	*/ +}; + +bool IsDigit(char Symb) { +	return DigData.GetBit(Symb); +}; +bool IsWord(char Symb) { +	return WdData.GetBit(Symb); +}; +bool IsUpperCase(char Symb) { +	return UCData.GetBit(Symb); +}; +bool IsLowerCase(char Symb) { +	return LCData.GetBit(Symb); +}; +char LowCase(char Chr) { +	if (UCData.GetBit(Chr)) +		return Chr+0x20; +	return Chr; +}; + +/////////////////////////////////////////////// + +SRegInfo::SRegInfo() { +	Next = Parent = 0; +	un.Param = 0; +	Op = ReEmpty; +}; +SRegInfo::~SRegInfo() { +	if (Next) delete Next; +	if (un.Param) +		switch(Op) { +		case ReEnum: +		case ReNEnum: +			delete un.ChrClass; +			break; +		default: +			if (Op > ReBlockOps && Op < ReSymbolOps || Op == ReBrackets) +				delete un.Param; +			break; +		}; +}; + +/////////////////////////////////////////////// + +void SCharData::SetBit(unsigned char Bit) { +	int p = Bit/8; +	CArr[p] |= (1 << Bit%8); +}; +void SCharData::ClearBit(unsigned char Bit) { +	int p = Bit/8; +	CArr[p] &= ~(1 << Bit%8); +}; +bool SCharData::GetBit(unsigned char Bit) { +	int p = (unsigned char)Bit/8; +	return (CArr[p] & (1 << Bit%8))!=0; +}; + +///////////////////////////////////////////////////////////////// +//////////////////////  RegExp Class  /////////////////////////// +///////////////////////////////////////////////////////////////// + +PosRegExp::PosRegExp() { +	Info = 0; +	Exprn = 0; +	NoMoves = false; +	Error = true; +	FirstChar = 0; +	CurMatch = 0; +}; +PosRegExp::~PosRegExp() { +	if (Info) delete Info; +}; + +bool PosRegExp::SetExpr(const char *Expr) { +	if (!this) return false; +	Error = true; +	CurMatch = 0; +	if (SetExprLow(Expr)) Error = false; +	return !Error; +}; +bool PosRegExp::isok() { +	return !Error; +}; + + +bool PosRegExp::SetExprLow(const char *Expr) { +	int Len = strlen(Expr); +	bool  Ok = false; +	int i,j,s = 0,pos,tmp; +	int EnterBr = 0,EnterGr = 0,EnterFg = 0; + +	if (Info) delete Info; +	Info = new SRegInfo; +	Exprn = new int[Len]; + +	NoCase = false; +	Extend = false; +	if (Expr[0] == '/') s++; +	else return false; + +	for (i = Len; i > 0 && !Ok;i--) +		if (Expr[i] == '/') { +			Len = i-s; +			Ok = true; +			for (int j = i+1; Expr[j]; j++) { +				if (Expr[j] == 'i') NoCase = true; +				if (Expr[j] == 'x') Extend = true; +			}; +		}; +	if (!Ok) return false; + +	//////////////////////////////// +	for (j = 0,pos = 0; j < Len; j++,pos++) { +		if (Extend && Expr[j+s] == ' ') { +			pos--; +			continue; +		}; + +		Exprn[pos] = (int)(unsigned char)Expr[j+s]; + +		if (Expr[j+s] == BackSlash) { +			switch (Expr[j+s+1]) { +			case 'd': +				Exprn[pos] = ReDigit; +				break; +			case 'D': +				Exprn[pos] = ReNDigit; +				break; +			case 'w': +				Exprn[pos] = ReWordSymb; +				break; +			case 'W': +				Exprn[pos] = ReNWordSymb; +				break; +			case 's': +				Exprn[pos] = ReWSpace; +				break; +			case 'S': +				Exprn[pos] = ReNWSpace; +				break; +			case 'u': +				Exprn[pos] = ReUCase; +				break; +			case 'l': +				Exprn[pos] = ReNUCase; +				break; +			case 't': +				Exprn[pos] = '\t'; +				break; +			case 'n': +				Exprn[pos] = '\n'; +				break; +			case 'r': +				Exprn[pos] = '\r'; +				break; +			case 'b': +				Exprn[pos] = ReWBound; +				break; +			case 'B': +				Exprn[pos] = ReNWBound; +				break; +			case 'c': +				Exprn[pos] = RePreNW; +				break; +			case 'm': +				Exprn[pos] = ReStart; +				break; +			case 'M': +				Exprn[pos] = ReEnd; +				break; +			case 'x': +				tmp = toupper(Expr[j+s+2])-0x30; +				tmp = (tmp>9?tmp-7:tmp)<<4; +				tmp += (toupper(Expr[j+s+3])-0x30)>9?toupper(Expr[j+s+3])-0x37:(toupper(Expr[j+s+3])-0x30); +				Exprn[pos] = tmp; +				j+=2; +				break; +			case 'y': +				tmp = Expr[j+s+2] - 0x30; +				if (tmp >= 0 && tmp <= 9) { +					if (tmp == 1) { +						tmp = 10 + Expr[j+s+3] - 0x30; +						if (tmp >= 10 && tmp <= 19) j++; +						else tmp = 1; +					}; +					Exprn[pos] = ReBkTrace + tmp; +					j++; +					break; +				}; +			default: +				tmp = Expr[j+s+1] - 0x30; +				if (tmp >= 0 && tmp <= 9) { +					if (tmp == 1) { +						tmp = 10 + Expr[j+s+2] - 0x30; +						if (tmp >= 10 && tmp <= 19) j++; +						else tmp = 1; +					}; +					Exprn[pos] = ReBkBrack + tmp; +					break; +				} else +					Exprn[pos] = Expr[j+s+1]; +				break; +			}; +			j++; +			continue; +		}; +		if (Expr[j+s] == ']') { +			Exprn[pos] = ReEnumE; +			if (EnterFg || !EnterGr) return false; +			EnterGr--; +		}; +		if (Expr[j+s] == '-' && EnterGr) Exprn[pos] = ReFrToEnum; + +		if (EnterGr) continue; + +		if (Expr[j+s] == '[' && Expr[j+s+1] == '^') { +			Exprn[pos] = ReNEnumS; +			if (EnterFg) return false; +			EnterGr++; +			j++; +			continue; +		}; +		if (Expr[j+s] == '*' && Expr[j+s+1] == '?') { +			Exprn[pos] = ReNGMul; +			j++; +			continue; +		}; +		if (Expr[j+s] == '+' && Expr[j+s+1] == '?') { +			Exprn[pos] = ReNGPlus; +			j++; +			continue; +		}; +		if (Expr[j+s] == '?' && Expr[j+s+1] == '?') { +			Exprn[pos] = ReNGQuest; +			j++; +			continue; +		}; +		if (Expr[j+s] == '?' && Expr[j+s+1] == '#' && +		        Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { +			Exprn[pos] = ReBehind+Expr[j+s+2]-0x30; +			j+=2; +			continue; +		}; +		if (Expr[j+s] == '?' && Expr[j+s+1] == '~' && +		        Expr[j+s+2]>='0' && Expr[j+s+2]<='9') { +			Exprn[pos] = ReNBehind+Expr[j+s+2]-0x30; +			j+=2; +			continue; +		}; +		if (Expr[j+s] == '?' && Expr[j+s+1] == '=') { +			Exprn[pos] = ReAhead; +			j++; +			continue; +		}; +		if (Expr[j+s] == '?' && Expr[j+s+1] == '!') { +			Exprn[pos] = ReNAhead; +			j++; +			continue; +		}; + +		if (Expr[j+s] == '(') { +			Exprn[pos] = ReLBrack; +			if (EnterFg) return false; +			EnterBr++; +		}; +		if (Expr[j+s] == ')') { +			Exprn[pos] = ReRBrack; +			if (!EnterBr || EnterFg) return false; +			EnterBr--; +		}; +		if (Expr[j+s] == '[') { +			Exprn[pos] = ReEnumS; +			if (EnterFg) return false; +			EnterGr++; +		}; +		if (Expr[j+s] == '{') { +			Exprn[pos] = ReRangeS; +			if (EnterFg) return false; +			EnterFg++; +		}; +		if (Expr[j+s] == '}' && Expr[j+s+1] == '?') { +			Exprn[pos] = ReNGRangeE; +			if (!EnterFg) return false; +			EnterFg--; +			j++; +			continue; +		}; +		if (Expr[j+s] == '}') { +			Exprn[pos] = ReRangeE; +			if (!EnterFg) return false; +			EnterFg--; +		}; + +		if (Expr[j+s] == '^') Exprn[pos] = ReSoL; +		if (Expr[j+s] == '$') Exprn[pos] = ReEoL; +		if (Expr[j+s] == '.') Exprn[pos] = ReAnyChr; +		if (Expr[j+s] == '*') Exprn[pos] = ReMul; +		if (Expr[j+s] == '+') Exprn[pos] = RePlus; +		if (Expr[j+s] == '?') Exprn[pos] = ReQuest; +		if (Expr[j+s] == '|') Exprn[pos] = ReOr; +	}; +	if (EnterGr || EnterBr || EnterFg) return false; + +	Info->Op = ReBrackets; +	Info->un.Param = new SRegInfo; +	Info->s = CurMatch++; + +	if (!SetStructs(Info->un.Param,0,pos)) return false; +	Optimize(); +	delete Exprn; +	return true; +}; + +void PosRegExp::Optimize() { +	PRegInfo Next = Info; +	FirstChar = 0; +	while(Next) { +		if (Next->Op == ReBrackets || Next->Op == RePlus  || Next->Op == ReNGPlus) { +			Next = Next->un.Param; +			continue; +		}; +		if (Next->Op == ReSymb) { +			if (Next->un.Symb & 0xFF00 &&  Next->un.Symb != ReSoL && Next->un.Symb != ReWBound) +				break; +			FirstChar = Next->un.Symb; +			break; +		}; +		break; +	}; +}; + +bool PosRegExp::SetStructs(PRegInfo &re,int start,int end) { +	PRegInfo Next,Prev,Prev2; +	int comma,st,en,ng,i, j,k; +	int EnterBr; +	bool Add; + +	if (end - start < 0) return false; +	Next = re; +	for (i = start; i < end; i++) { +		Add = false; +		// Ops +		if (Exprn[i] > ReBlockOps && Exprn[i] < ReSymbolOps) { +			Next->un.Param = 0; +			Next->Op = (EOps)Exprn[i]; +			Add = true; +		}; +		// {n,m} +		if (Exprn[i] == ReRangeS) { +			st = i; +			en = -1; +			comma = -1; +			ng = 0; +			for (j = i;j < end;j++) { +				if (Exprn[j] == ReNGRangeE) { +					en = j; +					ng = 1; +					break; +				}; +				if (Exprn[j] == ReRangeE) { +					en = j; +					break; +				}; +				if ((char)Exprn[j] == ',') +					comma = j; +			}; +			if (en == -1) return false; +			if (comma == -1) comma = en; +			Next->s = (char)GetNumber(Exprn,st+1,comma); +			if (comma != en) +				Next->e = (char)GetNumber(Exprn,comma+1,en); +			else +				Next->e = Next->s; +			Next->un.Param = 0; +			Next->Op = ng?ReNGRangeNM:ReRangeNM; +			if (en-comma == 1) { +				Next->e = -1; +				Next->Op = ng?ReNGRangeN:ReRangeN; +			}; +			i=j; +			Add = true; +		}; +		// [] [^] +		if (Exprn[i] == ReEnumS || Exprn[i] == ReNEnumS) { +			Next->Op = (Exprn[i] == ReEnumS)?ReEnum:ReNEnum; +			for (j = i+1;j < end;j++) { +				if (Exprn[j] == ReEnumE) +					break; +			}; +			if (j == end) return false; +			Next->un.ChrClass = new SCharData; +			memset(Next->un.ChrClass, 0, 32); +			for (j = i+1;Exprn[j] != ReEnumE;j++) { +				if (Exprn[j+1] == ReFrToEnum) { +					for (i = (Exprn[j]&0xFF); i < (Exprn[j+2]&0xFF);i++) +						Next->un.ChrClass->SetBit(i&0xFF); +					j++; +					continue; +				}; +				switch(Exprn[j]) { +				case ReDigit: +					for (k = 0x30;k < 0x40;k++) +						if (IsDigit((char)k)) +							Next->un.ChrClass->SetBit(k); +					break; +				case ReNDigit: +					for (k = 0x30;k < 0x40;k++) +						if (!IsDigit((char)k)) +							Next->un.ChrClass->SetBit(k); +					Next->un.ChrClass->ClearBit(0x0a); +					Next->un.ChrClass->ClearBit(0x0d); +					break; +				case ReWordSymb: +					for (k = 0;k < 256;k++) +						if (IsWord((char)k)) +							Next->un.ChrClass->SetBit(k); +					break; +				case ReNWordSymb: +					for (k = 0;k < 256;k++) +						if (!IsWord((char)k)) +							Next->un.ChrClass->SetBit(k); +					Next->un.ChrClass->ClearBit(0x0a); +					Next->un.ChrClass->ClearBit(0x0d); +					break; +				case ReWSpace: +					Next->un.ChrClass->SetBit(0x20); +					Next->un.ChrClass->SetBit(0x09); +					break; +				case ReNWSpace: +					memset(Next->un.ChrClass->IArr, 0xFF, 32); +					Next->un.ChrClass->ClearBit(0x20); +					Next->un.ChrClass->ClearBit(0x09); +					Next->un.ChrClass->ClearBit(0x0a); +					Next->un.ChrClass->ClearBit(0x0d); +					break; +				default: +					if (!(Exprn[j]&0xFF00)) +						Next->un.ChrClass->SetBit(Exprn[j]&0xFF); +					break; +				}; +			}; +			Add = true; +			i=j; +		}; +		// ( ... ) +		if (Exprn[i] == ReLBrack) { +			EnterBr = 1; +			for (j = i+1;j < end;j++) { +				if (Exprn[j] == ReLBrack) EnterBr++; +				if (Exprn[j] == ReRBrack) EnterBr--; +				if (!EnterBr) break; +			}; +			if (EnterBr) return false; +			Next->Op = ReBrackets; +			Next->un.Param = new SRegInfo; +			Next->un.Param->Parent = Next; +			Next->s = CurMatch++; +			if (CurMatch > MatchesNum) CurMatch = MatchesNum; +			if (!SetStructs(Next->un.Param,i+1,j)) return false; +			Add = true; +			i=j; +		}; +		if ((Exprn[i]&0xFF00) == ReBkTrace) { +			Next->Op = ReBkTrace; +			Next->un.Symb = Exprn[i]&0xFF; +			Add = true; +		}; +		if ((Exprn[i]&0xFF00) == ReBkBrack) { +			Next->Op = ReBkBrack; +			Next->un.Symb = Exprn[i]&0xFF; +			Add = true; +		}; +		if ((Exprn[i]&0xFF00) == ReBehind) { +			Next->Op = ReBehind; +			Next->s = Exprn[i]&0xFF; +			Add = true; +		}; +		if ((Exprn[i]&0xFF00) == ReNBehind) { +			Next->Op = ReNBehind; +			Next->s = Exprn[i]&0xFF; +			Add = true; +		}; +		// Chars +		if (Exprn[i] >= ReAnyChr && Exprn[i] < ReTemp || Exprn[i] < 0x100) { +			Next->Op = ReSymb; +			Next->un.Symb = Exprn[i]; +			Add = true; +		}; +		// Next +		if (Add && i != end-1) { +			Next->Next = new SRegInfo; +			Next->Next->Parent = Next->Parent; +			Next = Next->Next; +		}; +	}; +	Next = re; +	Prev = Prev2 = 0; +	while(Next) { +		if (Next->Op > ReBlockOps && Next->Op < ReSymbolOps) { +			if (!Prev) return false; +			if (!Prev2) re = Next; +			else Prev2->Next = Next; +			//if (Prev->Op > ReBlockOps && Prev->Op < ReSymbolOps) return false; +			Prev->Parent = Next; +			Prev->Next = 0; +			Next->un.Param = Prev; +			Prev = Prev2; +		}; +		Prev2 = Prev; +		Prev = Next; +		Next = Next->Next; +	}; + +	return true; +}; + +///////////////////////////////////////////////////////////////// +/////////////////////////  Parsing  ///////////////////////////// +///////////////////////////////////////////////////////////////// + +bool PosRegExp::CheckSymb(int Symb,bool Inc) { +	bool Res; +	char ch; +	switch(Symb) { +	case ReAnyChr: +		if (posParse >= posEnd) return false; +		ch = CharAt(posParse,param); +		Res = ch != '\r' && ch != '\n'; +		if (Res && Inc) posParse++; +		return Res; +	case ReSoL: +		if (posStart == posParse) +			return true; +		ch = CharAt(posParse-1,param); +		return ch == '\n' || ch == '\r'; +	case ReEoL: +		if (posEnd == posParse) +			return true; +		ch = CharAt(posParse,param); +		return ch == '\n' || ch == '\r'; +	case ReDigit: +		if (posParse >= posEnd) return false; +		ch = CharAt(posParse,param); +		Res = (ch >= 0x30 && ch <= 0x39); +		if (Res && Inc) posParse++; +		return Res; +	case ReNDigit: +		if (posParse >= posEnd) return false; +		ch = CharAt(posParse,param); +		Res = !(ch >= 0x30 && ch <= 0x39) && ch != '\r' && ch != '\n'; +		if (Res && Inc) posParse++; +		return Res; +	case ReWordSymb: +		if (posParse >= posEnd) return false; +		Res = IsWord(CharAt(posParse,param)); +		if (Res && Inc) posParse++; +		return Res; +	case ReNWordSymb: +		if (posParse >= posEnd) return false; +		ch = CharAt(posParse,param); +		Res = !IsWord(ch) && ch != '\r' && ch != '\n'; +		if (Res && Inc) posParse++; +		return Res; +	case ReWSpace: +		if (posParse >= posEnd) return false; +		ch = CharAt(posParse,param); +		Res = (ch == 0x20 || ch == '\t'); +		if (Res && Inc) posParse++; +		return Res; +	case ReNWSpace: +		if (posParse >= posEnd) return false; +		ch = CharAt(posParse,param); +		Res = !(ch == 0x20 || ch == '\t') && ch != '\r' && ch != '\n'; +		if (Res && Inc) posParse++; +		return Res; +	case ReUCase: +		if (posParse >= posEnd) return false; +		Res = IsUpperCase(CharAt(posParse,param)); +		if (Res && Inc) posParse++; +		return Res; +	case ReNUCase: +		if (posParse >= posEnd) return false; +		Res = IsLowerCase(CharAt(posParse,param)); +		if (Res && Inc) posParse++; +		return Res; +	case ReWBound: +		if (posParse >= posEnd) return true; +		ch = CharAt(posParse,param); +		return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param))); +	case ReNWBound: +		if (posParse >= posEnd) return true; +		return !IsWord(CharAt(posParse,param)) && IsWord(CharAt(posParse-1,param)); +	case RePreNW: +		if (posParse >= posEnd) return true; +		return (posParse == posStart || !IsWord(CharAt(posParse-1,param))); +	case ReStart: +		Matches->s[0] = (posParse-posStart); +		return true; +	case ReEnd: +		Matches->e[0] = (posParse-posStart); +		return true; +	default: +		if ((Symb & 0xFF00) || posParse >= posEnd) return false; +		if (NoCase) { +			if (LowCase(CharAt(posParse,param)) != LowCase((char)Symb&0xFF)) return false; +		} else +			if (CharAt(posParse,param) != (char)(Symb&0xFF)) return false; +		if (Inc) posParse++; +		return true; +	}; +} + +bool PosRegExp::LowParseRe(PRegInfo &Next) { +	PRegInfo OrNext; +	int i,match,sv; +	int posStr; + +	switch(Next->Op) { +	case ReSymb: +		if (!CheckSymb(Next->un.Symb,true)) return false; +		break; +	case ReEmpty: +		break; +	case ReBkTrace: +		if (!posBkStr | !BkTrace) return false; +		sv = Next->un.Symb; +		posStr = posParse; +		for (i = BkTrace->s[sv]; i < BkTrace->e[sv]; i++) { +			if (CharAt(posStr,param) != CharAt(posBkStr+i,param) || posEnd == posStr) return false; +			posStr++; +		}; +		posParse = posStr; +		break; +	case ReBkBrack: +		sv = Next->un.Symb; +		posStr = posParse; +		if (Matches->s[sv] == -1 || Matches->e[sv] == -1) return false; +		for (i = Matches->s[sv]; i < Matches->e[sv]; i++) { +			if (CharAt(posStr,param) != CharAt(posStart+i,param) || posEnd == posStr) return false; +			posStr++; +		}; +		posParse = posStr; +		break; +	case ReBehind: +		sv = Next->s; +		posStr = posParse; +		posParse -= sv; +		if (!LowParse(Next->un.Param)) return false; +		posParse = posStr; +		break; +	case ReNBehind: +		sv = Next->s; +		posStr = posParse; +		posParse -= sv; +		if (LowParse(Next->un.Param)) return false; +		posParse = posStr; +		break; +	case ReAhead: +		posStr = posParse; +		if (!LowParse(Next->un.Param)) return false; +		posParse = posStr; +		break; +	case ReNAhead: +		posStr = posParse; +		if (LowParse(Next->un.Param)) return false; +		posParse = posStr; +		break; +	case ReEnum: +		if (posParse >= posEnd) return false; +		if (!Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false; +		posParse++; +		break; +	case ReNEnum: +		if (posParse >= posEnd) return false; +		if (Next->un.ChrClass->GetBit(CharAt(posParse,param))) return false; +		posParse++; +		break; +	case ReBrackets: +		match = Next->s; +		sv = posParse-posStart; +		posStr = posParse; +		if (LowParse(Next->un.Param)) { +			if (match || (Matches->s[match] == -1)) +				Matches->s[match] = sv; +			if (match || (Matches->e[match] == -1)) +				Matches->e[match] = posParse-posStart; +			return true; +		}; +		posParse = posStr; +		return false; +	case ReMul: +		posStr = posParse; +		while (LowParse(Next->un.Param)); +		while(!LowCheckNext(Next) && posStr < posParse) posParse--; +		break; +	case ReNGMul: +		do { +			if (LowCheckNext(Next)) break; +		} while (LowParse(Next->un.Param)); +		break; +	case RePlus: +		posStr = posParse; +		match = false; +		while (LowParse(Next->un.Param)) +			match = true; +		if (!match) return false; +		while(!LowCheckNext(Next) && posStr < posParse) posParse--; +		break; +	case ReNGPlus: +		if (!LowParse(Next->un.Param)) return false; +		do { +			if (LowCheckNext(Next)) break; +		} while (LowParse(Next->un.Param)); +		break; +	case ReQuest: +		LowParse(Next->un.Param); +		break; +	case ReNGQuest: +		if (LowCheckNext(Next)) break; +		if (!LowParse(Next->un.Param)) return false; +		break; +	case ReOr: +		OrNext = Next; +		// posStr = posParse; +		if (LowParse(Next->un.Param)) { +			while (OrNext && OrNext->Op == ReOr) +				OrNext = OrNext->Next; +			/*if (!LowCheckNext(OrNext)){ +			  posParse = posStr; +			  OrNext = Next; +		};*/ +		}; +		Next = OrNext; +		break; +	case ReRangeN: +		posStr = posParse; +		i = 0; +		while (LowParse(Next->un.Param)) i++; // ??? +		do { +			if (i < Next->s) { +				posParse = posStr; +				return false; +			}; +			i--; +		} while(!LowCheckNext(Next) && posStr < posParse--); +		break; +	case ReNGRangeN: +		posStr = posParse; +		i = 0; +		while (LowParse(Next->un.Param)) { +			i++; +			if (i >= Next->s && LowCheckNext(Next)) // ??? +				break; +		}; +		if (i < Next->s) { +			posParse = posStr; +			return false; +		}; +		break; +	case ReRangeNM: +		posStr = posParse; +		i = 0; +		while (i < Next->s && LowParse(Next->un.Param)) // ??? +			i++; +		if (i < Next->s) { +			posParse = posStr; +			return false; +		}; +		while (i < Next->e && LowParse(Next->un.Param)) // ??? +			i++; + +		while(!LowCheckNext(Next)) { +			i--; +			posParse--; +			if (i < Next->s) { +				posParse = posStr; +				return false; +			}; +		}; +		break; +	case ReNGRangeNM: +		posStr = posParse; +		i = 0; +		while (i < Next->s && LowParse(Next->un.Param)) // ??? +			i++; +		if (i < Next->s) { +			posParse = posStr; +			return false; +		}; +		while(!LowCheckNext(Next)) { +			i++; +			if (!LowParse(Next->un.Param) || i > Next->e) { // ??? +				posParse = posStr; +				return false; +			}; +		}; +		break; +	}; +	return true; +}; + +bool PosRegExp::LowCheckNext(PRegInfo Re) { +	PRegInfo Next; +	int tmp = posParse; +	Next = Re; +	do { +		if (Next && Next->Op == ReOr) +			while (Next && Next->Op == ReOr) +				Next = Next->Next; +		if (Next->Next && !LowParse(Next->Next)) { +			posParse = tmp; +			Ok = false; +			return false; +		}; +		Next = Next->Parent; +	} while(Next); +	posParse = tmp; +	if (Ok != false) Ok = true; +	return true; +}; + +bool PosRegExp::LowParse(PRegInfo Re) { +	while(Re && posParse <= posEnd) { +		if (!LowParseRe(Re)) return false; +		if (Re) Re = Re->Next; +	}; +	return true; +}; + +bool PosRegExp::QuickCheck() { +	if (!NoMoves || !FirstChar) +		return true; +	switch(FirstChar) { +	case ReSoL: +		if (posParse != posStart) return false; +		return true; +	case ReWBound: +		return IsWord(CharAt(posParse,param)) && (posParse == posStart || !IsWord(CharAt(posParse-1,param))); +	default: +		if (NoCase && LowCase(CharAt(posParse,param)) != LowCase(FirstChar)) return false; +		if (!NoCase && CharAt(posParse,param) != (char)FirstChar) return false; +		return true; +	}; +}; + +bool PosRegExp::ParseRe(int posStr) { +	if (Error) return false; + +	posParse = posStr; +	if (!QuickCheck()) return false; + +	for (int i = 0; i < MatchesNum; i++) +		Matches->s[i] = Matches->e[i] = -1; +	Matches->CurMatch = CurMatch; + +	Ok = -1; +	//try{ +	do { +		if (!LowParse(Info)) { +			if (NoMoves) return false; +		} else +			return true; +		posParse = ++posStr; +	} while(posParse != posEnd+1); +	return false; +	//}__except(){ +	//  return true; +	//}; +} +; + +bool PosRegExp::Parse(int posStr,int posSol, int posEol, PMatches Mtch, int Moves) { +	if (!this) return false; + +	bool s = NoMoves; +	if (Moves != -1) NoMoves = Moves!=0; +	posStart = posSol; +	posEnd   = posEol; +	Matches = Mtch; +	bool r = ParseRe(posStr); +	NoMoves = s; +	return r; +}; + +bool PosRegExp::Parse(int posStr, int posStop, PMatches Mtch) { +	if (!this) return false; +	posStart = posStr; +	posEnd = posStop; +	Matches = Mtch; +	return ParseRe(posStr); +}; + +bool PosRegExp::SetNoMoves(bool Moves) { +	NoMoves = Moves; +	return true; +}; + +bool PosRegExp::SetBkTrace(int posStr,PMatches Trace) { +	BkTrace = Trace; +	posBkStr = posStr; +	return true; +}; + +#define EVAL_MATCHES 16 +#define EVAL_CHUNKSIZE 256 + +#define EVAL_LOWERCASE 1 +#define EVAL_UPPERCASE 2 +#define EVAL_LOWERCASE_NEXT 4 +#define EVAL_UPPERCASE_NEXT 8 + +bool PosRegExp::Evaluate(char *Expr, int posStr, PMatches Mtch, char **Res) { +	int length, +	newlength, +	chunklength, +	value, +	size, +	src, +	end; +	unsigned flag; +	char ch, +	*dest, +	*pool; + +	size = EVAL_CHUNKSIZE; +	pool = (char*) malloc (size); +	dest = pool; +	length = 0; +	flag = 0; +	while (*Expr) { +		switch (ch = *Expr++) { +		case '\\': +			switch (ch = *Expr++) { +			case 'A': +			case 'B': +			case 'C': +			case 'D': +			case 'E': +			case 'F': +				ch -= ('A' - '0'); +			case '0': +			case '1': +			case '2': +			case '3': +			case '4': +			case '5': +			case '6': +			case '7': +			case '8': +			case '9': +				value = ch - '0'; +				if (Mtch->s[value] != -1 && value < EVAL_MATCHES) { +					chunklength = Mtch->e[value] - Mtch->s[value]; +					if (chunklength) { +						newlength = chunklength + length; +						if (newlength > size) { +							do +								size += EVAL_CHUNKSIZE; +							while (size < newlength); +							pool = (char*) realloc (pool, size); +							dest = pool + length; +						} +						length = newlength; +						src = posStr + Mtch->s[value]; +						end = posStr + Mtch->e[value]; +						if (flag & EVAL_UPPERCASE) { +							if (flag & EVAL_LOWERCASE_NEXT) { +								*dest++ = tolower (CharAt(src++,param)); +								flag &= ~EVAL_LOWERCASE_NEXT; +							} +							while (src < end) +								*dest++ = toupper (CharAt(src++,param)); +						} else if (flag & EVAL_LOWERCASE) { +							if (flag & EVAL_UPPERCASE_NEXT) { +								*dest++ = toupper (CharAt(src++,param)); +								flag &= ~EVAL_UPPERCASE_NEXT; +							} +							while (src < end) +								*dest++ = tolower (CharAt(src++,param)); +						} else { +							if (flag & EVAL_LOWERCASE_NEXT) { +								*dest++ = tolower (CharAt(src++,param)); +								flag &= ~EVAL_LOWERCASE_NEXT; +							} else if (flag & EVAL_UPPERCASE_NEXT) { +								*dest++ = toupper (CharAt(src++,param)); +								flag &= ~EVAL_UPPERCASE_NEXT; +							} +							while (src < end) +								*dest++ = CharAt(src++,param); +						} +					} +				} else +					goto error; +				continue; +			case '\0': +				goto error; +			case 'r': +				ch = '\r'; +				break; +			case 'n': +				ch = '\n'; +				break; +			case 'b': +				ch = '\b'; +				break; +			case 'a': +				ch = '\a'; +				break; +			case 't': +				ch = '\t'; +				break; +			case 'U': +				flag |= EVAL_UPPERCASE; +				continue; +			case 'u': +				flag |= EVAL_UPPERCASE_NEXT; +				continue; +			case 'L': +				flag |= EVAL_LOWERCASE; +				continue; +			case 'l': +				flag |= EVAL_LOWERCASE_NEXT; +				continue; +			case 'Q': +			case 'q': +				flag &= ~(EVAL_UPPERCASE | EVAL_LOWERCASE); +				continue; +			case 'x': +				{ +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						value = value + '0' - 'A' + 10; +					if (value > 15) +						goto error; +					ch = value << 4; +					Expr++; +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						value = value + '0' - 'A' + 10; +					if (value > 15) +						goto error; +					Expr++; +					ch |= value; +					break; +				} +			case 'd': +				{ +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						goto error; +					ch = value * 100; +					Expr++; +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						goto error; +					ch += value * 10; +					Expr++; +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						goto error; +					ch += value; +					Expr++; +					break; +				} +			case 'o': +				{ +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						goto error; +					ch = value << 6; +					Expr++; +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						goto error; +					ch += value << 3; +					Expr++; +					if (!*Expr) +						goto error; +					value = toupper (*Expr) - '0'; +					if (value > 9) +						goto error; +					ch |= value; +					Expr++; +					/* break; */ +				} +				/* default: +					break; */ +			} +		default: +			if (++length > size) { +				do +					size += EVAL_CHUNKSIZE; +				while (size < length); +				pool = (char*) realloc (pool, size); +				dest = pool + length - 1; +			} +			if (flag & EVAL_LOWERCASE_NEXT) { +				*dest++ = tolower (ch); +				flag &= ~EVAL_LOWERCASE_NEXT; +			} else if (flag & EVAL_UPPERCASE_NEXT) { +				*dest++ = toupper (ch); +				flag &= ~EVAL_UPPERCASE_NEXT; +			} else if (flag & EVAL_UPPERCASE) +				*dest++ = toupper (ch); +			else if (flag & EVAL_LOWERCASE) +				*dest++ = tolower (ch); +			else +				*dest++ = ch; +		} +	} +	if (++length > size) { +		do +			size += EVAL_CHUNKSIZE; +		while (size < length); +		pool = (char*) realloc (pool, size); +		dest = pool + length - 1; +	} +	*dest = '\0'; +	*Res = pool; +	return true; +error: +	free (pool); +	return false; +} | 
