diff options
| author | nyamatongwe <unknown> | 2009-04-12 04:54:37 +0000 | 
|---|---|---|
| committer | nyamatongwe <unknown> | 2009-04-12 04:54:37 +0000 | 
| commit | 67db09ae242cf51a4aab5fe8cb36bb8e7b11c7dd (patch) | |
| tree | fc5fd20761e8b533a011ad30e9449b55a57a2001 /src | |
| parent | 2d3cb8a10d0ad8eec42ab352d168628184029ef9 (diff) | |
| download | scintilla-mirror-67db09ae242cf51a4aab5fe8cb36bb8e7b11c7dd.tar.gz | |
Updated Python lexer to support Python 3 changes. Can choose whether
Unicode, byte string, binary number and new-style octal (0o) literals
are recognised.
From code by Stephan Deibel.
Diffstat (limited to 'src')
| -rw-r--r-- | src/LexPython.cxx | 92 | 
1 files changed, 76 insertions, 16 deletions
| diff --git a/src/LexPython.cxx b/src/LexPython.cxx index b2c2f0558..4513dccf2 100644 --- a/src/LexPython.cxx +++ b/src/LexPython.cxx @@ -24,17 +24,27 @@  using namespace Scintilla;  #endif -enum kwType { kwOther, kwClass, kwDef, kwImport }; +/* kwCDef, kwCTypeName only used for Cython */ +enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName }; +  static const int indicatorWhitespace = 1;  static bool IsPyComment(Accessor &styler, int pos, int len) {  	return len > 0 && styler[pos] == '#';  } -static bool IsPyStringStart(int ch, int chNext, int chNext2) { +enum literalsAllowed { litNone=0, litU=1, litB=2}; + +static bool IsPyStringTypeChar(int ch, literalsAllowed allowed) { +	return  +		((allowed & litB) && (ch == 'b' || ch == 'B')) || +		((allowed & litU) && (ch == 'u' || ch == 'U')); +} + +static bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) {  	if (ch == '\'' || ch == '"')  		return true; -	if (ch == 'u' || ch == 'U') { +	if (IsPyStringTypeChar(ch, allowed)) {  		if (chNext == '"' || chNext == '\'')  			return true;  		if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\'')) @@ -47,16 +57,16 @@ static bool IsPyStringStart(int ch, int chNext, int chNext2) {  }  /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */ -static int GetPyStringState(Accessor &styler, int i, unsigned int *nextIndex) { +static int GetPyStringState(Accessor &styler, int i, unsigned int *nextIndex, literalsAllowed allowed) {  	char ch = styler.SafeGetCharAt(i);  	char chNext = styler.SafeGetCharAt(i + 1); -	// Advance beyond r, u, or ur prefix, but bail if there are any unexpected chars +	// Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars  	if (ch == 'r' || ch == 'R') {  		i++;  		ch = styler.SafeGetCharAt(i);  		chNext = styler.SafeGetCharAt(i + 1); -	} else if (ch == 'u' || ch == 'U') { +	} else if (IsPyStringTypeChar(ch, allowed)) {  		if (chNext == 'r' || chNext == 'R')  			i += 2;  		else @@ -96,7 +106,7 @@ static inline bool IsAWordStart(int ch) {  }  static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, -                           WordList *keywordlists[], Accessor &styler) { +        WordList *keywordlists[], Accessor &styler) {  	int endPos = startPos + length; @@ -127,6 +137,19 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,  	const int whingeLevel = styler.GetPropertyInt("tab.timmy.whinge.level"); +	// property lexer.python.literals.binary +	//	Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712. +	bool base2or8Literals = styler.GetPropertyInt("lexer.python.literals.binary", 1) != 0; + +	// property lexer.python.strings.u +	//	Set to 0 to not recognise Python Unicode literals u"x" as used before Python 3. +	literalsAllowed allowedLiterals = (styler.GetPropertyInt("lexer.python.strings.u", 1)) ? litU : litNone; + +	// property lexer.python.strings.b +	//	Set to 0 to not recognise Python 3 bytes literals b"x". +	if (styler.GetPropertyInt("lexer.python.strings.b", 1)) +		allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB); +  	initStyle = initStyle & 31;  	if (initStyle == SCE_P_STRINGEOL) {  		initStyle = SCE_P_DEFAULT; @@ -135,7 +158,7 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,  	kwType kwLast = kwOther;  	int spaceFlags = 0;  	styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment); -	bool hexadecimal = false; +	bool base_n_number = false;  	StyleContext sc(startPos, endPos - startPos, initStyle, styler); @@ -192,7 +215,7 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,  			sc.SetState(SCE_P_DEFAULT);  		} else if (sc.state == SCE_P_NUMBER) {  			if (!IsAWordChar(sc.ch) && -			        !(!hexadecimal && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) { +			        !(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {  				sc.SetState(SCE_P_DEFAULT);  			}  		} else if (sc.state == SCE_P_IDENTIFIER) { @@ -208,6 +231,23 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,  					style = SCE_P_CLASSNAME;  				} else if (kwLast == kwDef) {  					style = SCE_P_DEFNAME; +				} else if (kwLast == kwCDef) { +					int pos = sc.currentPos; +					unsigned char ch = styler.SafeGetCharAt(pos, '\0'); +					while (ch != '\0') { +						if (ch == '(') { +							style = SCE_P_DEFNAME; +							break; +						} else if (ch == ':') { +							style = SCE_P_CLASSNAME; +							break; +						} else if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') { +							pos++; +							ch = styler.SafeGetCharAt(pos, '\0'); +						} else { +							break; +						} +					}  				} else if (keywords2.InList(s)) {  					style = SCE_P_WORD2;  				} @@ -220,9 +260,13 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,  						kwLast = kwDef;  					else if (0 == strcmp(s, "import"))  						kwLast = kwImport; -					else +					else if (0 == strcmp(s, "cdef")) +						kwLast = kwCDef; +					else if (0 == strcmp(s, "cimport")) +						kwLast = kwImport; +					else if (kwLast != kwCDef)  						kwLast = kwOther; -				} else { +				} else if (kwLast != kwCDef) {  					kwLast = kwOther;  				}  			} @@ -278,6 +322,11 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,  			indentGood = true;  		} +		// One cdef line, clear kwLast only at end of line +		if (kwLast == kwCDef && sc.atLineEnd) { +			kwLast = kwOther; +		} +  		// State exit code may have moved on to end of line  		if (needEOLCheck && sc.atLineEnd) {  			lineCurrent++; @@ -290,20 +339,30 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle,  		if (sc.state == SCE_P_DEFAULT) {  			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {  				if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) { -					hexadecimal = true; +					base_n_number = true; +					sc.SetState(SCE_P_NUMBER); +				} else if (sc.ch == '0' &&  +					(sc.chNext == 'o' || sc.chNext == 'O' || sc.chNext == 'b' || sc.chNext == 'B')) { +					if (base2or8Literals) { +						base_n_number = true; +						sc.SetState(SCE_P_NUMBER); +					} else { +						sc.SetState(SCE_P_NUMBER); +						sc.ForwardSetState(SCE_P_IDENTIFIER); +					}  				} else { -					hexadecimal = false; +					base_n_number = false; +					sc.SetState(SCE_P_NUMBER);  				} -				sc.SetState(SCE_P_NUMBER);  			} else if ((isascii(sc.ch) && isoperator(static_cast<char>(sc.ch))) || sc.ch == '`') {  				sc.SetState(SCE_P_OPERATOR);  			} else if (sc.ch == '#') {  				sc.SetState(sc.chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE);  			} else if (sc.ch == '@') {  				sc.SetState(SCE_P_DECORATOR); -			} else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2))) { +			} else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2), allowedLiterals)) {  				unsigned int nextIndex = 0; -				sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex)); +				sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals));  				while (nextIndex > (sc.currentPos + 1) && sc.More()) {  					sc.Forward();  				} @@ -478,3 +537,4 @@ static const char * const pythonWordListDesc[] = {  LexerModule lmPython(SCLEX_PYTHON, ColourisePyDoc, "python", FoldPyDoc,  					 pythonWordListDesc); + | 
