diff options
author | nyamatongwe <unknown> | 2009-04-12 04:54:37 +0000 |
---|---|---|
committer | nyamatongwe <unknown> | 2009-04-12 04:54:37 +0000 |
commit | 67db09ae242cf51a4aab5fe8cb36bb8e7b11c7dd (patch) | |
tree | fc5fd20761e8b533a011ad30e9449b55a57a2001 /src/LexPython.cxx | |
parent | 2d3cb8a10d0ad8eec42ab352d168628184029ef9 (diff) | |
download | scintilla-mirror-67db09ae242cf51a4aab5fe8cb36bb8e7b11c7dd.tar.gz |
Updated Python lexer to support Python 3 changes. Can choose whether
Unicode, byte string, binary number and new-style octal (0o) literals
are recognised.
From code by Stephan Deibel.
Diffstat (limited to 'src/LexPython.cxx')
-rw-r--r-- | src/LexPython.cxx | 92 |
1 files changed, 76 insertions, 16 deletions
diff --git a/src/LexPython.cxx b/src/LexPython.cxx index b2c2f0558..4513dccf2 100644 --- a/src/LexPython.cxx +++ b/src/LexPython.cxx @@ -24,17 +24,27 @@ using namespace Scintilla; #endif -enum kwType { kwOther, kwClass, kwDef, kwImport }; +/* kwCDef, kwCTypeName only used for Cython */ +enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName }; + static const int indicatorWhitespace = 1; static bool IsPyComment(Accessor &styler, int pos, int len) { return len > 0 && styler[pos] == '#'; } -static bool IsPyStringStart(int ch, int chNext, int chNext2) { +enum literalsAllowed { litNone=0, litU=1, litB=2}; + +static bool IsPyStringTypeChar(int ch, literalsAllowed allowed) { + return + ((allowed & litB) && (ch == 'b' || ch == 'B')) || + ((allowed & litU) && (ch == 'u' || ch == 'U')); +} + +static bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) { if (ch == '\'' || ch == '"') return true; - if (ch == 'u' || ch == 'U') { + if (IsPyStringTypeChar(ch, allowed)) { if (chNext == '"' || chNext == '\'') return true; if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\'')) @@ -47,16 +57,16 @@ static bool IsPyStringStart(int ch, int chNext, int chNext2) { } /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */ -static int GetPyStringState(Accessor &styler, int i, unsigned int *nextIndex) { +static int GetPyStringState(Accessor &styler, int i, unsigned int *nextIndex, literalsAllowed allowed) { char ch = styler.SafeGetCharAt(i); char chNext = styler.SafeGetCharAt(i + 1); - // Advance beyond r, u, or ur prefix, but bail if there are any unexpected chars + // Advance beyond r, u, or ur prefix (or r, b, or br in Python 3.0), but bail if there are any unexpected chars if (ch == 'r' || ch == 'R') { i++; ch = styler.SafeGetCharAt(i); chNext = styler.SafeGetCharAt(i + 1); - } else if (ch == 'u' || ch == 'U') { + } else if (IsPyStringTypeChar(ch, allowed)) { if (chNext == 'r' || chNext == 'R') i += 2; else @@ -96,7 +106,7 @@ static inline bool IsAWordStart(int ch) { } static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, - WordList *keywordlists[], Accessor &styler) { + WordList *keywordlists[], Accessor &styler) { int endPos = startPos + length; @@ -127,6 +137,19 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, const int whingeLevel = styler.GetPropertyInt("tab.timmy.whinge.level"); + // property lexer.python.literals.binary + // Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712. + bool base2or8Literals = styler.GetPropertyInt("lexer.python.literals.binary", 1) != 0; + + // property lexer.python.strings.u + // Set to 0 to not recognise Python Unicode literals u"x" as used before Python 3. + literalsAllowed allowedLiterals = (styler.GetPropertyInt("lexer.python.strings.u", 1)) ? litU : litNone; + + // property lexer.python.strings.b + // Set to 0 to not recognise Python 3 bytes literals b"x". + if (styler.GetPropertyInt("lexer.python.strings.b", 1)) + allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB); + initStyle = initStyle & 31; if (initStyle == SCE_P_STRINGEOL) { initStyle = SCE_P_DEFAULT; @@ -135,7 +158,7 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, kwType kwLast = kwOther; int spaceFlags = 0; styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment); - bool hexadecimal = false; + bool base_n_number = false; StyleContext sc(startPos, endPos - startPos, initStyle, styler); @@ -192,7 +215,7 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, sc.SetState(SCE_P_DEFAULT); } else if (sc.state == SCE_P_NUMBER) { if (!IsAWordChar(sc.ch) && - !(!hexadecimal && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) { + !(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) { sc.SetState(SCE_P_DEFAULT); } } else if (sc.state == SCE_P_IDENTIFIER) { @@ -208,6 +231,23 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, style = SCE_P_CLASSNAME; } else if (kwLast == kwDef) { style = SCE_P_DEFNAME; + } else if (kwLast == kwCDef) { + int pos = sc.currentPos; + unsigned char ch = styler.SafeGetCharAt(pos, '\0'); + while (ch != '\0') { + if (ch == '(') { + style = SCE_P_DEFNAME; + break; + } else if (ch == ':') { + style = SCE_P_CLASSNAME; + break; + } else if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') { + pos++; + ch = styler.SafeGetCharAt(pos, '\0'); + } else { + break; + } + } } else if (keywords2.InList(s)) { style = SCE_P_WORD2; } @@ -220,9 +260,13 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, kwLast = kwDef; else if (0 == strcmp(s, "import")) kwLast = kwImport; - else + else if (0 == strcmp(s, "cdef")) + kwLast = kwCDef; + else if (0 == strcmp(s, "cimport")) + kwLast = kwImport; + else if (kwLast != kwCDef) kwLast = kwOther; - } else { + } else if (kwLast != kwCDef) { kwLast = kwOther; } } @@ -278,6 +322,11 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, indentGood = true; } + // One cdef line, clear kwLast only at end of line + if (kwLast == kwCDef && sc.atLineEnd) { + kwLast = kwOther; + } + // State exit code may have moved on to end of line if (needEOLCheck && sc.atLineEnd) { lineCurrent++; @@ -290,20 +339,30 @@ static void ColourisePyDoc(unsigned int startPos, int length, int initStyle, if (sc.state == SCE_P_DEFAULT) { if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) { if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) { - hexadecimal = true; + base_n_number = true; + sc.SetState(SCE_P_NUMBER); + } else if (sc.ch == '0' && + (sc.chNext == 'o' || sc.chNext == 'O' || sc.chNext == 'b' || sc.chNext == 'B')) { + if (base2or8Literals) { + base_n_number = true; + sc.SetState(SCE_P_NUMBER); + } else { + sc.SetState(SCE_P_NUMBER); + sc.ForwardSetState(SCE_P_IDENTIFIER); + } } else { - hexadecimal = false; + base_n_number = false; + sc.SetState(SCE_P_NUMBER); } - sc.SetState(SCE_P_NUMBER); } else if ((isascii(sc.ch) && isoperator(static_cast<char>(sc.ch))) || sc.ch == '`') { sc.SetState(SCE_P_OPERATOR); } else if (sc.ch == '#') { sc.SetState(sc.chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE); } else if (sc.ch == '@') { sc.SetState(SCE_P_DECORATOR); - } else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2))) { + } else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2), allowedLiterals)) { unsigned int nextIndex = 0; - sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex)); + sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals)); while (nextIndex > (sc.currentPos + 1) && sc.More()) { sc.Forward(); } @@ -478,3 +537,4 @@ static const char * const pythonWordListDesc[] = { LexerModule lmPython(SCLEX_PYTHON, ColourisePyDoc, "python", FoldPyDoc, pythonWordListDesc); + |