Bug [#1848]. Support Python f-strings with new styles.

author: John Ehresman <unknown> 2017-02-12 09:15:08 +1100
committer: John Ehresman <unknown> 2017-02-12 09:15:08 +1100
commit: c0df83f637c42982662aaa802f691abb3d529a2e (patch)
tree: 73c32bc0eaf0c8ad54cc4d19cddb160fd34d2ea0 /lexers/LexPython.cxx
parent: 5bd32babfe8721de32026d78378848f90fe6d68c (diff)
download: scintilla-mirror-c0df83f637c42982662aaa802f691abb3d529a2e.tar.gz
1 files changed, 68 insertions, 17 deletions
diff --git a/lexers/LexPython.cxx b/lexers/LexPython.cxx
index 6a2edde11..b20d3ad25 100644
--- a/lexers/LexPython.cxx
+++ b/lexers/LexPython.cxx
@@ -20,6 +20,7 @@
 #include "Scintilla.h"
 #include "SciLexer.h"
 
+#include "StringCopy.h"
 #include "WordList.h"
 #include "LexAccessor.h"
 #include "Accessor.h"
@@ -70,10 +71,42 @@ bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) {
 	return false;
 }
 
+// True when st is one of the four f-string styles (FCHARACTER, FSTRING, FTRIPLE, FTRIPLEDOUBLE).
+bool IsPyFStringState(int st) {
+	return st == SCE_P_FSTRING || st == SCE_P_FCHARACTER || st == SCE_P_FTRIPLEDOUBLE || st == SCE_P_FTRIPLE;
+}
+
+// True for the single-quoted styles: CHARACTER and STRING, in plain or f-string form.
+bool IsPySingleQuoteStringState(int st) {
+	return st == SCE_P_STRING || st == SCE_P_FSTRING || st == SCE_P_CHARACTER || st == SCE_P_FCHARACTER;
+}
+
+// True for the triple-quoted styles: TRIPLE and TRIPLEDOUBLE, in plain or f-string form.
+bool IsPyTripleQuoteStringState(int st) {
+	return st == SCE_P_TRIPLEDOUBLE || st == SCE_P_FTRIPLEDOUBLE || st == SCE_P_TRIPLE || st == SCE_P_FTRIPLE;
+}
+
+// Push state at index 0, shifting existing entries down; when full, the deepest entry is silently dropped.
+void PushStateToStack(int state, int *stack, int stackSize) {
+	if (stackSize <= 0) return;	// no capacity: writing stack[0] would be out of bounds
+	for (int i = stackSize-1; i > 0; i--) stack[i] = stack[i-1];
+	stack[0] = state;
+}
+
+// Pop and return stack[0]; remaining entries shift up and the freed last slot is set to 0 (the empty marker).
+int PopFromStateStack(int *stack, int stackSize) {
+	if (stackSize <= 0) return 0;	// nothing to pop; avoids reading stack[0] and writing stack[-1]
+	const int top = stack[0];
+	for (int i = 0; i < stackSize - 1; i++) stack[i] = stack[i+1];
+	stack[stackSize-1] = 0;
+	return top;
+}
+
 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
 int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, literalsAllowed allowed) {
 	char ch = styler.SafeGetCharAt(i);
 	char chNext = styler.SafeGetCharAt(i + 1);
+	int firstIsF = (ch == 'f' || ch == 'F');
 
 	// Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
 	if (ch == 'r' || ch == 'R') {
@@ -98,16 +131,16 @@ int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex,
 		*nextIndex = i + 3;
 
 		if (ch == '"')
-			return SCE_P_TRIPLEDOUBLE;
+			return (firstIsF ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE);
 		else
-			return SCE_P_TRIPLE;
+			return (firstIsF ? SCE_P_FTRIPLE : SCE_P_TRIPLE);
 	} else {
 		*nextIndex = i + 1;
 
 		if (ch == '"')
-			return SCE_P_STRING;
+			return (firstIsF ? SCE_P_FSTRING : SCE_P_STRING);
 		else
-			return SCE_P_CHARACTER;
+			return (firstIsF ? SCE_P_FCHARACTER : SCE_P_CHARACTER);
 	}
 }
 
@@ -126,12 +159,12 @@ inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
 }
 
 inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
-	if (ch < 0x80) 
+	if (ch < 0x80)
 		return (isalpha(ch) || ch == '_');
 
 	if (!unicodeIdentifiers)
 		return false;
-	
+
 	// Approximation, Python uses the XID_Start set from unicode data
 	// see http://unicode.org/reports/tr31/
 	CharacterCategory c = CategoriseCharacter(ch);
@@ -352,6 +385,9 @@ Sci_Position SCI_METHOD LexerPython::WordListSet(int n, const char *wl) {
 void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
 	Accessor styler(pAccess, NULL);
 
+	// Track whether in f-string exp; an array is used for a stack to
+	// handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
+	int fstringStateStack[4] = { 0, };
 	const Sci_Position endPos = startPos + length;
 
 	// Backtrack to previous line in case need to fix its tab whinging
@@ -417,15 +453,14 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 		}
 
 		if (sc.atLineEnd) {
-			if ((sc.state == SCE_P_DEFAULT) ||
-			        (sc.state == SCE_P_TRIPLE) ||
-			        (sc.state == SCE_P_TRIPLEDOUBLE)) {
+			if ((sc.state == SCE_P_DEFAULT)
+				|| IsPyTripleQuoteStringState(sc.state)) {
 				// Perform colourisation of white space and triple quoted strings at end of each line to allow
 				// tab marking to work inside white space and triple quoted strings
 				sc.SetState(sc.state);
 			}
 			lineCurrent++;
-			if ((sc.state == SCE_P_STRING) || (sc.state == SCE_P_CHARACTER)) {
+			if (IsPySingleQuoteStringState(sc.state)) {
 				if (inContinuedString || options.stringsOverNewline) {
 					inContinuedString = false;
 				} else {
@@ -439,7 +474,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 
 		bool needEOLCheck = false;
 
-		// Check for a state end
+
 		if (sc.state == SCE_P_OPERATOR) {
 			kwLast = kwOther;
 			sc.SetState(SCE_P_DEFAULT);
@@ -524,7 +559,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 			if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
 				sc.SetState(SCE_P_DEFAULT);
 			}
-		} else if ((sc.state == SCE_P_STRING) || (sc.state == SCE_P_CHARACTER)) {
+		} else if (IsPySingleQuoteStringState(sc.state)) {
 			if (sc.ch == '\\') {
 				if ((sc.chNext == '\r') && (sc.GetRelative(2) == '\n')) {
 					sc.Forward();
@@ -535,14 +570,16 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 					// Don't roll over the newline.
 					sc.Forward();
 				}
-			} else if ((sc.state == SCE_P_STRING) && (sc.ch == '\"')) {
+			} else if (((sc.state == SCE_P_STRING || sc.state == SCE_P_FSTRING))
+				   && (sc.ch == '\"')) {
 				sc.ForwardSetState(SCE_P_DEFAULT);
 				needEOLCheck = true;
-			} else if ((sc.state == SCE_P_CHARACTER) && (sc.ch == '\'')) {
+			} else if (((sc.state == SCE_P_CHARACTER) || (sc.state == SCE_P_FCHARACTER))
+				   && (sc.ch == '\'')) {
 				sc.ForwardSetState(SCE_P_DEFAULT);
 				needEOLCheck = true;
 			}
-		} else if (sc.state == SCE_P_TRIPLE) {
+		} else if ((sc.state == SCE_P_TRIPLE) || (sc.state == SCE_P_FTRIPLE)) {
 			if (sc.ch == '\\') {
 				sc.Forward();
 			} else if (sc.Match("\'\'\'")) {
@@ -551,7 +588,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 				sc.ForwardSetState(SCE_P_DEFAULT);
 				needEOLCheck = true;
 			}
-		} else if (sc.state == SCE_P_TRIPLEDOUBLE) {
+		} else if ((sc.state == SCE_P_TRIPLEDOUBLE) || (sc.state == SCE_P_FTRIPLEDOUBLE)) {
 			if (sc.ch == '\\') {
 				sc.Forward();
 			} else if (sc.Match("\"\"\"")) {
@@ -561,6 +598,18 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 				needEOLCheck = true;
 			}
 		}
+		
+		// Note: a separate if is used here rather than else if, because the
+		// f-string states also match some of the clauses above
+		if (IsPyFStringState(sc.state) && sc.ch == '{') {
+			if (sc.chNext == '{') {
+				sc.Forward();
+			} else {
+				PushStateToStack(sc.state, fstringStateStack, ELEMENTS(fstringStateStack));
+				sc.ForwardSetState(SCE_P_DEFAULT);
+			}
+		}
+		// End of code to find the end of a state
 
 		if (!indentGood && !IsASpaceOrTab(sc.ch)) {
 			styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 1);
@@ -583,7 +632,9 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
 
 		// Check for a new state starting character
 		if (sc.state == SCE_P_DEFAULT) {
-			if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
+			if (fstringStateStack[0] != 0 && sc.ch == '}') {
+				sc.SetState(PopFromStateStack(fstringStateStack, ELEMENTS(fstringStateStack)));
+			} else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
 				if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) {
 					base_n_number = true;
 					sc.SetState(SCE_P_NUMBER);
author	John Ehresman <unknown>	2017-02-12 09:15:08 +1100
committer	John Ehresman <unknown>	2017-02-12 09:15:08 +1100
commit	c0df83f637c42982662aaa802f691abb3d529a2e (patch)
tree	73c32bc0eaf0c8ad54cc4d19cddb160fd34d2ea0 /lexers/LexPython.cxx
parent	5bd32babfe8721de32026d78378848f90fe6d68c (diff)
download	scintilla-mirror-c0df83f637c42982662aaa802f691abb3d529a2e.tar.gz