aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexers/LexPython.cxx
diff options
context:
space:
mode:
authorJohn Ehresman <unknown>2017-03-08 13:41:56 +1100
committerJohn Ehresman <unknown>2017-03-08 13:41:56 +1100
commitf498a000ab363cba7c2987cb6f6e596601446fa4 (patch)
tree699501d49f0e6f64e80234dc0f87f41d0843f52f /lexers/LexPython.cxx
parent4a0fff2699da348228ff22b134f4149bc02ceb9a (diff)
downloadscintilla-mirror-f498a000ab363cba7c2987cb6f6e596601446fa4.tar.gz
Improve f-string support.
Add support for multiline expressions in triple quoted f-strings. Handle nested "()", "[]", and "{}" in f-string expressions and terminate expression coloring at ":" or "!". End f-string if ending quote is seen in a "{}" expression. Fix terminating single quoted f-string at EOL.
Diffstat (limited to 'lexers/LexPython.cxx')
-rw-r--r--lexers/LexPython.cxx177
1 files changed, 145 insertions, 32 deletions
diff --git a/lexers/LexPython.cxx b/lexers/LexPython.cxx
index 5b27cac29..a159528ae 100644
--- a/lexers/LexPython.cxx
+++ b/lexers/LexPython.cxx
@@ -38,6 +38,28 @@ using namespace Scintilla;
namespace {
// Use an unnamed namespace to protect the functions and classes from name conflicts
+/* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may
+ have arbitrary expressions in {}. The tokens in the expressions are lexed as if
+ they were outside of any string. Expressions may contain { and } characters as
+ long as there is a closing } for every {, may be 2+ lines in a triple quoted
+ string, and may have a formatting specifier following a ! or :, but both !
+ and : are valid inside of a bracketed expression and != is a valid
+ expression token even outside of a bracketed expression.
+
+ When in an f-string expression, the lexer keeps track of the state value of
+ the f-string and the nesting count for the expression (# of [, (, { seen - # of
+ }, ), ] seen). f-strings may be nested (e.g. f'{ a + f"{1+2}" }') so a stack of
+ states and nesting counts is kept. If an f-string expression continues beyond
+ the end of a line, this stack is saved in a std::map that maps a line number to
+ the stack at the end of that line. std::vector is used for the stack.
+
+ The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/
+*/
+struct SingleFStringExpState {
+ int state;
+ int nestingCount;
+};
+
/* kwCDef, kwCTypeName only used for Cython */
enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };
@@ -86,20 +108,40 @@ bool IsPyTripleQuoteStringState(int st) {
(st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE));
}
-void PushStateToStack(int state, int *stack, int stackSize) {
- for (int i = stackSize-1; i > 0; i--) {
- stack[i] = stack[i-1];
- }
- stack[0] = state;
+char GetPyStringQuoteChar(int st) {
+ if ((st == SCE_P_CHARACTER) || (st == SCE_P_FCHARACTER) ||
+ (st == SCE_P_TRIPLE) || (st == SCE_P_FTRIPLE))
+ return '\'';
+ if ((st == SCE_P_STRING) || (st == SCE_P_FSTRING) ||
+ (st == SCE_P_TRIPLEDOUBLE) || (st == SCE_P_FTRIPLEDOUBLE))
+ return '"';
+
+ return '\0';
+}
+
+void PushStateToStack(int state, std::vector<SingleFStringExpState>& stack, SingleFStringExpState*& currentFStringExp) {
+ SingleFStringExpState single = {state, 0};
+ stack.push_back(single);
+
+ currentFStringExp = &stack.back();
}
-int PopFromStateStack(int *stack, int stackSize) {
- int top = stack[0];
- for (int i = 0; i < stackSize - 1; i++) {
- stack[i] = stack[i+1];
+int PopFromStateStack(std::vector<SingleFStringExpState>& stack, SingleFStringExpState*& currentFStringExp) {
+ int state = 0;
+
+ if (!stack.empty()) {
+ state = stack.back().state;
+ stack.pop_back();
+ }
+
+ if (stack.empty()) {
+ currentFStringExp = NULL;
+ }
+ else {
+ currentFStringExp = &stack.back();
}
- stack[stackSize-1] = 0;
- return top;
+
+ return state;
}
/* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
@@ -282,6 +324,7 @@ class LexerPython : public ILexerWithSubStyles {
OptionSetPython osPython;
enum { ssIdentifier };
SubStyles subStyles;
+ std::map<int, std::vector<SingleFStringExpState> > ftripleStateAtEol;
public:
explicit LexerPython() :
subStyles(styleSubable, 0x80, 0x40, 0) {
@@ -353,7 +396,7 @@ public:
}
private:
- void ProcessLineEnd(StyleContext &sc, int *fstringStateStack, bool &inContinuedString) const;
+ void ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState>& fstringStateStack, SingleFStringExpState*& currentFStringExp, bool &inContinuedString);
};
Sci_Position SCI_METHOD LexerPython::PropertySet(const char *key, const char *val) {
@@ -385,11 +428,30 @@ Sci_Position SCI_METHOD LexerPython::WordListSet(int n, const char *wl) {
return firstModification;
}
-void LexerPython::ProcessLineEnd(StyleContext &sc, int *fstringStateStack, bool &inContinuedString) const {
- // Restore to to outermost string state if in an f-string expression and
- // let code below decide what to do
- while (fstringStateStack[0] != 0) {
- sc.SetState(PopFromStateStack(fstringStateStack, 4));
+void LexerPython::ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState>& fstringStateStack, SingleFStringExpState*& currentFStringExp, bool &inContinuedString) {
+ long deepestSingleStateIndex = -1;
+ unsigned long i;
+
+ // Find the deepest single quote state because that string will end; no \ continuation in f-string
+ for (i = 0; i < fstringStateStack.size(); i++) {
+ if (IsPySingleQuoteStringState(fstringStateStack[i].state)) {
+ deepestSingleStateIndex = i;
+ break;
+ }
+ }
+
+ if (deepestSingleStateIndex != -1) {
+ sc.SetState(fstringStateStack[deepestSingleStateIndex].state);
+ while (fstringStateStack.size() > (unsigned long)deepestSingleStateIndex) {
+ PopFromStateStack(fstringStateStack, currentFStringExp);
+ }
+ }
+ if (!fstringStateStack.empty()) {
+ std::pair<int, std::vector<SingleFStringExpState> > val;
+ val.first = sc.currentLine;
+ val.second = fstringStateStack;
+
+ ftripleStateAtEol.insert(val);
}
if ((sc.state == SCE_P_DEFAULT)
@@ -411,9 +473,11 @@ void LexerPython::ProcessLineEnd(StyleContext &sc, int *fstringStateStack, bool
void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
Accessor styler(pAccess, NULL);
- // Track whether in f-string expression; an array is used for a stack to
+ // Track whether in f-string expression; vector is used for a stack to
// handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
- int fstringStateStack[4] = { 0, };
+ std::vector<SingleFStringExpState> fstringStateStack;
+ SingleFStringExpState* currentFStringExp = NULL;
+
const Sci_Position endPos = startPos + length;
// Backtrack to previous line in case need to fix its tab whinging
@@ -445,6 +509,17 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
initStyle = SCE_P_DEFAULT;
}
+ // Set up fstate stack from last line and remove any subsequent ftriple at eol states
+ std::map<int, std::vector<SingleFStringExpState> >::iterator it;
+ it = ftripleStateAtEol.find(lineCurrent - 1);
+ if (it != ftripleStateAtEol.end()) {
+ fstringStateStack = it->second;
+ }
+ it = ftripleStateAtEol.lower_bound(lineCurrent);
+ if (it != ftripleStateAtEol.end()) {
+ ftripleStateAtEol.erase(it, ftripleStateAtEol.end());
+ }
+
kwType kwLast = kwOther;
int spaceFlags = 0;
styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
@@ -479,7 +554,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
}
if (sc.atLineEnd) {
- ProcessLineEnd(sc, fstringStateStack, inContinuedString);
+ ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
lineCurrent++;
if (!sc.More())
break;
@@ -583,12 +658,7 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
// Don't roll over the newline.
sc.Forward();
}
- } else if (((sc.state == SCE_P_STRING || sc.state == SCE_P_FSTRING))
- && (sc.ch == '\"')) {
- sc.ForwardSetState(SCE_P_DEFAULT);
- needEOLCheck = true;
- } else if (((sc.state == SCE_P_CHARACTER) || (sc.state == SCE_P_FCHARACTER))
- && (sc.ch == '\'')) {
+ } else if (sc.ch == GetPyStringQuoteChar(sc.state)) {
sc.ForwardSetState(SCE_P_DEFAULT);
needEOLCheck = true;
}
@@ -611,18 +681,51 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
needEOLCheck = true;
}
}
-
+
// Note if used and not if else because string states also match
// some of the above clauses
if (IsPyFStringState(sc.state) && sc.ch == '{') {
if (sc.chNext == '{') {
sc.Forward();
} else {
- PushStateToStack(sc.state, fstringStateStack, ELEMENTS(fstringStateStack));
+ PushStateToStack(sc.state, fstringStateStack, currentFStringExp);
sc.ForwardSetState(SCE_P_DEFAULT);
}
needEOLCheck = true;
}
+
+ // If in an f-string expression, check for the ending quote(s)
+ // and end f-string to handle syntactically incorrect cases like
+ // f'{' and f"""{"""
+ if (!fstringStateStack.empty() && (sc.ch == '\'' || sc.ch == '"')) {
+ long matching_stack_i = -1;
+ for (unsigned long stack_i = 0; stack_i < fstringStateStack.size() && matching_stack_i == -1; stack_i++) {
+ int stack_state = fstringStateStack[stack_i].state;
+ char quote = GetPyStringQuoteChar(stack_state);
+ if (sc.ch == quote) {
+ if (IsPySingleQuoteStringState(stack_state)) {
+ matching_stack_i = stack_i;
+ }
+ else if (quote == '"' ? sc.Match("\"\"\"") : sc.Match("'''")) {
+ matching_stack_i = stack_i;
+ }
+ }
+ }
+
+ if (matching_stack_i != -1) {
+ sc.SetState(fstringStateStack[matching_stack_i].state);
+ if (IsPyTripleQuoteStringState(fstringStateStack[matching_stack_i].state)) {
+ sc.Forward();
+ sc.Forward();
+ }
+ sc.ForwardSetState(SCE_P_DEFAULT);
+ needEOLCheck = true;
+
+ while (fstringStateStack.size() > (unsigned long)matching_stack_i) {
+ PopFromStateStack(fstringStateStack, currentFStringExp);
+ }
+ }
+ }
// End of code to find the end of a state
if (!indentGood && !IsASpaceOrTab(sc.ch)) {
@@ -638,16 +741,26 @@ void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, in
// State exit code may have moved on to end of line
if (needEOLCheck && sc.atLineEnd) {
- ProcessLineEnd(sc, fstringStateStack, inContinuedString);
+ ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
lineCurrent++;
styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
if (!sc.More())
break;
}
- // If in f-string expression, check for } to resume f-string state
- if (fstringStateStack[0] != 0 && sc.ch == '}') {
- sc.SetState(PopFromStateStack(fstringStateStack, ELEMENTS(fstringStateStack)));
+ // If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
+ if (currentFStringExp != NULL && !IsPySingleQuoteStringState(sc.state) && !IsPyTripleQuoteStringState(sc.state)) {
+ if (currentFStringExp->nestingCount == 0 && (sc.ch == '}' || sc.ch == ':' || (sc.ch == '!' && sc.chNext != '='))) {
+ sc.SetState(PopFromStateStack(fstringStateStack, currentFStringExp));
+ }
+ else {
+ if (sc.ch == '{' || sc.ch == '[' || sc.ch == '(') {
+ currentFStringExp->nestingCount++;
+ }
+ else if (sc.ch == '}' || sc.ch == ']' || sc.ch == ')') {
+ currentFStringExp->nestingCount--;
+ }
+ }
}
// Check for a new state starting character