diff options
author | Neil <nyamatongwe@gmail.com> | 2014-05-12 17:49:28 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2014-05-12 17:49:28 +1000 |
commit | 909aec98c3efdcc2499c50a387fe4c025064ec89 (patch) | |
tree | cfa3c898e0698e58131b015fbabb1c6b95590c84 | |
parent | 0c0ef91799e521f2aecd0e680135478fcf3af4f9 (diff) | |
download | scintilla-mirror-909aec98c3efdcc2499c50a387fe4c025064ec89.tar.gz |
Feature [feature-requests:#1051]. Support #if defined SYMBOL.
Previously required braces around like #if defined(SYMBOL).
Also understand macros with arguments such as version checking macros like
#if GTK_CHECK_VERSION(2,22,0)
-rw-r--r-- | lexers/LexCPP.cxx | 346 |
1 files changed, 252 insertions, 94 deletions
diff --git a/lexers/LexCPP.cxx b/lexers/LexCPP.cxx index fbc2a4578..fbe8748ca 100644 --- a/lexers/LexCPP.cxx +++ b/lexers/LexCPP.cxx @@ -86,6 +86,60 @@ bool followsReturnKeyword(StyleContext &sc, LexAccessor &styler) { return !*s; } +bool IsSpaceOrTab(int ch) { + return ch == ' ' || ch == '\t'; +} + +bool OnlySpaceOrTab(const std::string &s) { + for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) { + if (!IsSpaceOrTab(*it)) + return false; + } + return true; +} + +std::vector<std::string> StringSplit(const std::string &text, int separator) { + std::vector<std::string> vs(text.empty() ? 0 : 1); + for (std::string::const_iterator it = text.begin(); it != text.end(); ++it) { + if (*it == separator) { + vs.push_back(std::string()); + } else { + vs.back() += *it; + } + } + return vs; +} + +struct BracketPair { + std::vector<std::string>::iterator itBracket; + std::vector<std::string>::iterator itEndBracket; +}; + +BracketPair FindBracketPair(std::vector<std::string> &tokens) { + BracketPair bp; + std::vector<std::string>::iterator itTok = std::find(tokens.begin(), tokens.end(), "("); + bp.itBracket = tokens.end(); + bp.itEndBracket = tokens.end(); + if (itTok != tokens.end()) { + bp.itBracket = itTok; + size_t nest = 0; + while (itTok != tokens.end()) { + if (*itTok == "(") { + nest++; + } else if (*itTok == ")") { + nest--; + if (nest == 0) { + bp.itEndBracket = itTok; + return bp; + } + } + ++itTok; + } + } + bp.itBracket = tokens.end(); + return bp; +} + void highlightTaskMarker(StyleContext &sc, LexAccessor &styler, int activity, WordList &markerList, bool caseSensitive){ if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) { @@ -169,33 +223,14 @@ bool IsStreamCommentStyle(int style) { style == SCE_C_COMMENTDOCKEYWORDERROR; } -std::vector<std::string> Tokenize(const std::string &s) { - // Break into space separated tokens - std::string word; - std::vector<std::string> tokens; - for (const char *cp = s.c_str(); *cp; cp++) { - if ((*cp == ' ') || (*cp == '\t')) { - if (!word.empty()) { - tokens.push_back(word); - word = ""; - } - } else { - word += *cp; - } - } - if (!word.empty()) { - tokens.push_back(word); - } - return tokens; -} - struct PPDefinition { int line; std::string key; std::string value; bool isUndef; - PPDefinition(int line_, const std::string &key_, const std::string &value_, bool isUndef_ = false) : - line(line_), key(key_), value(value_), isUndef(isUndef_) { + std::string arguments; + PPDefinition(int line_, const std::string &key_, const std::string &value_, bool isUndef_ = false, std::string arguments_="") : + line(line_), key(key_), value(value_), isUndef(isUndef_), arguments(arguments_) { } }; @@ -396,6 +431,7 @@ class LexerCPP : public ILexerWithSubStyles { CharacterSet setArithmethicOp; CharacterSet setRelOp; CharacterSet setLogicalOp; + CharacterSet setWordStart; PPStates vlls; std::vector<PPDefinition> ppDefineHistory; WordList keywords; @@ -404,7 +440,22 @@ class LexerCPP : public ILexerWithSubStyles { WordList keywords4; WordList ppDefinitions; WordList markerList; - std::map<std::string, std::string> preprocessorDefinitionsStart; + struct SymbolValue { + std::string value; + std::string arguments; + SymbolValue(const std::string &value_="", const std::string &arguments_="") : value(value_), arguments(arguments_) { + } + SymbolValue &operator = (const std::string &value_) { + value = value_; + arguments.clear(); + return *this; + } + bool IsMacro() const { + return !arguments.empty(); + } + }; + typedef std::map<std::string, SymbolValue> SymbolTable; + SymbolTable preprocessorDefinitionsStart; OptionsCPP options; OptionSetCPP osCPP; EscapeSequence escapeSeq; @@ -494,8 +545,9 @@ public: static int MaskActive(int style) { return style & ~activeFlag; } - void EvaluateTokens(std::vector<std::string> &tokens); - bool EvaluateExpression(const std::string &expr, const std::map<std::string, std::string> &preprocessorDefinitions); + void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions); + std::vector<std::string> Tokenize(const std::string &expr) const; + bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions); }; int SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) { @@ -549,7 +601,16 @@ int SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) { if (cpEquals) { std::string name(cpDefinition, cpEquals - cpDefinition); std::string val(cpEquals+1); - preprocessorDefinitionsStart[name] = val; + size_t bracket = name.find('('); + size_t bracketEnd = name.find(')'); + if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) { + // Macro + std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1); + name = name.substr(0, bracket); + preprocessorDefinitionsStart[name] = SymbolValue(val, args); + } else { + preprocessorDefinitionsStart[name] = val; + } } else { std::string name(cpDefinition); std::string val("1"); @@ -579,7 +640,7 @@ void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]"); - CharacterSet setWordStart(CharacterSet::setAlpha, "_", 0x80, true); + setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true); CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n"); @@ -637,12 +698,12 @@ void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, definitionsChanged = true; } - std::map<std::string, std::string> preprocessorDefinitions = preprocessorDefinitionsStart; + SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart; for (std::vector<PPDefinition>::iterator itDef = ppDefineHistory.begin(); itDef != ppDefineHistory.end(); ++itDef) { if (itDef->isUndef) preprocessorDefinitions.erase(itDef->key); else - preprocessorDefinitions[itDef->key] = itDef->value; + preprocessorDefinitions[itDef->key] = SymbolValue(itDef->value, itDef->arguments); } std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1); @@ -1135,19 +1196,35 @@ void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, } else if (sc.Match("define")) { if (options.updatePreprocessor && !preproc.IsInactive()) { std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true); - if (restOfLine.find(")") == std::string::npos) { // Don't handle macros with arguments - std::vector<std::string> tokens = Tokenize(restOfLine); - std::string key; - std::string value("1"); - if (tokens.size() >= 1) { - key = tokens[0]; - if (tokens.size() >= 2) { - value = tokens[1]; - } - preprocessorDefinitions[key] = value; - ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value)); - definitionsChanged = true; - } + size_t startName = 0; + while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName])) + startName++; + size_t endName = startName; + while ((endName < restOfLine.length()) && setWord.Contains(static_cast<unsigned char>(restOfLine[endName]))) + endName++; + std::string key = restOfLine.substr(startName, endName-startName); + if (restOfLine[endName] == '(') { + // Macro + size_t endArgs = endName; + while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')')) + endArgs++; + std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1); + size_t startValue = endArgs+1; + while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue])) + startValue++; + std::string value = restOfLine.substr(startValue); + preprocessorDefinitions[key] = SymbolValue(value, args); + ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args)); + definitionsChanged = true; + } else { + // Value + size_t startValue = endName; + while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue])) + startValue++; + std::string value = restOfLine.substr(startValue); + preprocessorDefinitions[key] = value; + ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value)); + definitionsChanged = true; } } } else if (sc.Match("undef")) { @@ -1297,19 +1374,36 @@ void SCI_METHOD LexerCPP::Fold(unsigned int startPos, int length, int initStyle, } } -void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens) { +void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) { + + // Remove whitespace tokens + tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end()); - // Evaluate defined() statements to either 0 or 1 - for (size_t i=0; (i+2)<tokens.size();) { - if ((tokens[i] == "defined") && (tokens[i+1] == "(")) { + // Evaluate defined statements to either 0 or 1 + for (size_t i=0; (i+1)<tokens.size();) { + if (tokens[i] == "defined") { const char *val = "0"; - if (tokens[i+2] == ")") { - // defined() - tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3); - } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) { - // defined(<int>) - tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4); - val = "1"; + if (tokens[i+1] == "(") { + if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) { + // defined() + tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3); + } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) { + // defined(<identifier>) + SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]); + if (it != preprocessorDefinitions.end()) { + val = "1"; + } + tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4); + } else { + // Spurious '(' so erase as more likely to result in false + tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2); + } + } else { + // defined <identifier> + SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]); + if (it != preprocessorDefinitions.end()) { + val = "1"; + } } tokens[i] = val; } else { @@ -1317,21 +1411,79 @@ void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens) { } } + // Evaluate identifiers + for (size_t i=0; i<tokens.size();) { + if (setWordStart.Contains(static_cast<unsigned char>(tokens[i][0]))) { + SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]); + if (it != preprocessorDefinitions.end()) { + // Tokenize value + std::vector<std::string> macroTokens = Tokenize(it->second.value); + if (it->second.IsMacro()) { + if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) { + // Create map of argument name to value + std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ','); + std::map<std::string, std::string> arguments; + size_t arg = 0; + size_t tok = i+2; + while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) { + if (tokens.at(tok) != ",") { + arguments[argumentNames.at(arg)] = tokens.at(tok); + arg++; + } + tok++; + } + + // Remove invocation + tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1); + + // Substitute values into macro + std::vector<std::string> macroTokens = Tokenize(it->second.value); + macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end()); + + for (size_t iMacro = 0; iMacro < macroTokens.size();) { + if (setWordStart.Contains(static_cast<unsigned char>(macroTokens[iMacro][0]))) { + std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]); + if (itFind != arguments.end()) { + macroTokens[iMacro] = itFind->second; + } + } + iMacro++; + } + + // Insert results back into tokens + tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end()); + + } else { + i++; + } + } else { + // Remove invocation + tokens.erase(tokens.begin() + i); + // Insert results back into tokens + tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end()); + } + } else { + // Identifier not found + tokens.erase(tokens.begin() + i); + } + } else { + i++; + } + } + // Find bracketed subexpressions and recurse on them - std::vector<std::string>::iterator itBracket = std::find(tokens.begin(), tokens.end(), "("); - std::vector<std::string>::iterator itEndBracket = std::find(tokens.begin(), tokens.end(), ")"); - while ((itBracket != tokens.end()) && (itEndBracket != tokens.end()) && (itEndBracket > itBracket)) { - std::vector<std::string> inBracket(itBracket + 1, itEndBracket); - EvaluateTokens(inBracket); + BracketPair bracketPair = FindBracketPair(tokens); + while (bracketPair.itBracket != tokens.end()) { + std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket); + EvaluateTokens(inBracket, preprocessorDefinitions); // The insertion is done before the removal because there were failures with the opposite approach - tokens.insert(itBracket, inBracket.begin(), inBracket.end()); - itBracket = std::find(tokens.begin(), tokens.end(), "("); - itEndBracket = std::find(tokens.begin(), tokens.end(), ")"); - tokens.erase(itBracket, itEndBracket + 1); + tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end()); - itBracket = std::find(tokens.begin(), tokens.end(), "("); - itEndBracket = std::find(tokens.begin(), tokens.end(), ")"); + bracketPair = FindBracketPair(tokens); + tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1); + + bracketPair = FindBracketPair(tokens); } // Evaluate logical negations @@ -1400,45 +1552,51 @@ void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens) { } } -bool LexerCPP::EvaluateExpression(const std::string &expr, const std::map<std::string, std::string> &preprocessorDefinitions) { - // Break into tokens, replacing with definitions - std::string word; +std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const { + // Break into tokens std::vector<std::string> tokens; const char *cp = expr.c_str(); - for (;;) { + while (*cp) { + std::string word; if (setWord.Contains(static_cast<unsigned char>(*cp))) { - word += *cp; - } else { - std::map<std::string, std::string>::const_iterator it = preprocessorDefinitions.find(word); - if (it != preprocessorDefinitions.end()) { - tokens.push_back(it->second); - } else if (!word.empty() && ((word[0] >= '0' && word[0] <= '9') || (word == "defined"))) { - tokens.push_back(word); + // Identifiers and numbers + while (setWord.Contains(static_cast<unsigned char>(*cp))) { + word += *cp; + cp++; } - word = ""; - if (!*cp) { - break; + } else if (IsSpaceOrTab(*cp)) { + while (IsSpaceOrTab(*cp)) { + word += *cp; + cp++; } - if ((*cp != ' ') && (*cp != '\t')) { - std::string op(cp, 1); - if (setRelOp.Contains(static_cast<unsigned char>(*cp))) { - if (setRelOp.Contains(static_cast<unsigned char>(cp[1]))) { - op += cp[1]; - cp++; - } - } else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) { - if (setLogicalOp.Contains(static_cast<unsigned char>(cp[1]))) { - op += cp[1]; - cp++; - } - } - tokens.push_back(op); + } else if (setRelOp.Contains(static_cast<unsigned char>(*cp))) { + word += *cp; + cp++; + if (setRelOp.Contains(static_cast<unsigned char>(*cp))) { + word += *cp; + cp++; } + } else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) { + word += *cp; + cp++; + if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) { + word += *cp; + cp++; + } + } else { + // Should handle strings, characters, and comments here + word += *cp; + cp++; } - cp++; + tokens.push_back(word); } + return tokens; +} + +bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) { + std::vector<std::string> tokens = Tokenize(expr); - EvaluateTokens(tokens); + EvaluateTokens(tokens, preprocessorDefinitions); // "0" or "" -> false else true bool isFalse = tokens.empty() || |