diff options
Diffstat (limited to 'src/KeyWords.cxx')
-rw-r--r-- | src/KeyWords.cxx | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/src/KeyWords.cxx b/src/KeyWords.cxx index 2e1e82882..366cb47dc 100644 --- a/src/KeyWords.cxx +++ b/src/KeyWords.cxx @@ -23,6 +23,192 @@ using namespace Scintilla; #endif +/** + * Creates an array that points into each word in the string and puts \0 terminators + * after each word. + */ +static char **ArrayFromWordList(char *wordlist, int *len, bool onlyLineEnds = false) { + int prev = '\n'; + int words = 0; + // For rapid determination of whether a character is a separator, build + // a look up table. + bool wordSeparator[256]; + for (int i=0;i<256; i++) { + wordSeparator[i] = false; + } + wordSeparator['\r'] = true; + wordSeparator['\n'] = true; + if (!onlyLineEnds) { + wordSeparator[' '] = true; + wordSeparator['\t'] = true; + } + for (int j = 0; wordlist[j]; j++) { + int curr = static_cast<unsigned char>(wordlist[j]); + if (!wordSeparator[curr] && wordSeparator[prev]) + words++; + prev = curr; + } + char **keywords = new char *[words + 1]; + if (keywords) { + words = 0; + prev = '\0'; + size_t slen = strlen(wordlist); + for (size_t k = 0; k < slen; k++) { + if (!wordSeparator[static_cast<unsigned char>(wordlist[k])]) { + if (!prev) { + keywords[words] = &wordlist[k]; + words++; + } + } else { + wordlist[k] = '\0'; + } + prev = wordlist[k]; + } + keywords[words] = &wordlist[slen]; + *len = words; + } else { + *len = 0; + } + return keywords; +} + +void WordList::Clear() { + if (words) { + delete []list; + delete []words; + } + words = 0; + list = 0; + len = 0; + sorted = false; +} + +void WordList::Set(const char *s) { + list = new char[strlen(s) + 1]; + strcpy(list, s); + sorted = false; + words = ArrayFromWordList(list, &len, onlyLineEnds); +} + +extern "C" int cmpString(const void *a1, const void *a2) { + // Can't work out the correct incantation to use modern casts here + return strcmp(*(char**)(a1), *(char**)(a2)); +} + +static void SortWordList(char **words, unsigned int len) { + qsort(reinterpret_cast<void*>(words), len, sizeof(*words), + cmpString); +} + +bool WordList::InList(const char *s) { + if (0 == words) + return false; + if (!sorted) { + sorted = true; + SortWordList(words, len); + for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++) + starts[k] = -1; + for (int l = len - 1; l >= 0; l--) { + unsigned char indexChar = words[l][0]; + starts[indexChar] = l; + } + } + unsigned char firstChar = s[0]; + int j = starts[firstChar]; + if (j >= 0) { + while ((unsigned char)words[j][0] == firstChar) { + if (s[1] == words[j][1]) { + const char *a = words[j] + 1; + const char *b = s + 1; + while (*a && *a == *b) { + a++; + b++; + } + if (!*a && !*b) + return true; + } + j++; + } + } + j = starts['^']; + if (j >= 0) { + while (words[j][0] == '^') { + const char *a = words[j] + 1; + const char *b = s; + while (*a && *a == *b) { + a++; + b++; + } + if (!*a) + return true; + j++; + } + } + return false; +} + +/** similar to InList, but word s can be a substring of keyword. + * eg. the keyword define is defined as def~ine. This means the word must start + * with def to be a keyword, but also defi, defin and define are valid. + * The marker is ~ in this case. + */ +bool WordList::InListAbbreviated(const char *s, const char marker) { + if (0 == words) + return false; + if (!sorted) { + sorted = true; + SortWordList(words, len); + for (unsigned int k = 0; k < (sizeof(starts) / sizeof(starts[0])); k++) + starts[k] = -1; + for (int l = len - 1; l >= 0; l--) { + unsigned char indexChar = words[l][0]; + starts[indexChar] = l; + } + } + unsigned char firstChar = s[0]; + int j = starts[firstChar]; + if (j >= 0) { + while (words[j][0] == firstChar) { + bool isSubword = false; + int start = 1; + if (words[j][1] == marker) { + isSubword = true; + start++; + } + if (s[1] == words[j][start]) { + const char *a = words[j] + start; + const char *b = s + 1; + while (*a && *a == *b) { + a++; + if (*a == marker) { + isSubword = true; + a++; + } + b++; + } + if ((!*a || isSubword) && !*b) + return true; + } + j++; + } + } + j = starts['^']; + if (j >= 0) { + while (words[j][0] == '^') { + const char *a = words[j] + 1; + const char *b = s; + while (*a && *a == *b) { + a++; + b++; + } + if (!*a) + return true; + j++; + } + } + return false; +} + const LexerModule *LexerModule::base = 0; int LexerModule::nextLanguage = SCLEX_AUTOMATIC+1; |