about summary refs log tree commit diff homepage
path: root/lexlib/WordList.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'lexlib/WordList.cxx')
-rw-r--r-- lexlib/WordList.cxx 315
1 files changed, 0 insertions, 315 deletions
diff --git a/lexlib/WordList.cxx b/lexlib/WordList.cxx
deleted file mode 100644
index 460995daa..000000000
--- a/lexlib/WordList.cxx
+++ /dev/null
@@ -1,315 +0,0 @@
-// Scintilla source code edit control
-/** @file WordList.cxx
- ** Hold a list of words.
- **/
-// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
-// The License.txt file describes the conditions under which this software may be distributed.
-
-#include <cstdlib>
-#include <cassert>
-#include <cstring>
-
-#include <algorithm>
-#include <iterator>
-
-#include "WordList.h"
-
-using namespace Scintilla;
-
-/**
- * Splits a mutable buffer into words in place.
- *
- * Each separator byte within the first @a slen bytes of @a wordlist is overwritten
- * with '\0' and a newly allocated array pointing at the first byte of every word is
- * returned. Separators are '\r' and '\n', plus ' ' and '\t' unless @a onlyLineEnds
- * is true. A '\0' byte found inside the first @a slen bytes is also treated as a
- * separator, so the counting pass and the storing pass can never disagree about the
- * number of words.
- *
- * @param wordlist     Buffer to split; modified in place.
- * @param slen         Number of bytes of @a wordlist to examine.
- * @param len          Receives the number of words found.
- * @param onlyLineEnds When true only line ends separate words.
- * @return Array allocated with new[] holding *len word pointers followed by one
- *         extra entry that points at wordlist[slen]. The caller frees it with delete[].
- */
-static char **ArrayFromWordList(char *wordlist, size_t slen, int *len, bool onlyLineEnds = false) {
-	// For rapid determination of whether a character is a separator, build
-	// a look up table.
-	bool wordSeparator[256] = {};	// Initialise all to false.
-	wordSeparator[static_cast<unsigned char>('\0')] = true;
-	wordSeparator[static_cast<unsigned char>('\r')] = true;
-	wordSeparator[static_cast<unsigned char>('\n')] = true;
-	if (!onlyLineEnds) {
-		wordSeparator[static_cast<unsigned char>(' ')] = true;
-		wordSeparator[static_cast<unsigned char>('\t')] = true;
-	}
-
-	// First pass: count word starts (a non-separator preceded by a separator or
-	// by the start of the buffer) so the pointer array can be sized exactly.
-	int words = 0;
-	bool prevSeparator = true;
-	for (size_t j = 0; j < slen; j++) {
-		const bool currSeparator = wordSeparator[static_cast<unsigned char>(wordlist[j])];
-		if (!currSeparator && prevSeparator)
-			words++;
-		prevSeparator = currSeparator;
-	}
-
-	char **keywords = new char *[words + 1];
-	int wordsStore = 0;
-	if (words) {
-		// Second pass: terminate each word and record where it starts, using
-		// exactly the same word-start rule as the counting pass.
-		prevSeparator = true;
-		for (size_t k = 0; k < slen; k++) {
-			const bool currSeparator = wordSeparator[static_cast<unsigned char>(wordlist[k])];
-			if (currSeparator) {
-				wordlist[k] = '\0';
-			} else if (prevSeparator) {
-				keywords[wordsStore] = &wordlist[k];
-				wordsStore++;
-			}
-			prevSeparator = currSeparator;
-		}
-	}
-	assert(wordsStore == words);
-	// Extra trailing entry: points just past the examined bytes.
-	keywords[wordsStore] = &wordlist[slen];
-	*len = wordsStore;
-	return keywords;
-}
-
-/**
- * Construct an empty word list.
- * @param onlyLineEnds_ Stored in the onlyLineEnds member.
- */
-WordList::WordList(bool onlyLineEnds_) :
-	words(nullptr), list(nullptr), len(0), onlyLineEnds(onlyLineEnds_) {
-	// Only the first slot is written here; that is enough to stop static
-	// analyzers from reporting starts as uninitialized.
-	starts[0] = -1;
-}
-
-/** Release everything the list holds by delegating to Clear(). */
-WordList::~WordList() {
-	this->Clear();
-}
-
-/** True when len is non-zero. */
-WordList::operator bool() const noexcept {
-	return len != 0;
-}
-
-/**
- * Compare two lists entry by entry.
- * @return true when the lengths differ or any pair of entries at the same
- *         index differs under strcmp; false otherwise.
- */
-bool WordList::operator!=(const WordList &other) const noexcept {
-	bool differs = (len != other.len);
-	// Entries are only inspected when the lengths already match.
-	for (int index = 0; !differs && index < len; index++) {
-		differs = strcmp(words[index], other.words[index]) != 0;
-	}
-	return differs;
-}
-
-/** Return the current value of len. */
-int WordList::Length() const noexcept {
-	return this->len;
-}
-
-/** Free the pointer array and the character buffer, then reset to the empty state. */
-void WordList::Clear() noexcept {
-	if (words != nullptr) {
-		delete []words;
-		delete []list;
-	}
-	len = 0;
-	list = nullptr;
-	words = nullptr;
-}
-
-#ifdef _MSC_VER
-
-/** Ordering predicate for std::sort: true when a sorts before b under strcmp. */
-static bool cmpWords(const char *a, const char *b) {
-	const int order = strcmp(a, b);
-	return order < 0;
-}
-
-#else
-
-/**
- * Comparator for qsort. a and b each point at an array element, that is at a
- * pointer to a string; the strings are ordered with strcmp.
- */
-static int cmpWords(const void *a, const void *b) {
-	const char *const *lhs = static_cast<const char *const *>(a);
-	const char *const *rhs = static_cast<const char *const *>(b);
-	return strcmp(*lhs, *rhs);
-}
-
-/** Sort len string pointers into strcmp order with qsort. */
-static void SortWordList(char **words, unsigned int len) {
-	qsort(words, len, sizeof(char *), cmpWords);
-}
-
-#endif
-
-/**
- * Replace the contents of the list with the words found in s.
- * The text is copied, split with ArrayFromWordList and sorted into strcmp order.
- * @param s NUL terminated text holding the words.
- * @return false when the sorted words equal those already held (the list is left
- *         untouched); true when the list was replaced.
- */
-bool WordList::Set(const char *s) {
-	// Work on a private copy, terminator included, because splitting writes
-	// '\0' bytes into the buffer.
-	const size_t bytes = strlen(s) + 1;
-	char *buffer = new char[bytes];
-	memcpy(buffer, s, bytes);
-	int count = 0;
-	char **pointers = ArrayFromWordList(buffer, bytes - 1, &count, onlyLineEnds);
-#ifdef _MSC_VER
-	std::sort(pointers, pointers + count, cmpWords);
-#else
-	SortWordList(pointers, count);
-#endif
-
-	// Same count and the same word at every sorted position means no change.
-	bool same = (count == len);
-	for (int i = 0; same && i < count; i++) {
-		same = strcmp(words[i], pointers[i]) == 0;
-	}
-	if (same) {
-		delete []buffer;
-		delete []pointers;
-		return false;
-	}
-
-	Clear();
-	words = pointers;
-	list = buffer;
-	len = count;
-	// Rebuild the first-byte index. Walking from the last word to the first
-	// leaves each slot holding the lowest index whose word starts with that byte.
-	std::fill(std::begin(starts), std::end(starts), -1);
-	for (int l = len; l-- > 0; ) {
-		const unsigned char indexChar = words[l][0];
-		starts[indexChar] = l;
-	}
-	return true;
-}
-
-/** Check whether a string is in the list.
- * List elements are either exact matches or prefixes.
- * Prefix elements start with '^' and match all strings that start with the rest of the element
- * so '^GTK_' matches 'GTK_X', 'GTK_MAJOR_VERSION', and 'GTK_'.
- * @param s NUL terminated string to look up.
- * @return true when s equals an element or starts with the text of a '^' element.
- */
-bool WordList::InList(const char *s) const noexcept {
-	if (!words)
-		return false;
-	// Compare first bytes as plain char. Comparing a char against an unsigned
-	// char never matches for bytes >= 0x80 where char is signed, because both
-	// sides are promoted to int. The unsigned value is only used as the index.
-	const char first = s[0];
-	const unsigned char firstChar = first;
-	int j = starts[firstChar];
-	if (j >= 0) {
-		// Walk the run of words that share the first byte of s.
-		while (words[j][0] == first) {
-			if (s[1] == words[j][1]) {
-				const char *a = words[j] + 1;
-				const char *b = s + 1;
-				while (*a && *a == *b) {
-					a++;
-					b++;
-				}
-				if (!*a && !*b)
-					return true;
-			}
-			j++;
-		}
-	}
-	j = starts[static_cast<unsigned char>('^')];
-	if (j >= 0) {
-		// Prefix elements: only the element has to be consumed completely.
-		while (words[j][0] == '^') {
-			const char *a = words[j] + 1;
-			const char *b = s;
-			while (*a && *a == *b) {
-				a++;
-				b++;
-			}
-			if (!*a)
-				return true;
-			j++;
-		}
-	}
-	return false;
-}
-
-/** Similar to InList, but word s can be a substring of keyword.
- * eg. the keyword define is defined as def~ine. This means the word must start
- * with def to be a keyword, but also defi, defin and define are valid.
- * The marker is ~ in this case.
- * Elements starting with '^' are treated as prefixes, as in InList.
- * @param s      NUL terminated string to look up.
- * @param marker Character that separates the mandatory part of an element from
- *               its optional remainder.
- * @return true when s matches an element under the rules above.
- */
-bool WordList::InListAbbreviated(const char *s, const char marker) const noexcept {
-	if (!words)
-		return false;
-	// Compare first bytes as plain char. Comparing a char against an unsigned
-	// char never matches for bytes >= 0x80 where char is signed, because both
-	// sides are promoted to int. The unsigned value is only used as the index.
-	const char first = s[0];
-	const unsigned char firstChar = first;
-	int j = starts[firstChar];
-	if (j >= 0) {
-		while (words[j][0] == first) {
-			bool isSubword = false;
-			int start = 1;
-			// A marker directly after the first character makes everything
-			// that follows optional.
-			if (words[j][1] == marker) {
-				isSubword = true;
-				start++;
-			}
-			if (s[1] == words[j][start]) {
-				const char *a = words[j] + start;
-				const char *b = s + 1;
-				while (*a && *a == *b) {
-					a++;
-					if (*a == marker) {
-						// Mandatory part consumed; skip over the marker.
-						isSubword = true;
-						a++;
-					}
-					b++;
-				}
-				// s must be used up; the element must be used up too unless
-				// the marker has already been passed.
-				if ((!*a || isSubword) && !*b)
-					return true;
-			}
-			j++;
-		}
-	}
-	j = starts[static_cast<unsigned char>('^')];
-	if (j >= 0) {
-		while (words[j][0] == '^') {
-			const char *a = words[j] + 1;
-			const char *b = s;
-			while (*a && *a == *b) {
-				a++;
-				b++;
-			}
-			if (!*a)
-				return true;
-			j++;
-		}
-	}
-	return false;
-}
-
-/** Similar to InListAbbreviated, but word s can be an abridged version of a keyword.
- * eg. the keyword is defined as "after.~:". This means the word must have a prefix (begins with) of
- * "after." and suffix (ends with) of ":" to be a keyword. Hence "after.field:" and "after.form.item:" are valid.
- * Similarly the "~.is.valid" keyword is suffix only, hence "field.is.valid" and "form.is.valid" are valid.
- * The marker is ~ in this case.
- * Keywords with more than one marker are not checked for and will not work.
- * @param s      NUL terminated string to look up.
- * @param marker Character that stands for the omitted middle (or start) of a keyword.
- * @return true when s matches an element under the rules above.
- */
-bool WordList::InListAbridged(const char *s, const char marker) const noexcept {
-	if (!words)
-		return false;
-	// Compare first bytes as plain char. Comparing a char against an unsigned
-	// char never matches for bytes >= 0x80 where char is signed, because both
-	// sides are promoted to int. The unsigned value is only used as the index.
-	const char first = s[0];
-	const unsigned char firstChar = first;
-	int j = starts[firstChar];
-	if (j >= 0) {
-		while (words[j][0] == first) {
-			const char *a = words[j];
-			const char *b = s;
-			while (*a && *a == *b) {
-				a++;
-				if (*a == marker) {
-					a++;
-					const size_t suffixLengthA = strlen(a);
-					const size_t suffixLengthB = strlen(b);
-					if (suffixLengthA >= suffixLengthB)
-						break;
-					// Jump b so that, after the b++ below, it sits where the
-					// keyword's suffix would have to begin in s.
-					b = b + suffixLengthB - suffixLengthA - 1;
-				}
-				b++;
-			}
-			if (!*a && !*b)
-				return true;
-			j++;
-		}
-	}
-
-	// Suffix-only keywords start with the marker itself. Index through
-	// unsigned char so a negative marker value cannot index outside starts.
-	j = starts[static_cast<unsigned char>(marker)];
-	if (j >= 0) {
-		while (words[j][0] == marker) {
-			const char *a = words[j] + 1;
-			const char *b = s;
-			const size_t suffixLengthA = strlen(a);
-			const size_t suffixLengthB = strlen(b);
-			if (suffixLengthA > suffixLengthB) {
-				// s is shorter than the required suffix.
-				j++;
-				continue;
-			}
-			// Line the tail of s up with the suffix.
-			b = b + suffixLengthB - suffixLengthA;
-
-			while (*a && *a == *b) {
-				a++;
-				b++;
-			}
-			if (!*a && !*b)
-				return true;
-			j++;
-		}
-	}
-
-	return false;
-}
-
-/** Return the stored pointer for entry n; n is not range checked. */
-const char *WordList::WordAt(int n) const noexcept {
-	const char *word = words[n];
-	return word;
-}
-