From 909fc02778bf5db5953a266a9aeff0291552b6cf Mon Sep 17 00:00:00 2001 From: oirfeodent Date: Wed, 7 Sep 2016 13:49:08 +1000 Subject: Add InListAbridged to WordList. --- doc/ScintillaHistory.html | 5 ++++ lexlib/WordList.cxx | 60 ++++++++++++++++++++++++++++++++++++++++++++++ lexlib/WordList.h | 1 + test/unit/testWordList.cxx | 22 +++++++++++++++++ 4 files changed, 88 insertions(+) diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index 75f6b04d8..f97f2b8f3 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -499,6 +499,7 @@ Alexey Denisov jedailey + oirfeodent

@@ -518,6 +519,10 @@ Released 4 September 2016.

  • + The WordList class in lexlib used by lexers adds an InListAbridged method for + matching keywords that have particular prefixes and/or suffixes. +
  • +
  • On Cocoa, include ILexer.h in the public headers of the framework. Bug #1855.
  •
diff --git a/lexlib/WordList.cxx b/lexlib/WordList.cxx
index 63d22338f..b8662916c 100644
--- a/lexlib/WordList.cxx
+++ b/lexlib/WordList.cxx
@@ -236,6 +236,66 @@ bool WordList::InListAbbreviated(const char *s, const char marker) const {
 	return false;
 }
 
+/** Similar to InListAbbreviated, but word s can be an abridged version of a keyword.
+* E.g. the keyword is defined as "after.~:". This means the word must have a prefix (begins with) of
+* "after." and a suffix (ends with) of ":" to be a keyword, hence "after.field:" and "after.form.item:" are valid.
+* Similarly, the keyword "~.is.valid" is suffix only, hence "field.is.valid" and "form.is.valid" are valid.
+* The marker is ~ in this case. Returns true when s matches a keyword, false otherwise (including when words is null).
+* Keywords containing more than one marker are not checked for and will not work.
+*/
+bool WordList::InListAbridged(const char *s, const char marker) const {
+	if (0 == words)
+		return false;
+	unsigned char firstChar = s[0];
+	int j = starts[firstChar];
+	if (j >= 0) {	// pass 1: keywords whose first character equals that of s
+		while (static_cast<unsigned char>(words[j][0]) == firstChar) {
+			const char *a = words[j];
+			const char *b = s;
+			while (*a && *a == *b) {
+				a++;
+				if (*a == marker) {
+					a++;	// step over the marker; a now points at the keyword's suffix
+					const size_t suffixLengthA = strlen(a);
+					const size_t suffixLengthB = strlen(b);
+					if (suffixLengthA >= suffixLengthB)
+						break;	// s is too short to hold both the prefix and the suffix
+					b = b + suffixLengthB - suffixLengthA - 1;	// -1 offsets the b++ below, which lands on the tail of s
+				}
+				b++;
+			}
+			if (!*a && !*b)
+				return true;
+			j++;
+		}
+	}
+
+	j = starts[static_cast<unsigned char>(marker)];
+	if (j >= 0) {	// pass 2: keywords that begin with the marker, i.e. suffix-only keywords
+		while (words[j][0] == marker) {
+			const char *a = words[j] + 1;
+			const char *b = s;
+			const size_t suffixLengthA = strlen(a);
+			const size_t suffixLengthB = strlen(b);
+			if (suffixLengthA > suffixLengthB) {
+				j++;
+				continue;
+			}
+			b = b + suffixLengthB - suffixLengthA;	// compare only the last suffixLengthA characters of s
+
+			while (*a && *a == *b) {
+				a++;
+				b++;
+			}
+			if (!*a && !*b)
+				return true;
+			j++;
+		}
+	}
+
+	return false;
+}
+
 const char *WordList::WordAt(int n) const {
 	return words[n];
 }
diff --git a/lexlib/WordList.h b/lexlib/WordList.h
index 382be2812..b1f8c85b2 100644
--- a/lexlib/WordList.h
+++ b/lexlib/WordList.h
@@ -31,6 +31,7 @@
public: void Set(const char *s); bool InList(const char *s) const; bool InListAbbreviated(const char *s, const char marker) const; + bool InListAbridged(const char *s, const char marker) const; const char *WordAt(int n) const; }; diff --git a/test/unit/testWordList.cxx b/test/unit/testWordList.cxx index a4ccf4d6a..e5874c01c 100644 --- a/test/unit/testWordList.cxx +++ b/test/unit/testWordList.cxx @@ -29,4 +29,26 @@ TEST_CASE("WordList") { REQUIRE(0 == strcmp(wl.WordAt(0), "else")); } + SECTION("InListAbridged") { + wl.Set("list w.~.active bo~k a~z ~_frozen"); + REQUIRE(wl.InListAbridged("list", '~')); + + REQUIRE(wl.InListAbridged("w.front.active", '~')); + REQUIRE(wl.InListAbridged("w.x.active", '~')); + REQUIRE(wl.InListAbridged("w..active", '~')); + REQUIRE(!wl.InListAbridged("w.active", '~')); + REQUIRE(!wl.InListAbridged("w.x.closed", '~')); + + REQUIRE(wl.InListAbridged("book", '~')); + REQUIRE(wl.InListAbridged("bok", '~')); + REQUIRE(!wl.InListAbridged("bk", '~')); + + REQUIRE(wl.InListAbridged("a_frozen", '~')); + REQUIRE(wl.InListAbridged("_frozen", '~')); + REQUIRE(!wl.InListAbridged("frozen", '~')); + + REQUIRE(wl.InListAbridged("abcz", '~')); + REQUIRE(wl.InListAbridged("abz", '~')); + REQUIRE(wl.InListAbridged("az", '~')); + } } -- cgit v1.2.3