diff options
Diffstat (limited to 'lexers/LexRaku.cxx')
| -rw-r--r-- | lexers/LexRaku.cxx | 1605 |
1 files changed, 0 insertions, 1605 deletions
diff --git a/lexers/LexRaku.cxx b/lexers/LexRaku.cxx deleted file mode 100644 index 17fff5e94..000000000 --- a/lexers/LexRaku.cxx +++ /dev/null @@ -1,1605 +0,0 @@ -/** @file LexRaku.cxx - ** Lexer for Raku - ** - ** Copyright (c) 2019 Mark Reay <mark@reay.net.au> - **/ -// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org> -// The License.txt file describes the conditions under which this software may be distributed. - -/* - * Raku (Perl6) Lexer for Scintilla - * --------------------------------- - * --------------------------------- - * 06-Dec-2019: More Unicode support: - * - Added a full scope of allowed numbers and letters - * 29-Nov-2019: More highlighting / implemented basic folding: - * - Operators (blanket cover, no sequence checking) - * - Class / Grammar name highlighting - * - Folding: - * - Comments: line / multi-line - * - POD sections - * - Code blocks {} - * 26-Nov-2019: Basic syntax highlighting covering the following: - * - Comments, both line and embedded (multi-line) - * - POD, no inline highlighting as yet... - * - Heredoc block string, with variable highlighting (with qq) - * - Strings, with variable highlighting (with ") - * - Q Language, including adverbs (also basic q and qq) - * - Regex, including adverbs - * - Numbers - * - Bareword / identifiers - * - Types - * - Variables: mu, positional, associative, callable - * TODO: - * - POD inline - * - Better operator sequence coverage - */ - -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <stdarg.h> -#include <assert.h> -#include <ctype.h> - -#include <string> -#include <vector> -#include <map> - -#include "ILexer.h" -#include "Scintilla.h" -#include "SciLexer.h" - -#include "WordList.h" -#include "LexAccessor.h" -#include "StyleContext.h" -#include "CharacterSet.h" -#include "CharacterCategory.h" -#include "LexerModule.h" -#include "OptionSet.h" -#include "DefaultLexer.h" - -using namespace Scintilla; - -namespace { // anonymous namespace to isolate any name clashes -/*----------------------------------------------------------------------------* - * --- DEFINITIONS: OPTIONS / CONSTANTS --- - *----------------------------------------------------------------------------*/ - -// Number types -#define RAKUNUM_BINARY 1 // order is significant: 1-3 cannot have a dot -#define RAKUNUM_OCTAL 2 -#define RAKUNUM_FLOAT_EXP 3 // exponent part only -#define RAKUNUM_HEX 4 // may be a hex float -#define RAKUNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings -#define RAKUNUM_VECTOR 6 -#define RAKUNUM_V_VECTOR 7 -#define RAKUNUM_VERSION 8 // can contain multiple '.'s -#define RAKUNUM_BAD 9 - -// Regex / Q string types -#define RAKUTYPE_REGEX_NORM 0 // 0 char ident -#define RAKUTYPE_REGEX_S 1 // order is significant: -#define RAKUTYPE_REGEX_M 2 // 1 char ident -#define RAKUTYPE_REGEX_Y 3 // 1 char ident -#define RAKUTYPE_REGEX 4 // > RAKUTYPE_REGEX == 2 char identifiers -#define RAKUTYPE_REGEX_RX 5 // 2 char ident -#define RAKUTYPE_REGEX_TR 6 // 2 char ident -#define RAKUTYPE_QLANG 7 // < RAKUTYPE_QLANG == RAKUTYPE_REGEX_? -#define RAKUTYPE_STR_WQ 8 // 0 char ident < word quote > -#define RAKUTYPE_STR_Q 9 // 1 char ident -#define RAKUTYPE_STR_QX 10 // 2 char ident -#define RAKUTYPE_STR_QW 11 // 2 char ident -#define RAKUTYPE_STR_QQ 12 // 2 char ident -#define RAKUTYPE_STR_QQX 13 // 3 char ident -#define RAKUTYPE_STR_QQW 14 // 3 char ident -#define RAKUTYPE_STR_QQWW 15 // 4 char ident - -// Delimiter types -#define RAKUDELIM_BRACKET 0 // bracket: regex, Q language -#define RAKUDELIM_QUOTE 1 // quote: normal string - -// rakuWordLists: keywords as defined in config -const char *const rakuWordLists[] = { - "Keywords and identifiers", - "Functions", - "Types basic", - "Types composite", - "Types domain-specific", - "Types exception", - "Adverbs", - nullptr, -}; - -// Options and defaults -struct OptionsRaku { - bool fold; - bool foldCompact; - bool foldComment; - bool foldCommentMultiline; - bool foldCommentPOD; - OptionsRaku() { - fold = true; - foldCompact = false; - foldComment = true; - foldCommentMultiline = true; - foldCommentPOD = true; - } -}; - -// init options and words -struct OptionSetRaku : public OptionSet<OptionsRaku> { - OptionSetRaku() { - DefineProperty("fold", &OptionsRaku::fold); - DefineProperty("fold.comment", &OptionsRaku::foldComment); - DefineProperty("fold.compact", &OptionsRaku::foldCompact); - - DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline, - "Set this property to 0 to disable folding multi-line comments when fold.comment=1."); - DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD, - "Set this property to 0 to disable folding POD comments when fold.comment=1."); - - // init word lists - DefineWordListSets(rakuWordLists); - } -}; - -// Delimiter pair -struct DelimPair { - int opener; // opener char - int closer[2]; // closer chars - bool interpol; // can variables be interpolated? - short count; // delimiter char count - DelimPair() { - opener = 0; - closer[0] = 0; - closer[1] = 0; - interpol = false; - count = 0; - } - bool isCloser(int ch) const { - return ch == closer[0] || ch == closer[1]; - } -}; - -/*----------------------------------------------------------------------------* - * --- FUNCTIONS --- - *----------------------------------------------------------------------------*/ - -/* - * IsANewLine - * - returns true if this is a new line char - */ -constexpr bool IsANewLine(int ch) noexcept { - return ch == '\r' || ch == '\n'; -} - -/* - * IsAWhitespace - * - returns true if this is a whitespace (or newline) char - */ -bool IsAWhitespace(int ch) noexcept { - return IsASpaceOrTab(ch) || IsANewLine(ch); -} - -/* - * IsAlphabet - * - returns true if this is an alphabetical char - */ -constexpr bool IsAlphabet(int ch) noexcept { - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); -} - -/* - * IsCommentLine - * - returns true if this is a comment line - * - tests: SCE_RAKU_COMMENTLINE or SCE_RAKU_COMMENTEMBED - * modified from: LexPerl.cxx - */ -bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) { - Sci_Position pos = styler.LineStart(line); - Sci_Position eol_pos = styler.LineStart(line + 1) - 1; - for (Sci_Position i = pos; i < eol_pos; i++) { - char ch = styler[i]; - int style = styler.StyleAt(i); - if (type == SCE_RAKU_COMMENTEMBED) { - if (i == (eol_pos - 1) && style == type) - return true; - } else { // make sure the line is NOT a SCE_RAKU_COMMENTEMBED - if (ch == '#' && style == type && styler[i+1] != '`' ) - return true; - else if (!IsASpaceOrTab(ch)) - return false; - } - } - return false; -} - -/* - * GetBracketCloseChar - * - returns the end bracket char: opposite of start - * - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section) - * - Categories are general matches for valid BiDi types - * - Most closer chars are opener + 1 - */ -int GetBracketCloseChar(const int ch) noexcept { - const CharacterCategory cc = CategoriseCharacter(ch); - switch (cc) { - case ccSm: - switch (ch) { - case 0x3C: return 0x3E; // LESS-THAN SIGN - case 0x2208: return 0x220B; // ELEMENT OF - case 0x2209: return 0x220C; // NOT AN ELEMENT OF - case 0x220A: return 0x220D; // SMALL ELEMENT OF - case 0x2215: return 0x29F5; // DIVISION SLASH - case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO - case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH - case 0x22A6: return 0x2ADE; // ASSERTION - case 0x22A8: return 0x2AE4; // TRUE - case 0x22A9: return 0x2AE3; // FORCES - case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE - case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE - case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE - case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE - case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR - case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR - case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN - } - break; - case ccPs: - switch (ch) { - case 0x5B: return 0x5D; // LEFT SQUARE BRACKET - case 0x7B: return 0x7D; // LEFT CURLY BRACKET - case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER - case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER - case 0xFF3B: return 0xFF3D; // FULLWIDTH LEFT SQUARE BRACKET - case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET - } - break; - case ccPi: - break; - default: return 0; - } - return ch + 1; -} - -/* - * IsValidQuoteOpener - * - - */ -bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept { - dp.closer[0] = 0; - dp.closer[1] = 0; - dp.interpol = true; - if (type == RAKUDELIM_QUOTE) { - switch (ch) { - // Opener Closer Description - case '\'': dp.closer[0] = '\''; // APOSTROPHE - dp.interpol = false; - break; - case '"': dp.closer[0] = '"'; // QUOTATION MARK - break; - case 0x2018: dp.closer[0] = 0x2019; // LEFT SINGLE QUOTATION MARK - dp.interpol = false; - break; - case 0x201C: dp.closer[0] = 0x201D; // LEFT DOUBLE QUOTATION MARK - break; - case 0x201D: dp.closer[0] = 0x201C; // RIGHT DOUBLE QUOTATION MARK - break; - case 0x201E: dp.closer[0] = 0x201C; // DOUBLE LOW-9 QUOTATION MARK - dp.closer[1] = 0x201D; - break; - case 0xFF62: dp.closer[0] = 0xFF63; // HALFWIDTH LEFT CORNER BRACKET - dp.interpol = false; - break; - default: return false; - } - } else if (type == RAKUDELIM_BRACKET) { - dp.closer[0] = GetBracketCloseChar(ch); - } - dp.opener = ch; - dp.count = 1; - return dp.closer[0] > 0; -} - -/* - * IsBracketOpenChar - * - true if this is a valid start bracket character - */ -bool IsBracketOpenChar(int ch) noexcept { - return GetBracketCloseChar(ch) > 0; -} - -/* - * IsValidRegOrQAdjacent - * - returns true if ch is a valid character to put directly after Q / q - * * ref: Q Language: https://docs.raku.org/language/quoting - */ -bool IsValidRegOrQAdjacent(int ch) noexcept { - return !(IsAlphaNumeric(ch) || ch == '_' || ch == '(' || ch == ')' || ch == '\'' ); -} - -/* - * IsValidRegOrQPrecede - * - returns true if ch is a valid preceeding character to put directly before Q / q - * * ref: Q Language: https://docs.raku.org/language/quoting - */ -bool IsValidRegOrQPrecede(int ch) noexcept { - return !(IsAlphaNumeric(ch) || ch == '_'); -} - -/* - * MatchCharInRange - * - returns true if the mach character is found in range (of length) - * - ignoreDelim (default false) - */ -bool MatchCharInRange(StyleContext &sc, const Sci_Position length, - const int match, bool ignoreDelim = false) { - Sci_Position len = 0; - int chPrev = sc.chPrev; - while (++len < length) { - const int ch = sc.GetRelativeCharacter(len); - if (ch == match && (ignoreDelim || chPrev != '\\')) - return true; - } - return false; -} - -/* - * PrevNonWhitespaceChar - * - returns the last non-whitespace char - */ -int PrevNonWhitespaceChar(StyleContext &sc) { - Sci_Position rel = 0; - Sci_Position max_back = 0 - sc.currentPos; - while (--rel > max_back) { - const int ch = sc.GetRelativeCharacter(rel); - if (!IsAWhitespace(ch)) - return ch; - } - return 0; // no matching char -} - -/* - * IsQLangStartAtScPos - * - returns true if this is a valid Q Language sc position - * - ref: https://docs.raku.org/language/quoting - * - Q :adverb :adverb //; - * - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /; - */ -bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) { - const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); - const int chFw2 = sc.GetRelativeCharacter(2); - const int chFw3 = sc.GetRelativeCharacter(3); - type = -1; - if (IsValidRegOrQPrecede(sc.chPrev)) { - if (sc.ch == 'Q' && valid_adj) { - type = RAKUTYPE_QLANG; - } else if (sc.ch == 'q') { - switch (sc.chNext) { - case 'x': - type = RAKUTYPE_STR_QX; - break; - case 'w': - type = RAKUTYPE_STR_QW; - break; - case 'q': - if (chFw2 == 'x') { - type = RAKUTYPE_STR_QQX; - } else if (chFw2 == 'w') { - if (chFw3 == 'w') { - type = RAKUTYPE_STR_QQWW; - } else { - type = RAKUTYPE_STR_QQW; - } - } else { - type = RAKUTYPE_STR_QQ; - } - break; - default: - type = RAKUTYPE_STR_Q; - } - } else if (sc.ch == '<' && MatchCharInRange(sc, length, '>')) { - type = RAKUTYPE_STR_WQ; // < word quote > - } - } - return type >= 0; -} - -/* - * IsRegexStartAtScPos - * - returns true if this is a valid Regex sc position - * - ref: https://docs.raku.org/language/regexes - * - Regex: (rx/s/m/tr/y) :adverb /:adverb /; - * - regex R :adverb //; - * - /:adverb /; - */ -bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) { - const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); - type = -1; - if (IsValidRegOrQPrecede(sc.chPrev)) { - switch (sc.ch) { - case 'r': - if (sc.chNext == 'x') - type = RAKUTYPE_REGEX_RX; - break; - case 't': - case 'T': - if (sc.chNext == 'r' || sc.chNext == 'R') - type = RAKUTYPE_REGEX_TR; - break; - case 'm': - if (valid_adj) - type = RAKUTYPE_REGEX_M; - break; - case 's': - case 'S': - if (valid_adj) - type = RAKUTYPE_REGEX_S; - break; - case 'y': - if (valid_adj) - type = RAKUTYPE_REGEX_Y; - break; - case '/': - if (set.Contains(PrevNonWhitespaceChar(sc))) - type = RAKUTYPE_REGEX_NORM; - } - } - return type >= 0; -} - -/* - * IsValidIdentPrecede - * - returns if ch is a valid preceeding char to put directly before an identifier - */ -bool IsValidIdentPrecede(int ch) noexcept { - return !(IsAlphaNumeric(ch) || ch == '_' || ch == '@' || ch == '$' || ch == '%'); -} - -/* - * IsValidDelimiter - * - returns if ch is a valid delimiter (most chars are valid) - * * ref: Q Language: https://docs.raku.org/language/quoting - */ -bool IsValidDelimiter(int ch) noexcept { - return !(IsAlphaNumeric(ch) || ch == ':'); -} - -/* - * GetDelimiterCloseChar - * - returns the corrisponding close char for a given delimiter (could be the same char) - */ -int GetDelimiterCloseChar(int ch) noexcept { - int ch_end = GetBracketCloseChar(ch); - if (ch_end == 0 && IsValidDelimiter(ch)) { - ch_end = ch; - } - return ch_end; -} - -/* - * GetRepeatCharCount - * - returns the occurence count of match - */ -Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) { - Sci_Position cnt = 0; - while (cnt < length) { - if (sc.GetRelativeCharacter(cnt) != chMatch) { - break; - } - cnt++; - } - return cnt; -} - -/* - * LengthToDelimiter - * - returns the length until the end of a delimited string section - * - Ignores nested delimiters (if opener != closer) - * - no trailing char after last closer (default false) - */ -Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp, - Sci_Position length, bool noTrailing = false) { - short cnt_open = 0; // count open bracket - short cnt_close = 0; // count close bracket - Sci_Position len = 0; // count characters - int chOpener = dp.opener; // look for nested opener / closer - if (dp.opener == dp.closer[0]) - chOpener = 0; // no opening delimiter (no nesting possible) - - while (len < length) { - const int chPrev = sc.GetRelativeCharacter(len - 1); - const int ch = sc.GetRelativeCharacter(len); - const int chNext = sc.GetRelativeCharacter(len+1); - - if (cnt_open == 0 && cnt_close == dp.count) { - return len; // end condition has been met - } else { - if (chPrev != '\\' && ch == chOpener) { // ignore escape sequence - cnt_open++; // open nested bracket - } else if (chPrev != '\\' && dp.isCloser(ch)) { // ignore escape sequence - if ( cnt_open > 0 ) { - cnt_open--; // close nested bracket - } else if (dp.count > 1 && cnt_close < (dp.count - 1)) { - if (cnt_close > 1) { - if (dp.isCloser(chPrev)) { - cnt_close++; - } else { // reset if previous char was not close - cnt_close = 0; - } - } else { - cnt_close++; - } - } else if (!noTrailing || (IsAWhitespace(chNext))) { - cnt_close++; // found last close - if (cnt_close > 1 && !dp.isCloser(chPrev)) { - cnt_close = 0; // reset if previous char was not close - } - } else { - cnt_close = 0; // non handled close: reset - } - } else if (IsANewLine(ch)) { - cnt_open = 0; // reset after each line - cnt_close = 0; - } - } - len++; - } - return -1; // end condition has NOT been met -} - -/* - * LengthToEndHeredoc - * - returns the length until the end of a heredoc section - * - delimiter string MUST begin on a new line - */ -Sci_Position LengthToEndHeredoc(const StyleContext &sc, LexAccessor &styler, - const Sci_Position length, const char *delim) { - bool on_new_ln = false; - int i = 0; // str index - for (int n = 0; n < length; n++) { - const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0); - if (on_new_ln) { - if (delim[i] == '\0') - return n; // at end of str, match found! - if (ch != delim[i++]) - i = 0; // no char match, reset 'i'ndex - } - if (i == 0) // detect new line - on_new_ln = IsANewLine(ch); - } - return -1; // no match found -} - -/* - * LengthToNextChar - * - returns the length until the next character - */ -Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) { - Sci_Position len = 0; - while (++len < length) { - const int ch = sc.GetRelativeCharacter(len); - if (!IsASpaceOrTab(ch) && !IsANewLine(ch)) { - break; - } - } - return len; -} - -/* - * GetRelativeString - * - gets a relitive string and sets it in &str - * - resets string before seting - */ -void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length, - std::string &str) { - Sci_Position pos = offset; - str.clear(); - while (pos < length) { - str += sc.GetRelativeCharacter(pos++); - } -} - -} // end anonymous namespace - -/*----------------------------------------------------------------------------* - * --- class: LexerRaku --- - *----------------------------------------------------------------------------*/ -//class LexerRaku : public ILexerWithMetaData { -class LexerRaku : public DefaultLexer { - CharacterSet setWord; - CharacterSet setSigil; - CharacterSet setTwigil; - CharacterSet setOperator; - CharacterSet setSpecialVar; - WordList regexIdent; // identifiers that specify a regex - OptionsRaku options; // Options from config - OptionSetRaku osRaku; - WordList keywords; // Word Lists from config - WordList functions; - WordList typesBasic; - WordList typesComposite; - WordList typesDomainSpecific; - WordList typesExceptions; - WordList adverbs; - -public: - // Defined as explicit, so that constructor can not be copied - explicit LexerRaku() : - DefaultLexer("raku", SCLEX_RAKU), - setWord(CharacterSet::setAlphaNum, "-_", 0x80), - setSigil(CharacterSet::setNone, "$&%@"), - setTwigil(CharacterSet::setNone, "!*.:<=?^~"), - setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"), - setSpecialVar(CharacterSet::setNone, "_/!") { - regexIdent.Set("regex rule token"); - } - // Deleted so LexerRaku objects can not be copied. - LexerRaku(const LexerRaku &) = delete; - LexerRaku(LexerRaku &&) = delete; - void operator=(const LexerRaku &) = delete; - void operator=(LexerRaku &&) = delete; - virtual ~LexerRaku() { - } - void SCI_METHOD Release() noexcept override { - delete this; - } - int SCI_METHOD Version() const noexcept override { - return lvRelease5; - } - const char *SCI_METHOD PropertyNames() override { - return osRaku.PropertyNames(); - } - int SCI_METHOD PropertyType(const char *name) override { - return osRaku.PropertyType(name); - } - const char *SCI_METHOD DescribeProperty(const char *name) override { - return osRaku.DescribeProperty(name); - } - Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; - const char *SCI_METHOD PropertyGet(const char *key) override { - return osRaku.PropertyGet(key); - } - const char *SCI_METHOD DescribeWordListSets() override { - return osRaku.DescribeWordListSets(); - } - Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; - void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; - void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; - - static ILexer5 *LexerFactoryRaku() { - return new LexerRaku(); - } - -protected: - bool IsOperatorChar(const int ch); - bool IsWordChar(const int ch, bool allowNumber = true); - bool IsWordStartChar(const int ch); - bool IsNumberChar(const int ch, int base = 10); - bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, - int &type, const DelimPair &dp); - void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState); - bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, - WordList &wordsAdverbs, DelimPair &dp); - Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length, - char *s, const int size, Sci_Position offset = 0); -}; - -/*----------------------------------------------------------------------------* - * --- METHODS: LexerRaku --- - *----------------------------------------------------------------------------*/ - -/* - * LexerRaku::IsOperatorChar - * - Test for both ASCII and Unicode operators - * see: https://docs.raku.org/language/unicode_entry - */ -bool LexerRaku::IsOperatorChar(const int ch) { - if (ch > 0x7F) { - switch (ch) { - // Unicode ASCII Equiv. - case 0x2208: // (elem) - case 0x2209: // !(elem) - case 0x220B: // (cont) - case 0x220C: // !(cont) - case 0x2216: // (-) - case 0x2229: // (&) - case 0x222A: // (|) - case 0x2282: // (<) - case 0x2283: // (>) - case 0x2284: // !(<) - case 0x2285: // !(>) - case 0x2286: // (<=) - case 0x2287: // (>=) - case 0x2288: // !(<=) - case 0x2289: // !(>=) - case 0x228D: // (.) - case 0x228E: // (+) - case 0x2296: // (^) - return true; - } - } - return setOperator.Contains(ch); -} - -/* - * LexerRaku::IsWordChar - * - Test for both ASCII and Unicode identifier characters - * see: https://docs.raku.org/language/unicode_ascii - * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt - * FIXME: *still* may not contain all valid characters - */ -bool LexerRaku::IsWordChar(const int ch, bool allowNumber) { - // Unicode numbers should not apear in word identifiers - if (ch > 0x7F) { - const CharacterCategory cc = CategoriseCharacter(ch); - switch (cc) { - // Letters - case ccLu: - case ccLl: - case ccLt: - case ccLm: - case ccLo: - return true; - default: - return false; - } - } else if (allowNumber && IsADigit(ch)) { - return true; // an ASCII number type - } - return setWord.Contains(ch); -} - -/* - * LexerRaku::IsWordStartChar - * - Test for both ASCII and Unicode identifier "start / first" characters - */ -bool LexerRaku::IsWordStartChar(const int ch) { - return ch != '-' && IsWordChar(ch, false); // no numbers allowed -} - -/* - * LexerRaku::IsNumberChar - * - Test for both ASCII and Unicode identifier number characters - * see: https://docs.raku.org/language/unicode_ascii - * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt - * FILTERED by Unicode letters that are NUMBER - * and NOT PARENTHESIZED or CIRCLED - * FIXME: *still* may not contain all valid number characters - */ -bool LexerRaku::IsNumberChar(const int ch, int base) { - if (ch > 0x7F) { - const CharacterCategory cc = CategoriseCharacter(ch); - switch (cc) { - // Numbers - case ccNd: - case ccNl: - case ccNo: - return true; - default: - return false; - } - } - return IsADigit(ch, base); -} - -/* - * LexerRaku::PropertySet - * - - */ -Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) { - if (osRaku.PropertySet(&options, key, val)) - return 0; - return -1; -} - -/* - * LexerRaku::WordListSet - * - - */ -Sci_Position SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) { - WordList *wordListN = nullptr; - switch (n) { - case 0: - wordListN = &keywords; - break; - case 1: - wordListN = &functions; - break; - case 2: - wordListN = &typesBasic; - break; - case 3: - wordListN = &typesComposite; - break; - case 4: - wordListN = &typesDomainSpecific; - break; - case 5: - wordListN = &typesExceptions; - break; - case 6: - wordListN = &adverbs; - break; - } - Sci_Position firstModification = -1; - if (wordListN) { - WordList wlNew; - wlNew.Set(wl); - if (*wordListN != wlNew) { - wordListN->Set(wl); - firstModification = 0; - } - } - return firstModification; -} - -/* - * LexerRaku::ProcessRegexTwinCapture - * - processes the transition between a regex pair (two sets of delimiters) - * - moves to first new delimiter, if a bracket - * - returns true when valid delimiter start found (if bracket) - */ -bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, - int &type, const DelimPair &dp) { - - if (type == RAKUTYPE_REGEX_S || type == RAKUTYPE_REGEX_TR || type == RAKUTYPE_REGEX_Y) { - type = -1; // clear type - - // move past chRegQClose if it was the previous char - if (dp.isCloser(sc.chPrev)) - sc.Forward(); - - // no processing needed for non-bracket - if (dp.isCloser(dp.opener)) - return true; - - // move to next opening bracket - const Sci_Position len = LengthToNextChar(sc, length); - if (sc.GetRelativeCharacter(len) == dp.opener) { - sc.Forward(len); - return true; - } - } - return false; -} - -/* - * LexerRaku::ProcessStringVars - * - processes a string and highlights any valid variables - */ -void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) { - const int state = sc.state; - for (Sci_Position pos = 0; pos < length; pos++) { - if (sc.state == varState && !IsWordChar(sc.ch)) { - sc.SetState(state); - } else if (sc.chPrev != '\\' - && (sc.ch == '$' || sc.ch == '@') - && IsWordStartChar(sc.chNext)) { - sc.SetState(varState); - } - sc.Forward(); // Next character - } -} -/* - * LexerRaku::ProcessValidRegQlangStart - * - processes a section of the document range from after a Regex / Q delimiter - * - returns true on success - * - sets: adverbs, chOpen, chClose, chCount - * ref: https://docs.raku.org/language/regexes - */ -bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, - WordList &wordsAdverbs, DelimPair &dp) { - Sci_Position startPos = sc.currentPos; - Sci_Position startLen = length; - const int target_state = sc.state; - int state = SCE_RAKU_DEFAULT; - std::string str; - - // find our opening delimiter (and occurrences) / save any adverbs - dp.opener = 0; // adverbs can be after the first delimiter - bool got_all_adverbs = false; // in Regex statements - bool got_ident = false; // regex can have an identifier: 'regex R' - sc.SetState(state); // set state default to avoid pre-highlights - while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) { - - // move to the next non-space character - const bool was_space = IsAWhitespace(sc.ch); - if (!got_all_adverbs && was_space) { - sc.Forward(LengthToNextChar(sc, length)); - } - length = startLen - (sc.currentPos - startPos); // update length remaining - - // parse / eat an identifier (if type == RAKUTYPE_REGEX) - if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) { - - // eat identifier / account for special adverb :sym<name> - bool got_sym = false; - while (sc.More()) { - sc.SetState(SCE_RAKU_IDENTIFIER); - while (sc.More() && (IsAlphaNumeric(sc.chNext) - || sc.chNext == '_' || sc.chNext == '-')) { - sc.Forward(); - } - sc.Forward(); - if (got_sym && sc.ch == '>') { - sc.SetState(SCE_RAKU_OPERATOR); // '>' - sc.Forward(); - break; - } else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) { - sc.SetState(SCE_RAKU_ADVERB); // ':sym' - sc.Forward(4); - sc.SetState(SCE_RAKU_OPERATOR); // '<' - sc.Forward(); - got_sym = true; - } else { - break; - } - } - sc.SetState(state); - got_ident = true; - } - - // parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim - // >= RAKUTYPE_QLANG only has adverbs before delim - else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident) - && !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) { - sc.SetState(SCE_RAKU_ADVERB); - while (IsAlphaNumeric(sc.chNext) && sc.More()) { - sc.Forward(); - str += sc.ch; - } - str += ' '; - sc.Forward(); - sc.SetState(state); - } - - // find starting delimiter - else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch)) - && IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are) - sc.SetState((state = target_state));// start state here... - dp.opener = sc.ch; // this is our delimiter, get count - if (type < RAKUTYPE_QLANG) // type is Regex - dp.count = 1; // has only one delimiter - else - dp.count = GetRepeatCharCount(sc, dp.opener, length); - sc.Forward(dp.count); - } - - // we must have all the adverbs by now... - else { - if (got_all_adverbs) - break; // prevent infinite loop: occurs on missing open char - got_all_adverbs = true; - } - } - - // set word list / find a valid closing delimiter (or bomb!) - wordsAdverbs.Set(str.c_str()); - dp.closer[0] = GetDelimiterCloseChar(dp.opener); - dp.closer[1] = 0; // no other closer char - return dp.closer[0] > 0; -} - -/* - * LexerRaku::LengthToNonWordChar - * - returns the length until the next non "word" character: AlphaNum + '_' - * - also sets all the parsed chars in 's' - */ -Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length, - char *s, const int size, Sci_Position offset) { - Sci_Position len = 0; - Sci_Position max_length = size < length ? size : length; - while (len <= max_length) { - const int ch = sc.GetRelativeCharacter(len + offset); - if (!IsWordChar(ch)) { - s[len] = '\0'; - break; - } - s[len] = ch; - len++; - } - s[len + 1] = '\0'; - return len; -} - -/* - * LexerRaku::Lex - * - Main lexer method - */ -void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { - LexAccessor styler(pAccess); - DelimPair dpEmbeded; // delimiter pair: embeded comments - DelimPair dpString; // delimiter pair: string - DelimPair dpRegQ; // delimiter pair: Regex / Q Lang - std::string hereDelim; // heredoc delimiter (if in heredoc) - int hereState = 0; // heredoc state to use (Q / QQ) - int numState = 0; // number state / type - short cntDecimal = 0; // number decinal count - std::string wordLast; // last word seen - std::string identLast; // last identifier seen - std::string adverbLast; // last (single) adverb seen - WordList lastAdverbs; // last adverbs seen - Sci_Position len; // temp length value - char s[100]; // temp char string - int typeDetect; // temp type detected (for regex and Q lang) - Sci_Position lengthToEnd; // length until the end of range - - // Backtrack to last SCE_RAKU_DEFAULT or 0 - Sci_PositionU newStartPos = startPos; - if (initStyle != SCE_RAKU_DEFAULT) { - while (newStartPos > 0) { - newStartPos--; - if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT) - break; - } - } - - // Backtrack to start of line before SCE_RAKU_HEREDOC_Q? - if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) { - while (newStartPos > 0) { - if (IsANewLine(styler.SafeGetCharAt(newStartPos - 1))) - break; // Stop if previous char is a new line - newStartPos--; - } - } - - // Re-calculate (any) changed startPos, length and initStyle state - if (newStartPos < startPos) { - initStyle = SCE_RAKU_DEFAULT; - length += startPos - newStartPos; - startPos = newStartPos; - } - - // init StyleContext - StyleContext sc(startPos, length, initStyle, styler); - - // StyleContext Loop - for (; sc.More(); sc.Forward()) { - lengthToEnd = (length - (sc.currentPos - startPos)); // end of range - - /* *** Determine if the current state should terminate ************** * - * Everything within the 'switch' statement processes characters up - * until the end of a syntax highlight section / state. - * ****************************************************************** */ - switch (sc.state) { - case SCE_RAKU_OPERATOR: - sc.SetState(SCE_RAKU_DEFAULT); - break; // FIXME: better valid operator sequences needed? - case SCE_RAKU_COMMENTLINE: - if (IsANewLine(sc.ch)) { - sc.SetState(SCE_RAKU_DEFAULT); - } - break; - case SCE_RAKU_COMMENTEMBED: - if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) { - sc.Forward(len); // Move to end delimiter - sc.SetState(SCE_RAKU_DEFAULT); - } else { - sc.Forward(lengthToEnd); // no end delimiter found - } - break; - case SCE_RAKU_POD: - if (sc.atLineStart && sc.Match("=end pod")) { - sc.Forward(8); - sc.SetState(SCE_RAKU_DEFAULT); - } - break; - case SCE_RAKU_STRING: - - // Process the string for variables: move to end delimiter - if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) { - if (dpString.interpol) { - ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); - } else { - sc.Forward(len); - } - sc.SetState(SCE_RAKU_DEFAULT); - } else { - sc.Forward(lengthToEnd); // no end delimiter found - } - break; - case SCE_RAKU_STRING_Q: - case SCE_RAKU_STRING_QQ: - case SCE_RAKU_STRING_Q_LANG: - - // No string: previous char was the delimiter - if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { - sc.SetState(SCE_RAKU_DEFAULT); - } - - // Process the string for variables: move to end delimiter - else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { - - // set (any) heredoc delimiter string - if (lastAdverbs.InList("to")) { - GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim); - hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state - } - - // select variable identifiers - if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) { - ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); - hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state - } else { - sc.Forward(len); - } - sc.SetState(SCE_RAKU_DEFAULT); - } else { - sc.Forward(lengthToEnd); // no end delimiter found - } - break; - case SCE_RAKU_HEREDOC_Q: - case SCE_RAKU_HEREDOC_QQ: - if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) { - // select variable identifiers - if (sc.state == SCE_RAKU_HEREDOC_QQ) { - ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); - } else { - sc.Forward(len); - } - sc.SetState(SCE_RAKU_DEFAULT); - } else { - sc.Forward(lengthToEnd); // no end delimiter found - } - hereDelim.clear(); // clear heredoc delimiter - break; - case SCE_RAKU_REGEX: - // account for typeDetect = RAKUTYPE_REGEX_S/TR/Y - while (sc.state == SCE_RAKU_REGEX) { - - // No string: previous char was the delimiter - if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { - if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) - continue; - sc.SetState(SCE_RAKU_DEFAULT); - break; - } - - // Process the string for variables: move to end delimiter - else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { - ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR); - if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) - continue; - sc.SetState(SCE_RAKU_DEFAULT); - break; - } else { - sc.Forward(lengthToEnd); // no end delimiter found - break; - } - } - break; - case SCE_RAKU_NUMBER: - if (sc.ch == '.') { - if (sc.chNext == '.') { // '..' is an operator - sc.SetState(SCE_RAKU_OPERATOR); - sc.Forward(); - if (sc.chNext == '.') // '...' is also an operator - sc.Forward(); - break; - } else if (numState > RAKUNUM_FLOAT_EXP - && (cntDecimal < 1 || numState == RAKUNUM_VERSION)) { - cntDecimal++; - sc.Forward(); - } else { - sc.SetState(SCE_RAKU_DEFAULT); - break; // too many decinal places - } - } - switch (numState) { - case RAKUNUM_BINARY: - if (!IsNumberChar(sc.ch, 2)) - sc.SetState(SCE_RAKU_DEFAULT); - break; - case RAKUNUM_OCTAL: - if (!IsNumberChar(sc.ch, 8)) - sc.SetState(SCE_RAKU_DEFAULT); - break; - case RAKUNUM_HEX: - if (!IsNumberChar(sc.ch, 16)) - sc.SetState(SCE_RAKU_DEFAULT); - break; - case RAKUNUM_DECIMAL: - case RAKUNUM_VERSION: - if (!IsNumberChar(sc.ch)) - sc.SetState(SCE_RAKU_DEFAULT); - } - break; - case SCE_RAKU_WORD: - case SCE_RAKU_FUNCTION: - case SCE_RAKU_TYPEDEF: - case SCE_RAKU_ADVERB: - sc.SetState(SCE_RAKU_DEFAULT); - break; - case SCE_RAKU_MU: - case SCE_RAKU_POSITIONAL: - case SCE_RAKU_ASSOCIATIVE: - case SCE_RAKU_CALLABLE: - case SCE_RAKU_IDENTIFIER: - case SCE_RAKU_GRAMMAR: - case SCE_RAKU_CLASS: - sc.SetState(SCE_RAKU_DEFAULT); - break; - } - - /* *** Determine if a new state should be entered ******************* * - * Everything below here identifies the beginning of a state, all or part - * of the characters within this state are processed here, the rest are - * completed above in the terminate state section. - * ****************************************************************** */ - if (sc.state == SCE_RAKU_DEFAULT) { - - // --- Single line comment - if (sc.ch == '#') { - sc.SetState(SCE_RAKU_COMMENTLINE); - } - - // --- POD block - else if (sc.atLineStart && sc.Match("=begin pod")) { - sc.SetState(SCE_RAKU_POD); - sc.Forward(10); - } - - // --- String (normal) - else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) { - sc.SetState(SCE_RAKU_STRING); - } - - // --- String (Q Language) ---------------------------------------- - // - https://docs.raku.org/language/quoting - // - Q :adverb :adverb //; - // - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //; - else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) { - int state = SCE_RAKU_STRING_Q_LANG; - Sci_Position forward = 1; // single char ident (default) - if (typeDetect > RAKUTYPE_QLANG) { - state = SCE_RAKU_STRING_Q; - if (typeDetect == RAKUTYPE_STR_WQ) - forward = 0; // no char ident - } - if (typeDetect > RAKUTYPE_STR_Q) { - if (typeDetect == RAKUTYPE_STR_QQ) - state = SCE_RAKU_STRING_QQ; - forward++; // two char ident - } - if (typeDetect > RAKUTYPE_STR_QQ) - forward++; // three char ident - if (typeDetect == RAKUTYPE_STR_QQWW) - forward++; // four char ident - - // Proceed: check for a valid character after statement - if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) { - sc.SetState(state); - sc.Forward(forward); - lastAdverbs.Clear(); - - // Process: adverbs / opening delimiter / adverbs after delim - if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, - lastAdverbs, dpRegQ)) - sc.SetState(state); - } - } - - // --- Regex (rx/s/m/tr/y) ---------------------------------------- - // - https://docs.raku.org/language/regexes - else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) { - if (typeDetect == -1) { // must be a regex identifier word - wordLast.clear(); - typeDetect = RAKUTYPE_REGEX; - } - Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM) - if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX) - forward++; // single char ident - if (typeDetect > RAKUTYPE_REGEX) - forward++; // two char ident - - // Proceed: check for a valid character after statement - if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) { - sc.SetState(SCE_RAKU_REGEX); - sc.Forward(forward); - lastAdverbs.Clear(); - - // Process: adverbs / opening delimiter / adverbs after delim - if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, - lastAdverbs, dpRegQ)) - sc.SetState(SCE_RAKU_REGEX); - } - } - - // --- Numbers ---------------------------------------------------- - else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch) - || (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) { - numState = RAKUNUM_DECIMAL; // default: decimal (base 10) - cntDecimal = 0; - sc.SetState(SCE_RAKU_NUMBER); - if (sc.ch == 'v') // forward past 'v' - sc.Forward(); - if (wordLast == "use") { // package version number - numState = RAKUNUM_VERSION; - } else if (sc.ch == '0') { // other type of number - switch (sc.chNext) { - case 'b': // binary (base 2) - numState = RAKUNUM_BINARY; - break; - case 'o': // octal (base 8) - numState = RAKUNUM_OCTAL; - break; - case 'x': // hexadecimal (base 16) - numState = RAKUNUM_HEX; - } - if (numState != RAKUNUM_DECIMAL) - sc.Forward(); // forward to number type char - } - } - - // --- Keywords / functions / types / barewords ------------------- - else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev)) - && IsWordStartChar(sc.ch)) { - len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)); - if (keywords.InList(s)) { - sc.SetState(SCE_RAKU_WORD); // Keywords - } else if(functions.InList(s)) { - sc.SetState(SCE_RAKU_FUNCTION); // Functions - } else if(typesBasic.InList(s)) { - sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic) - } else if(typesComposite.InList(s)) { - sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite) - } else if(typesDomainSpecific.InList(s)) { - sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific) - } else if(typesExceptions.InList(s)) { - sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions) - } else { - if (wordLast == "class") - sc.SetState(SCE_RAKU_CLASS); // a Class ident - else if (wordLast == "grammar") - sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident - else - sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword - identLast = s; // save identifier - } - if (adverbLast == "sym") { // special adverb ":sym" - sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier - identLast = s; // save identifier - } - if (sc.state != SCE_RAKU_IDENTIFIER) - wordLast = s; // save word - sc.Forward(len - 1); // ...forward past word - } - - // --- Adverbs ---------------------------------------------------- - else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) { - len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1); - if (adverbs.InList(s)) { - sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':') - adverbLast = s; // save word - sc.Forward(len); // ...forward past word (less offset: 1) - } - } - - // --- Identifiers: $mu / @positional / %associative / &callable -- - // see: https://docs.raku.org/language/variables - else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext) - || setSpecialVar.Contains(sc.chNext) - || IsWordStartChar(sc.chNext))) { - - // State based on sigil - switch (sc.ch) { - case '$': sc.SetState(SCE_RAKU_MU); - break; - case '@': sc.SetState(SCE_RAKU_POSITIONAL); - break; - case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE); - break; - case '&': sc.SetState(SCE_RAKU_CALLABLE); - } - const int state = sc.state; - sc.Forward(); - char ch_delim = 0; - if (setSpecialVar.Contains(sc.ch) - && !setWord.Contains(sc.chNext)) { // Process Special Var - ch_delim = -1; - } else if (setTwigil.Contains(sc.ch)) { // Process Twigil - sc.SetState(SCE_RAKU_OPERATOR); - if (sc.ch == '<' && setWord.Contains(sc.chNext)) - ch_delim = '>'; - sc.Forward(); - sc.SetState(state); - } - - // Process (any) identifier - if (ch_delim >= 0) { - sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1); - if (ch_delim > 0 && sc.chNext == ch_delim) { - sc.Forward(); - sc.SetState(SCE_RAKU_OPERATOR); - } - identLast = s; // save identifier - } - } - - // --- Operators -------------------------------------------------- - else if (IsOperatorChar(sc.ch)) { - // FIXME: better valid operator sequences needed? - sc.SetState(SCE_RAKU_OPERATOR); - } - - // --- Heredoc: begin --------------------------------------------- - else if (!hereDelim.empty() && sc.atLineEnd) { - if (IsANewLine(sc.ch)) - sc.Forward(); // skip a possible CRLF situation - sc.SetState(hereState); - } - - // Reset words: on operator simi-colon OR '}' (end of statement) - if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) { - wordLast.clear(); - identLast.clear(); - adverbLast.clear(); - } - } - - /* *** Determine if an "embedded comment" is to be entered ********** * - * This type of embedded comment section, or multi-line comment comes - * after a normal comment has begun... e.g: #`[ ... ] - * ****************************************************************** */ - else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') { - if (IsBracketOpenChar(sc.chNext)) { - sc.Forward(); // Condition met for "embedded comment" - dpEmbeded.opener = sc.ch; - - // Find the opposite (termination) closeing bracket (if any) - dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener); - if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment" - - // Find multiple opening character occurence - dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd); - sc.SetState(SCE_RAKU_COMMENTEMBED); - sc.Forward(dpEmbeded.count - 1); // incremented in the next loop - } - } - } - } - - // And we're done... - sc.Complete(); -} - -/* - * LexerRaku::Lex - * - Main fold method - * NOTE: although Raku uses and supports UNICODE characters, we're only looking - * at normal chars here, using 'SafeGetCharAt' - for folding purposes - * that is all we need. - */ -#define RAKU_HEADFOLD_SHIFT 4 -#define RAKU_HEADFOLD_MASK 0xF0 -void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) { - - // init LexAccessor / return if fold option is off - if (!options.fold) return; - LexAccessor styler(pAccess); - - // init char and line positions - const Sci_PositionU endPos = startPos + length; - Sci_Position lineCurrent = styler.GetLine(startPos); - - // Backtrack to last SCE_RAKU_DEFAULT line - if (startPos > 0 && lineCurrent > 0) { - while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) { - lineCurrent--; - startPos = styler.LineStart(lineCurrent); - } - lineCurrent = styler.GetLine(startPos); - } - Sci_PositionU lineStart = startPos; - Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1); - - // init line folding level - int levelPrev = SC_FOLDLEVELBASE; - if (lineCurrent > 0) - levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; - int levelCurrent = levelPrev; - - // init char and style variables - char chNext = styler[startPos]; - int stylePrev = styler.StyleAt(startPos - 1); - int styleNext = styler.StyleAt(startPos); - int styleNextStartLine = styler.StyleAt(lineStartNext); - int visibleChars = 0; - bool wasCommentMulti = false; - - // main loop - for (Sci_PositionU i = startPos; i < endPos; i++) { - - // next char, style and flags - const char ch = chNext; - chNext = styler.SafeGetCharAt(i + 1); - const int style = styleNext; - styleNext = styler.StyleAt(i + 1); - const bool atEOL = i == (lineStartNext - 1); - const bool atLineStart = i == lineStart; - - // --- Comments / Multi-line / POD ------------------------------------ - if (options.foldComment) { - - // Multi-line - if (options.foldCommentMultiline) { - if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`' - && styleNextStartLine == SCE_RAKU_COMMENTEMBED) { - levelCurrent++; - wasCommentMulti = true; // don't confuse line comments - } else if (style == SCE_RAKU_COMMENTEMBED && atLineStart - && styleNextStartLine != SCE_RAKU_COMMENTEMBED) { - levelCurrent--; - } - } - - // Line comments - if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE - && IsCommentLine(lineCurrent, styler)) { - if (!IsCommentLine(lineCurrent - 1, styler) - && IsCommentLine(lineCurrent + 1, styler)) - levelCurrent++; - else if (IsCommentLine(lineCurrent - 1, styler) - && !IsCommentLine(lineCurrent + 1, styler)) - levelCurrent--; - } - - // POD - if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) { - if (styler.Match(i, "=begin")) - levelCurrent++; - else if (styler.Match(i, "=end")) - levelCurrent--; - } - } - - // --- Code block ----------------------------------------------------- - if (style == SCE_RAKU_OPERATOR) { - if (ch == '{') { - if (levelCurrent < levelPrev) levelPrev--; - levelCurrent++; - } else if (ch == '}') { - levelCurrent--; - } - } - - // --- at end of line / range / apply fold ---------------------------- - if (atEOL) { - int level = levelPrev; - - // set level flags - level |= levelCurrent << 16; - if (visibleChars == 0 && options.foldCompact) - level |= SC_FOLDLEVELWHITEFLAG; - if ((levelCurrent > levelPrev) && (visibleChars > 0)) - level |= SC_FOLDLEVELHEADERFLAG; - if (level != styler.LevelAt(lineCurrent)) { - styler.SetLevel(lineCurrent, level); - } - lineCurrent++; - lineStart = lineStartNext; - lineStartNext = styler.LineStart(lineCurrent + 1); - styleNextStartLine = styler.StyleAt(lineStartNext); - levelPrev = levelCurrent; - visibleChars = 0; - wasCommentMulti = false; - } - - // increment visibleChars / set previous char - if (!isspacechar(ch)) - visibleChars++; - stylePrev = style; - } - - // Done: set real level of the next line - int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; - styler.SetLevel(lineCurrent, levelPrev | flagsNext); -} - -/*----------------------------------------------------------------------------* - * --- Scintilla: LexerModule --- - *----------------------------------------------------------------------------*/ - -LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists); |
