diff options
author | Mark Reay <mark@reay.net.au> | 2020-01-03 23:08:28 +0000 |
---|---|---|
committer | Mark Reay <mark@reay.net.au> | 2020-01-03 23:08:28 +0000 |
commit | 7f43e72c8ca4555105cd0b97863d3907f7fc62a5 (patch) | |
tree | 2f4e8c73c2dc1b1e54bedda8f54fb3b4d0a3b938 | |
parent | b5623cb722b4ba7607979f52d4cf26e8be86f385 (diff) | |
download | scintilla-mirror-7f43e72c8ca4555105cd0b97863d3907f7fc62a5.tar.gz |
Added Raku lexer and style properties and example files
-rw-r--r-- | cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj | 4 | ||||
-rw-r--r-- | cppcheck.suppress | 2 | ||||
-rw-r--r-- | gtk/deps.mak | 14 | ||||
-rw-r--r-- | include/SciLexer.h | 30 | ||||
-rw-r--r-- | include/Scintilla.iface | 32 | ||||
-rw-r--r-- | lexers/LexRaku.cxx | 1602 | ||||
-rw-r--r-- | lexilla/src/Lexilla.cxx | 2 | ||||
-rw-r--r-- | lexilla/src/deps.mak | 14 | ||||
-rw-r--r-- | lexilla/src/lexilla.mak | 1 | ||||
-rw-r--r-- | lexilla/src/nmdeps.mak | 14 | ||||
-rw-r--r-- | lexilla/test/examples/raku/SciTE.properties | 113 | ||||
-rw-r--r-- | lexilla/test/examples/raku/x.p6 | 54 | ||||
-rw-r--r-- | lexilla/test/examples/raku/x.p6.styled | 54 | ||||
-rw-r--r-- | src/Catalogue.cxx | 1 | ||||
-rw-r--r-- | win32/deps.mak | 14 | ||||
-rw-r--r-- | win32/nmdeps.mak | 14 | ||||
-rw-r--r-- | win32/scintilla.mak | 1 |
17 files changed, 1966 insertions, 0 deletions
diff --git a/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj b/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj index bd60d8784..7c2da51d5 100644 --- a/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj +++ b/cocoa/ScintillaFramework/ScintillaFramework.xcodeproj/project.pbxproj @@ -235,6 +235,7 @@ AE894E1CB7328CAE5B2EF47E /* LexX12.cxx in Sources */ = {isa = PBXBuildFile; fileRef = ADA64364A443F3E3F02D294E /* LexX12.cxx */; }; 902B40FE926FE48538B168F1 /* LexDataflex.cxx in Sources */ = {isa = PBXBuildFile; fileRef = 362E48F5A7F79598CB0B037D /* LexDataflex.cxx */; }; 4AA242EE8F0CCEA01AB59842 /* LexHollywood.cxx in Sources */ = {isa = PBXBuildFile; fileRef = 96884184929F317E72FC1BE8 /* LexHollywood.cxx */; }; + 513A4B43B903344E142C441E /* LexRaku.cxx in Sources */ = {isa = PBXBuildFile; fileRef = 48484CD7A1F20D09703376E5 /* LexRaku.cxx */; }; /* End PBXBuildFile section */ /* Begin PBXFileReference section */ @@ -473,6 +474,7 @@ ADA64364A443F3E3F02D294E /* LexX12.cxx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexX12.cxx; path = ../../lexers/LexX12.cxx; sourceTree = SOURCE_ROOT; }; 362E48F5A7F79598CB0B037D /* LexDataflex.cxx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexDataflex.cxx; path = ../../lexers/LexDataflex.cxx; sourceTree = SOURCE_ROOT; }; 96884184929F317E72FC1BE8 /* LexHollywood.cxx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexHollywood.cxx; path = ../../lexers/LexHollywood.cxx; sourceTree = SOURCE_ROOT; }; + 48484CD7A1F20D09703376E5 /* LexRaku.cxx */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexRaku.cxx; path = ../../lexers/LexRaku.cxx; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -656,6 +658,7 @@ 114B6EF711FA7526004FB6AB /* 
LexPS.cxx */, 114B6EF811FA7526004FB6AB /* LexPython.cxx */, 114B6EF911FA7526004FB6AB /* LexR.cxx */, + 48484CD7A1F20D09703376E5 /* LexRaku.cxx */, 114B6EFA11FA7526004FB6AB /* LexRebol.cxx */, 28A7D6041995E47D0062D204 /* LexRegistry.cxx */, 114B6EFB11FA7526004FB6AB /* LexRuby.cxx */, @@ -1142,6 +1145,7 @@ AE894E1CB7328CAE5B2EF47E /* LexX12.cxx in Sources */, 902B40FE926FE48538B168F1 /* LexDataflex.cxx in Sources */, 4AA242EE8F0CCEA01AB59842 /* LexHollywood.cxx in Sources */, + 513A4B43B903344E142C441E /* LexRaku.cxx in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/cppcheck.suppress b/cppcheck.suppress index eaba434fe..8523b30f2 100644 --- a/cppcheck.suppress +++ b/cppcheck.suppress @@ -99,6 +99,8 @@ constParameter:scintilla/lexers/LexPython.cxx shadowVariable:scintilla/lexers/LexPowerPro.cxx
constParameter:scintilla/lexers/LexProgress.cxx
variableScope:scintilla/lexers/LexProgress.cxx
+constParameter:scintilla/lexers/LexRaku.cxx
+variableScope:scintilla/lexers/LexRaku.cxx
redundantInitialization:scintilla/lexers/LexRegistry.cxx
constParameter:scintilla/lexers/LexRuby.cxx
variableScope:scintilla/lexers/LexRuby.cxx
diff --git a/gtk/deps.mak b/gtk/deps.mak index 6970424bc..6ba5eff2e 100644 --- a/gtk/deps.mak +++ b/gtk/deps.mak @@ -1630,6 +1630,20 @@ LexR.o: \ ../lexlib/StyleContext.h \ ../lexlib/CharacterSet.h \ ../lexlib/LexerModule.h +LexRaku.o: \ + ../lexers/LexRaku.cxx \ + ../include/ILexer.h \ + ../include/Sci_Position.h \ + ../include/Scintilla.h \ + ../include/SciLexer.h \ + ../lexlib/WordList.h \ + ../lexlib/LexAccessor.h \ + ../lexlib/StyleContext.h \ + ../lexlib/CharacterSet.h \ + ../lexlib/CharacterCategory.h \ + ../lexlib/LexerModule.h \ + ../lexlib/OptionSet.h \ + ../lexlib/DefaultLexer.h LexRebol.o: \ ../lexers/LexRebol.cxx \ ../include/ILexer.h \ diff --git a/include/SciLexer.h b/include/SciLexer.h index 51c0ebb8d..4ea1b6f6f 100644 --- a/include/SciLexer.h +++ b/include/SciLexer.h @@ -143,6 +143,7 @@ #define SCLEX_X12 128 #define SCLEX_DATAFLEX 129 #define SCLEX_HOLLYWOOD 130 +#define SCLEX_RAKU 131 #define SCLEX_AUTOMATIC 1000 #define SCE_P_DEFAULT 0 #define SCE_P_COMMENTLINE 1 @@ -1927,6 +1928,35 @@ #define SCE_HOLLYWOOD_IDENTIFIER 12 #define SCE_HOLLYWOOD_CONSTANT 13 #define SCE_HOLLYWOOD_HEXNUMBER 14 +#define SCE_RAKU_DEFAULT 0 +#define SCE_RAKU_ERROR 1 +#define SCE_RAKU_COMMENTLINE 2 +#define SCE_RAKU_COMMENTEMBED 3 +#define SCE_RAKU_POD 4 +#define SCE_RAKU_CHARACTER 5 +#define SCE_RAKU_HEREDOC_Q 6 +#define SCE_RAKU_HEREDOC_QQ 7 +#define SCE_RAKU_STRING 8 +#define SCE_RAKU_STRING_Q 9 +#define SCE_RAKU_STRING_QQ 10 +#define SCE_RAKU_STRING_Q_LANG 11 +#define SCE_RAKU_STRING_VAR 12 +#define SCE_RAKU_REGEX 13 +#define SCE_RAKU_REGEX_VAR 14 +#define SCE_RAKU_ADVERB 15 +#define SCE_RAKU_NUMBER 16 +#define SCE_RAKU_PREPROCESSOR 17 +#define SCE_RAKU_OPERATOR 18 +#define SCE_RAKU_WORD 19 +#define SCE_RAKU_FUNCTION 20 +#define SCE_RAKU_IDENTIFIER 21 +#define SCE_RAKU_TYPEDEF 22 +#define SCE_RAKU_MU 23 +#define SCE_RAKU_POSITIONAL 24 +#define SCE_RAKU_ASSOCIATIVE 25 +#define SCE_RAKU_CALLABLE 26 +#define SCE_RAKU_GRAMMAR 27 +#define SCE_RAKU_CLASS 28 /* 
--Autogenerated -- end of section automatically generated from Scintilla.iface */ #endif diff --git a/include/Scintilla.iface b/include/Scintilla.iface index fecd1b45f..26e653900 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -3185,6 +3185,7 @@ val SCLEX_CIL=127 val SCLEX_X12=128 val SCLEX_DATAFLEX=129 val SCLEX_HOLLYWOOD=130 +val SCLEX_RAKU=131 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a # value assigned in sequence from SCLEX_AUTOMATIC+1. @@ -5215,6 +5216,37 @@ val SCE_HOLLYWOOD_OPERATOR=11 val SCE_HOLLYWOOD_IDENTIFIER=12 val SCE_HOLLYWOOD_CONSTANT=13 val SCE_HOLLYWOOD_HEXNUMBER=14 +# Lexical states for SCLEX_RAKU +lex Raku=SCLEX_RAKU SCE_RAKU_ +val SCE_RAKU_DEFAULT=0 +val SCE_RAKU_ERROR=1 +val SCE_RAKU_COMMENTLINE=2 +val SCE_RAKU_COMMENTEMBED=3 +val SCE_RAKU_POD=4 +val SCE_RAKU_CHARACTER=5 +val SCE_RAKU_HEREDOC_Q=6 +val SCE_RAKU_HEREDOC_QQ=7 +val SCE_RAKU_STRING=8 +val SCE_RAKU_STRING_Q=9 +val SCE_RAKU_STRING_QQ=10 +val SCE_RAKU_STRING_Q_LANG=11 +val SCE_RAKU_STRING_VAR=12 +val SCE_RAKU_REGEX=13 +val SCE_RAKU_REGEX_VAR=14 +val SCE_RAKU_ADVERB=15 +val SCE_RAKU_NUMBER=16 +val SCE_RAKU_PREPROCESSOR=17 +val SCE_RAKU_OPERATOR=18 +val SCE_RAKU_WORD=19 +val SCE_RAKU_FUNCTION=20 +val SCE_RAKU_IDENTIFIER=21 +val SCE_RAKU_TYPEDEF=22 +val SCE_RAKU_MU=23 +val SCE_RAKU_POSITIONAL=24 +val SCE_RAKU_ASSOCIATIVE=25 +val SCE_RAKU_CALLABLE=26 +val SCE_RAKU_GRAMMAR=27 +val SCE_RAKU_CLASS=28 # Events diff --git a/lexers/LexRaku.cxx b/lexers/LexRaku.cxx new file mode 100644 index 000000000..a06a573bb --- /dev/null +++ b/lexers/LexRaku.cxx @@ -0,0 +1,1602 @@ +/** @file LexRaku.cxx + ** Lexer for Raku + ** + ** Copyright (c) 2019 Mark Reay <mark@reay.net.au> + **/ +// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org> +// The License.txt file describes the conditions under which this software may be distributed. 
+ +/* + * Raku (Perl6) Lexer for Scintilla + * --------------------------------- + * --------------------------------- + * 06-Dec-2019: More Unicode support: + * - Added a full scope of allowed numbers and letters + * 29-Nov-2019: More highlighting / implemented basic folding: + * - Operators (blanket cover, no sequence checking) + * - Class / Grammar name highlighting + * - Folding: + * - Comments: line / multi-line + * - POD sections + * - Code blocks {} + * 26-Nov-2019: Basic syntax highlighting covering the following: + * - Comments, both line and embedded (multi-line) + * - POD, no inline highlighting as yet... + * - Heredoc block string, with variable highlighting (with qq) + * - Strings, with variable highlighting (with ") + * - Q Language, including adverbs (also basic q and qq) + * - Regex, including adverbs + * - Numbers + * - Bareword / identifiers + * - Types + * - Variables: mu, positional, associative, callable + * TODO: + * - POD inline + * - Better operator sequence coverage + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdarg.h> +#include <assert.h> +#include <ctype.h> + +#include <string> +#include <vector> +#include <map> + +#include "ILexer.h" +#include "Scintilla.h" +#include "SciLexer.h" + +#include "WordList.h" +#include "LexAccessor.h" +#include "StyleContext.h" +#include "CharacterSet.h" +#include "CharacterCategory.h" +#include "LexerModule.h" +#include "OptionSet.h" +#include "DefaultLexer.h" + +using namespace Scintilla; + +namespace { // anonymous namespace to isolate any name clashes +/*----------------------------------------------------------------------------* + * --- DEFINITIONS: OPTIONS / CONSTANTS --- + *----------------------------------------------------------------------------*/ + +// Number types +#define RAKUNUM_BINARY 1 // order is significant: 1-3 cannot have a dot +#define RAKUNUM_OCTAL 2 +#define RAKUNUM_FLOAT_EXP 3 // exponent part only +#define RAKUNUM_HEX 4 // may be a hex float 
+#define RAKUNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings +#define RAKUNUM_VECTOR 6 +#define RAKUNUM_V_VECTOR 7 +#define RAKUNUM_VERSION 8 // can contain multiple '.'s +#define RAKUNUM_BAD 9 + +// Regex / Q string types +#define RAKUTYPE_REGEX_NORM 0 // 0 char ident +#define RAKUTYPE_REGEX_S 1 // order is significant: +#define RAKUTYPE_REGEX_M 2 // 1 char ident +#define RAKUTYPE_REGEX_Y 3 // 1 char ident +#define RAKUTYPE_REGEX 4 // > RAKUTYPE_REGEX == 2 char identifiers +#define RAKUTYPE_REGEX_RX 5 // 2 char ident +#define RAKUTYPE_REGEX_TR 6 // 2 char ident +#define RAKUTYPE_QLANG 7 // < RAKUTYPE_QLANG == RAKUTYPE_REGEX_? +#define RAKUTYPE_STR_WQ 8 // 0 char ident < word quote > +#define RAKUTYPE_STR_Q 9 // 1 char ident +#define RAKUTYPE_STR_QX 10 // 2 char ident +#define RAKUTYPE_STR_QW 11 // 2 char ident +#define RAKUTYPE_STR_QQ 12 // 2 char ident +#define RAKUTYPE_STR_QQX 13 // 3 char ident +#define RAKUTYPE_STR_QQW 14 // 3 char ident +#define RAKUTYPE_STR_QQWW 15 // 4 char ident + +// Delimiter types +#define RAKUDELIM_BRACKET 0 // bracket: regex, Q language +#define RAKUDELIM_QUOTE 1 // quote: normal string + +// rakuWordLists: keywords as defined in config +const char *const rakuWordLists[] = { + "Keywords and identifiers", + "Functions", + "Types basic", + "Types composite", + "Types domain-specific", + "Types exception", + "Adverbs", + nullptr, +}; + +// Options and defaults +struct OptionsRaku { + bool fold; + bool foldCompact; + bool foldComment; + bool foldCommentMultiline; + bool foldCommentPOD; + OptionsRaku() { + fold = true; + foldCompact = false; + foldComment = true; + foldCommentMultiline = true; + foldCommentPOD = true; + } +}; + +// init options and words +struct OptionSetRaku : public OptionSet<OptionsRaku> { + OptionSetRaku() { + DefineProperty("fold", &OptionsRaku::fold); + DefineProperty("fold.comment", &OptionsRaku::foldComment); + DefineProperty("fold.compact", &OptionsRaku::foldCompact); + + 
DefineProperty("fold.raku.comment.multiline", &OptionsRaku::foldCommentMultiline, + "Set this property to 0 to disable folding multi-line comments when fold.comment=1."); + DefineProperty("fold.raku.comment.pod", &OptionsRaku::foldCommentPOD, + "Set this property to 0 to disable folding POD comments when fold.comment=1."); + + // init word lists + DefineWordListSets(rakuWordLists); + } +}; + +// Delimiter pair +struct DelimPair { + int opener; // opener char + int closer[2]; // closer chars + bool interpol; // can variables be interpolated? + short count; // delimiter char count + DelimPair() { + opener = 0; + closer[0] = 0; + closer[1] = 0; + interpol = false; + count = 0; + } + bool isCloser(int ch) const { + return ch == closer[0] || ch == closer[1]; + } +}; + +/*----------------------------------------------------------------------------* + * --- FUNCTIONS --- + *----------------------------------------------------------------------------*/ + +/* + * IsANewLine + * - returns true if this is a new line char + */ +constexpr bool IsANewLine(int ch) noexcept { + return ch == '\r' || ch == '\n'; +} + +/* + * IsAWhitespace + * - returns true if this is a whitespace (or newline) char + */ +bool IsAWhitespace(int ch) noexcept { + return IsASpaceOrTab(ch) || IsANewLine(ch); +} + +/* + * IsAlphabet + * - returns true if this is an alphabetical char + */ +constexpr bool IsAlphabet(int ch) noexcept { + return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); +} + +/* + * IsCommentLine + * - returns true if this is a comment line + * - tests: SCE_RAKU_COMMENTLINE or SCE_RAKU_COMMENTEMBED + * modified from: LexPerl.cxx + */ +bool IsCommentLine(Sci_Position line, LexAccessor &styler, int type = SCE_RAKU_COMMENTLINE) { + Sci_Position pos = styler.LineStart(line); + Sci_Position eol_pos = styler.LineStart(line + 1) - 1; + for (Sci_Position i = pos; i < eol_pos; i++) { + char ch = styler[i]; + int style = styler.StyleAt(i); + if (type == SCE_RAKU_COMMENTEMBED) { + if (i == 
(eol_pos - 1) && style == type) + return true; + } else { // make sure the line is NOT a SCE_RAKU_COMMENTEMBED + if (ch == '#' && style == type && styler[i+1] != '`' ) + return true; + else if (!IsASpaceOrTab(ch)) + return false; + } + } + return false; +} + +/* + * GetBracketCloseChar + * - returns the end bracket char: opposite of start + * - see: http://www.unicode.org/Public/5.1.0/ucd/BidiMirroring.txt (first section) + * - Categories are general matches for valid BiDi types + * - Most closer chars are opener + 1 + */ +int GetBracketCloseChar(const int ch) noexcept { + const CharacterCategory cc = CategoriseCharacter(ch); + switch (cc) { + case ccSm: + switch (ch) { + case 0x3C: return 0x3E; // LESS-THAN SIGN + case 0x2208: return 0x220B; // ELEMENT OF + case 0x2209: return 0x220C; // NOT AN ELEMENT OF + case 0x220A: return 0x220D; // SMALL ELEMENT OF + case 0x2215: return 0x29F5; // DIVISION SLASH + case 0x2243: return 0x22CD; // ASYMPTOTICALLY EQUAL TO + case 0x2298: return 0x29B8; // CIRCLED DIVISION SLASH + case 0x22A6: return 0x2ADE; // ASSERTION + case 0x22A8: return 0x2AE4; // TRUE + case 0x22A9: return 0x2AE3; // FORCES + case 0x22AB: return 0x2AE5; // DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE + case 0x22F2: return 0x22FA; // ELEMENT OF WITH LONG HORIZONTAL STROKE + case 0x22F3: return 0x22FB; // ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE + case 0x22F4: return 0x22FC; // SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE + case 0x22F6: return 0x22FD; // ELEMENT OF WITH OVERBAR + case 0x22F7: return 0x22FE; // SMALL ELEMENT OF WITH OVERBAR + case 0xFF1C: return 0xFF1E; // FULLWIDTH LESS-THAN SIGN + } + break; + case ccPs: + switch (ch) { + case 0x5B: return 0x5D; // LEFT SQUARE BRACKET + case 0x7B: return 0x7D; // LEFT CURLY BRACKET + case 0x298D: return 0x2990; // LEFT SQUARE BRACKET WITH TICK IN TOP CORNER + case 0x298F: return 0x298E; // LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER + case 0xFF3B: return 0xFF3D; // 
FULLWIDTH LEFT SQUARE BRACKET + case 0xFF5B: return 0xFF5D; // FULLWIDTH LEFT CURLY BRACKET + } + break; + case ccPi: + break; + default: return 0; + } + return ch + 1; +} + +/* + * IsValidQuoteOpener + * - + */ +bool IsValidQuoteOpener(const int ch, DelimPair &dp, int type = RAKUDELIM_BRACKET) noexcept { + dp.closer[0] = 0; + dp.closer[1] = 0; + dp.interpol = true; + if (type == RAKUDELIM_QUOTE) { + switch (ch) { + // Opener Closer Description + case '\'': dp.closer[0] = '\''; // APOSTROPHE + dp.interpol = false; + break; + case '"': dp.closer[0] = '"'; // QUOTATION MARK + break; + case 0x2018: dp.closer[0] = 0x2019; // LEFT SINGLE QUOTATION MARK + dp.interpol = false; + break; + case 0x201C: dp.closer[0] = 0x201D; // LEFT DOUBLE QUOTATION MARK + break; + case 0x201D: dp.closer[0] = 0x201C; // RIGHT DOUBLE QUOTATION MARK + break; + case 0x201E: dp.closer[0] = 0x201C; // DOUBLE LOW-9 QUOTATION MARK + dp.closer[1] = 0x201D; + break; + case 0xFF62: dp.closer[0] = 0xFF63; // HALFWIDTH LEFT CORNER BRACKET + dp.interpol = false; + break; + default: return false; + } + } else if (type == RAKUDELIM_BRACKET) { + dp.closer[0] = GetBracketCloseChar(ch); + } + dp.opener = ch; + dp.count = 1; + return dp.closer[0] > 0; +} + +/* + * IsBracketOpenChar + * - true if this is a valid start bracket character + */ +bool IsBracketOpenChar(int ch) noexcept { + return GetBracketCloseChar(ch) > 0; +} + +/* + * IsValidRegOrQAdjacent + * - returns true if ch is a valid character to put directly after Q / q + * * ref: Q Language: https://docs.raku.org/language/quoting + */ +bool IsValidRegOrQAdjacent(int ch) noexcept { + return !(IsAlphaNumeric(ch) || ch == '_' || ch == '(' || ch == ')' || ch == '\'' ); +} + +/* + * IsValidRegOrQPrecede + * - returns true if ch is a valid preceeding character to put directly before Q / q + * * ref: Q Language: https://docs.raku.org/language/quoting + */ +bool IsValidRegOrQPrecede(int ch) noexcept { + return !(IsAlphaNumeric(ch) || ch == '_'); +} + +/* + * 
MatchCharInRange + * - returns true if the mach character is found in range (of length) + * - ignoreDelim (default false) + */ +bool MatchCharInRange(StyleContext &sc, const Sci_Position length, + const int match, bool ignoreDelim = false) { + Sci_Position len = 0; + int chPrev = sc.chPrev; + while (++len < length) { + const int ch = sc.GetRelativeCharacter(len); + if (ch == match && (ignoreDelim || chPrev != '\\')) + return true; + } + return false; +} + +/* + * PrevNonWhitespaceChar + * - returns the last non-whitespace char + */ +int PrevNonWhitespaceChar(StyleContext &sc) { + Sci_Position rel = 0; + Sci_Position max_back = 0 - sc.currentPos; + while (--rel > max_back) { + const int ch = sc.GetRelativeCharacter(rel); + if (!IsAWhitespace(ch)) + return ch; + } + return 0; // no matching char +} + +/* + * IsQLangStartAtScPos + * - returns true if this is a valid Q Language sc position + * - ref: https://docs.raku.org/language/quoting + * - Q :adverb :adverb //; + * - q,qx,qw,qq,qqx,qqw,qqww :adverb /:adverb /; + */ +bool IsQLangStartAtScPos(StyleContext &sc, int &type, const Sci_Position length) { + const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); + const int chFw2 = sc.GetRelativeCharacter(2); + const int chFw3 = sc.GetRelativeCharacter(3); + type = -1; + if (IsValidRegOrQPrecede(sc.chPrev)) { + if (sc.ch == 'Q' && valid_adj) { + type = RAKUTYPE_QLANG; + } else if (sc.ch == 'q') { + switch (sc.chNext) { + case 'x': + type = RAKUTYPE_STR_QX; + break; + case 'w': + type = RAKUTYPE_STR_QW; + break; + case 'q': + if (chFw2 == 'x') { + type = RAKUTYPE_STR_QQX; + } else if (chFw2 == 'w') { + if (chFw3 == 'w') { + type = RAKUTYPE_STR_QQWW; + } else { + type = RAKUTYPE_STR_QQW; + } + } else { + type = RAKUTYPE_STR_QQ; + } + break; + default: + type = RAKUTYPE_STR_Q; + } + } else if (sc.ch == '<' && MatchCharInRange(sc, length, '>')) { + type = RAKUTYPE_STR_WQ; // < word quote > + } + } + return type >= 0; +} + +/* + * IsRegexStartAtScPos + * - returns true if 
this is a valid Regex sc position + * - ref: https://docs.raku.org/language/regexes + * - Regex: (rx/s/m/tr/y) :adverb /:adverb /; + * - regex R :adverb //; + * - /:adverb /; + */ +bool IsRegexStartAtScPos(StyleContext &sc, int &type, CharacterSet &set) { + const bool valid_adj = IsValidRegOrQAdjacent(sc.chNext); + type = -1; + if (IsValidRegOrQPrecede(sc.chPrev)) { + switch (sc.ch) { + case 'r': + if (sc.chNext == 'x') + type = RAKUTYPE_REGEX_RX; + break; + case 't': + case 'T': + if (sc.chNext == 'r' || sc.chNext == 'R') + type = RAKUTYPE_REGEX_TR; + break; + case 'm': + if (valid_adj) + type = RAKUTYPE_REGEX_M; + break; + case 's': + case 'S': + if (valid_adj) + type = RAKUTYPE_REGEX_S; + break; + case 'y': + if (valid_adj) + type = RAKUTYPE_REGEX_Y; + break; + case '/': + if (set.Contains(PrevNonWhitespaceChar(sc))) + type = RAKUTYPE_REGEX_NORM; + } + } + return type >= 0; +} + +/* + * IsValidIdentPrecede + * - returns if ch is a valid preceeding char to put directly before an identifier + */ +bool IsValidIdentPrecede(int ch) noexcept { + return !(IsAlphaNumeric(ch) || ch == '_' || ch == '@' || ch == '$' || ch == '%'); +} + +/* + * IsValidDelimiter + * - returns if ch is a valid delimiter (most chars are valid) + * * ref: Q Language: https://docs.raku.org/language/quoting + */ +bool IsValidDelimiter(int ch) noexcept { + return !(IsAlphaNumeric(ch) || ch == ':'); +} + +/* + * GetDelimiterCloseChar + * - returns the corrisponding close char for a given delimiter (could be the same char) + */ +int GetDelimiterCloseChar(int ch) noexcept { + int ch_end = GetBracketCloseChar(ch); + if (ch_end == 0 && IsValidDelimiter(ch)) { + ch_end = ch; + } + return ch_end; +} + +/* + * GetRepeatCharCount + * - returns the occurence count of match + */ +Sci_Position GetRepeatCharCount(StyleContext &sc, int chMatch, Sci_Position length) { + Sci_Position cnt = 0; + while (cnt < length) { + if (sc.GetRelativeCharacter(cnt) != chMatch) { + break; + } + cnt++; + } + return cnt; +} + +/* 
+ * LengthToDelimiter + * - returns the length until the end of a delimited string section + * - Ignores nested delimiters (if opener != closer) + * - no trailing char after last closer (default false) + */ +Sci_Position LengthToDelimiter(StyleContext &sc, const DelimPair &dp, + Sci_Position length, bool noTrailing = false) { + short cnt_open = 0; // count open bracket + short cnt_close = 0; // count close bracket + Sci_Position len = 0; // count characters + int chOpener = dp.opener; // look for nested opener / closer + if (dp.opener == dp.closer[0]) + chOpener = 0; // no opening delimiter (no nesting possible) + + while (len < length) { + const int chPrev = sc.GetRelativeCharacter(len - 1); + const int ch = sc.GetRelativeCharacter(len); + const int chNext = sc.GetRelativeCharacter(len+1); + + if (cnt_open == 0 && cnt_close == dp.count) { + return len; // end condition has been met + } else { + if (chPrev != '\\' && ch == chOpener) { // ignore escape sequence + cnt_open++; // open nested bracket + } else if (chPrev != '\\' && dp.isCloser(ch)) { // ignore escape sequence + if ( cnt_open > 0 ) { + cnt_open--; // close nested bracket + } else if (dp.count > 1 && cnt_close < (dp.count - 1)) { + if (cnt_close > 1) { + if (dp.isCloser(chPrev)) { + cnt_close++; + } else { // reset if previous char was not close + cnt_close = 0; + } + } else { + cnt_close++; + } + } else if (!noTrailing || (IsAWhitespace(chNext))) { + cnt_close++; // found last close + if (cnt_close > 1 && !dp.isCloser(chPrev)) { + cnt_close = 0; // reset if previous char was not close + } + } else { + cnt_close = 0; // non handled close: reset + } + } else if (IsANewLine(ch)) { + cnt_open = 0; // reset after each line + cnt_close = 0; + } + } + len++; + } + return -1; // end condition has NOT been met +} + +/* + * LengthToEndHeredoc + * - returns the length until the end of a heredoc section + * - delimiter string MUST begin on a new line + */ +Sci_Position LengthToEndHeredoc(const StyleContext &sc, 
LexAccessor &styler, + const Sci_Position length, const char *delim) { + bool on_new_ln = false; + int i = 0; // str index + for (int n = 0; n < length; n++) { + const char ch = styler.SafeGetCharAt(sc.currentPos + n, 0); + if (on_new_ln) { + if (delim[i] == '\0') + return n; // at end of str, match found! + if (ch != delim[i++]) + i = 0; // no char match, reset 'i'ndex + } + if (i == 0) // detect new line + on_new_ln = IsANewLine(ch); + } + return -1; // no match found +} + +/* + * LengthToNextChar + * - returns the length until the next character + */ +Sci_Position LengthToNextChar(StyleContext &sc, const Sci_Position length) { + Sci_Position len = 0; + while (++len < length) { + const int ch = sc.GetRelativeCharacter(len); + if (!IsASpaceOrTab(ch) && !IsANewLine(ch)) { + break; + } + } + return len; +} + +/* + * GetRelativeString + * - gets a relitive string and sets it in &str + * - resets string before seting + */ +void GetRelativeString(StyleContext &sc, Sci_Position offset, Sci_Position length, + std::string &str) { + Sci_Position pos = offset; + str.clear(); + while (pos < length) { + str += sc.GetRelativeCharacter(pos++); + } +} + +} // end anonymous namespace + +/*----------------------------------------------------------------------------* + * --- class: LexerRaku --- + *----------------------------------------------------------------------------*/ +//class LexerRaku : public ILexerWithMetaData { +class LexerRaku : public DefaultLexer { + CharacterSet setWord; + CharacterSet setSigil; + CharacterSet setTwigil; + CharacterSet setOperator; + CharacterSet setSpecialVar; + WordList regexIdent; // identifiers that specify a regex + OptionsRaku options; // Options from config + OptionSetRaku osRaku; + WordList keywords; // Word Lists from config + WordList functions; + WordList typesBasic; + WordList typesComposite; + WordList typesDomainSpecific; + WordList typesExceptions; + WordList adverbs; + +public: + // Defined as explicit, so that constructor can not 
be copied + explicit LexerRaku() : + DefaultLexer("raku", SCLEX_RAKU), + setWord(CharacterSet::setAlphaNum, "-_", 0x80), + setSigil(CharacterSet::setNone, "$&%@"), + setTwigil(CharacterSet::setNone, "!*.:<=?^~"), + setOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;<>,?!.~"), + setSpecialVar(CharacterSet::setNone, "_/!") { + regexIdent.Set("regex rule token"); + } + // Deleted so LexerRaku objects can not be copied. + LexerRaku(const LexerRaku &) = delete; + LexerRaku(LexerRaku &&) = delete; + void operator=(const LexerRaku &) = delete; + void operator=(LexerRaku &&) = delete; + virtual ~LexerRaku() { + } + void SCI_METHOD Release() noexcept override { + delete this; + } + int SCI_METHOD Version() const noexcept override { + return lvRelease5; + } + const char *SCI_METHOD PropertyNames() override { + return osRaku.PropertyNames(); + } + int SCI_METHOD PropertyType(const char *name) override { + return osRaku.PropertyType(name); + } + const char *SCI_METHOD DescribeProperty(const char *name) override { + return osRaku.DescribeProperty(name); + } + Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override; + const char *SCI_METHOD PropertyGet(const char *key) override { + return osRaku.PropertyGet(key); + } + const char *SCI_METHOD DescribeWordListSets() override { + return osRaku.DescribeWordListSets(); + } + Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override; + void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override; + + static ILexer5 *LexerFactoryRaku() { + return new LexerRaku(); + } + +protected: + bool IsOperatorChar(const int ch); + bool IsWordChar(const int ch, bool allowNumber = true); + bool IsWordStartChar(const int ch); + bool IsNumberChar(const int ch, int base = 10); + bool ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, + int 
&type, const DelimPair &dp); + void ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState); + bool ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, + WordList &wordsAdverbs, DelimPair &dp); + Sci_Position LengthToNonWordChar(StyleContext &sc, Sci_Position length, + char *s, const int size, Sci_Position offset = 0); +}; + +/*----------------------------------------------------------------------------* + * --- METHODS: LexerRaku --- + *----------------------------------------------------------------------------*/ + +/* + * LexerRaku::IsOperatorChar + * - Test for both ASCII and Unicode operators + * see: https://docs.raku.org/language/unicode_entry + */ +bool LexerRaku::IsOperatorChar(const int ch) { + if (ch > 0x7F) { + switch (ch) { + // Unicode ASCII Equiv. + case 0x2208: // (elem) + case 0x2209: // !(elem) + case 0x220B: // (cont) + case 0x220C: // !(cont) + case 0x2216: // (-) + case 0x2229: // (&) + case 0x222A: // (|) + case 0x2282: // (<) + case 0x2283: // (>) + case 0x2284: // !(<) + case 0x2285: // !(>) + case 0x2286: // (<=) + case 0x2287: // (>=) + case 0x2288: // !(<=) + case 0x2289: // !(>=) + case 0x228D: // (.) 
+ case 0x228E: // (+) + case 0x2296: // (^) + return true; + } + } + return setOperator.Contains(ch); +} + +/* + * LexerRaku::IsWordChar + * - Test for both ASCII and Unicode identifier characters + * see: https://docs.raku.org/language/unicode_ascii + * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt + * FIXME: *still* may not contain all valid characters + */ +bool LexerRaku::IsWordChar(const int ch, bool allowNumber) { + // Unicode numbers should not apear in word identifiers + if (ch > 0x7F) { + const CharacterCategory cc = CategoriseCharacter(ch); + switch (cc) { + // Letters + case ccLu: + case ccLl: + case ccLt: + case ccLm: + case ccLo: + return true; + default: + return false; + } + } else if (allowNumber && IsADigit(ch)) { + return true; // an ASCII number type + } + return setWord.Contains(ch); +} + +/* + * LexerRaku::IsWordStartChar + * - Test for both ASCII and Unicode identifier "start / first" characters + */ +bool LexerRaku::IsWordStartChar(const int ch) { + return ch != '-' && IsWordChar(ch, false); // no numbers allowed +} + +/* + * LexerRaku::IsNumberChar + * - Test for both ASCII and Unicode identifier number characters + * see: https://docs.raku.org/language/unicode_ascii + * also: ftp://ftp.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt + * FILTERED by Unicode letters that are NUMBER + * and NOT PARENTHESIZED or CIRCLED + * FIXME: *still* may not contain all valid number characters + */ +bool LexerRaku::IsNumberChar(const int ch, int base) { + if (ch > 0x7F) { + const CharacterCategory cc = CategoriseCharacter(ch); + switch (cc) { + // Numbers + case ccNd: + case ccNl: + case ccNo: + return true; + default: + return false; + } + } + return IsADigit(ch, base); +} + +/* + * LexerRaku::PropertySet + * - + */ +Sci_Position SCI_METHOD LexerRaku::PropertySet(const char *key, const char *val) { + if (osRaku.PropertySet(&options, key, val)) + return 0; + return -1; +} + +/* + * LexerRaku::WordListSet + * - + */ +Sci_Position 
SCI_METHOD LexerRaku::WordListSet(int n, const char *wl) { + WordList *wordListN = nullptr; + switch (n) { + case 0: + wordListN = &keywords; + break; + case 1: + wordListN = &functions; + break; + case 2: + wordListN = &typesBasic; + break; + case 3: + wordListN = &typesComposite; + break; + case 4: + wordListN = &typesDomainSpecific; + break; + case 5: + wordListN = &typesExceptions; + break; + case 6: + wordListN = &adverbs; + break; + } + Sci_Position firstModification = -1; + if (wordListN) { + WordList wlNew; + wlNew.Set(wl); + if (*wordListN != wlNew) { + wordListN->Set(wl); + firstModification = 0; + } + } + return firstModification; +} + +/* + * LexerRaku::ProcessRegexTwinCapture + * - processes the transition between a regex pair (two sets of delimiters) + * - moves to first new delimiter, if a bracket + * - returns true when valid delimiter start found (if bracket) + */ +bool LexerRaku::ProcessRegexTwinCapture(StyleContext &sc, const Sci_Position length, + int &type, const DelimPair &dp) { + + if (type == RAKUTYPE_REGEX_S || type == RAKUTYPE_REGEX_TR || type == RAKUTYPE_REGEX_Y) { + type = -1; // clear type + + // move past chRegQClose if it was the previous char + if (dp.isCloser(sc.chPrev)) + sc.Forward(); + + // no processing needed for non-bracket + if (dp.isCloser(dp.opener)) + return true; + + // move to next opening bracket + const Sci_Position len = LengthToNextChar(sc, length); + if (sc.GetRelativeCharacter(len) == dp.opener) { + sc.Forward(len); + return true; + } + } + return false; +} + +/* + * LexerRaku::ProcessStringVars + * - processes a string and highlights any valid variables + */ +void LexerRaku::ProcessStringVars(StyleContext &sc, const Sci_Position length, const int varState) { + const int state = sc.state; + for (Sci_Position pos = 0; pos < length; pos++) { + if (sc.state == varState && !IsWordChar(sc.ch)) { + sc.SetState(state); + } else if (sc.chPrev != '\\' + && (sc.ch == '$' || sc.ch == '@') + && IsWordStartChar(sc.chNext)) { + 
sc.SetState(varState); + } + sc.Forward(); // Next character + } +} +/* + * LexerRaku::ProcessValidRegQlangStart + * - processes a section of the document range from after a Regex / Q delimiter + * - 'length' is the number of characters left in the range; it is kept up to + * date as the context moves forward + * - returns true on success + * - sets: wordsAdverbs, dp.opener, dp.closer, dp.count + * ref: https://docs.raku.org/language/regexes + */ +bool LexerRaku::ProcessValidRegQlangStart(StyleContext &sc, Sci_Position length, const int type, + WordList &wordsAdverbs, DelimPair &dp) { + Sci_Position startPos = sc.currentPos; + const int target_state = sc.state; + int state = SCE_RAKU_DEFAULT; + std::string str; + + // find our opening delimiter (and occurrences) / save any adverbs + dp.opener = 0; // adverbs can be after the first delimiter + bool got_all_adverbs = false; // in Regex statements + bool got_ident = false; // regex can have an identifier: 'regex R' + sc.SetState(state); // set state default to avoid pre-highlights + while ((dp.opener == 0 || !got_all_adverbs) && sc.More()) { + + // move to the next non-space character + const bool was_space = IsAWhitespace(sc.ch); + if (!got_all_adverbs && was_space) { + sc.Forward(LengthToNextChar(sc, length)); + } + length -= sc.currentPos - startPos; // update length remaining + startPos = sc.currentPos; // re-base: progress already subtracted must not be counted again on the next pass + + // parse / eat an identifier (if type == RAKUTYPE_REGEX) + if (dp.opener == 0 && !got_ident && type == RAKUTYPE_REGEX && IsAlphabet(sc.ch)) { + + // eat identifier / account for special adverb :sym<name> + bool got_sym = false; + while (sc.More()) { + sc.SetState(SCE_RAKU_IDENTIFIER); + while (sc.More() && (IsAlphaNumeric(sc.chNext) + || sc.chNext == '_' || sc.chNext == '-')) { + sc.Forward(); + } + sc.Forward(); + if (got_sym && sc.ch == '>') { + sc.SetState(SCE_RAKU_OPERATOR); // '>' + sc.Forward(); + break; + } else if (type == RAKUTYPE_REGEX && sc.Match(":sym<")) { + sc.SetState(SCE_RAKU_ADVERB); // ':sym' + sc.Forward(4); + sc.SetState(SCE_RAKU_OPERATOR); // '<' + sc.Forward(); + got_sym = true; + } else { + break; + } + } + sc.SetState(state); + got_ident = 
true; + } + + // parse / save an adverb: RAKUTYPE_REGEX only has adverbs after delim + // >= RAKUTYPE_QLANG only has adverbs before delim + else if (!got_all_adverbs && sc.ch == ':' && (!(dp.opener == 0 && got_ident) + && !(dp.opener > 0 && type >= RAKUTYPE_QLANG))) { + sc.SetState(SCE_RAKU_ADVERB); + while (IsAlphaNumeric(sc.chNext) && sc.More()) { + sc.Forward(); + str += sc.ch; + } + str += ' '; + sc.Forward(); + sc.SetState(state); + } + + // find starting delimiter + else if (dp.opener == 0 && (was_space || IsValidRegOrQAdjacent(sc.ch)) + && IsValidDelimiter(sc.ch)) { // make sure the delimiter is legal (most are) + sc.SetState((state = target_state));// start state here... + dp.opener = sc.ch; // this is our delimiter, get count + if (type < RAKUTYPE_QLANG) // type is Regex + dp.count = 1; // has only one delimiter + else + dp.count = GetRepeatCharCount(sc, dp.opener, length); + sc.Forward(dp.count); + } + + // we must have all the adverbs by now... + else { + if (got_all_adverbs) + break; // prevent infinite loop: occurs on missing open char + got_all_adverbs = true; + } + } + + // set word list / find a valid closing delimiter (or bomb!) + wordsAdverbs.Set(str.c_str()); + dp.closer[0] = GetDelimiterCloseChar(dp.opener); + dp.closer[1] = 0; // no other closer char + return dp.closer[0] > 0; +} + +/* + * LexerRaku::LengthToNonWordChar + * - returns the length until the next non "word" character: AlphaNum + '_' + * - also sets all the parsed chars in 's' + * - 's' is a buffer of 'size' chars: at most size - 1 chars are stored and + * the result is always NUL terminated inside the buffer + * - 'offset' is added to the relative position of every character read + * - never reads more than 'length' characters + */ +Sci_Position LexerRaku::LengthToNonWordChar(StyleContext &sc, Sci_Position length, + char *s, const int size, Sci_Position offset) { + Sci_Position len = 0; + 
// reserve one slot for the terminator: at most size - 1 chars are stored + const Sci_Position capacity = size > 0 ? size - 1 : 0; + const Sci_Position max_length = capacity < length ? capacity : length; + while (len < max_length) { + const int ch = sc.GetRelativeCharacter(len + offset); + if (!IsWordChar(ch)) + break; + s[len] = static_cast<char>(ch); + len++; + } + if (size > 0) + s[len] = '\0'; // single terminator, always inside the buffer + return len; +} + +/* + * LexerRaku::Lex + * - Main lexer method + */ +void SCI_METHOD LexerRaku::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) { + LexAccessor styler(pAccess); + DelimPair dpEmbeded; // delimiter pair: embedded comments + DelimPair dpString; // delimiter pair: string + DelimPair dpRegQ; // delimiter pair: Regex / Q Lang + std::string hereDelim; // heredoc delimiter (if in heredoc) + int hereState = 0; // heredoc state to use (Q / QQ) + int numState = 0; // number state / type + short cntDecimal = 0; // number decimal count + std::string wordLast; // last word seen + std::string identLast; // last identifier seen + std::string adverbLast; // last (single) adverb seen + WordList lastAdverbs; // last adverbs seen + Sci_Position len; // temp length value + char s[100]; // temp char string + int typeDetect; // temp type detected (for regex and Q lang) + Sci_Position lengthToEnd; // length until the end of range + + // Backtrack to last SCE_RAKU_DEFAULT or 0 + Sci_PositionU newStartPos = startPos; + if (initStyle != SCE_RAKU_DEFAULT) { + while (newStartPos > 0) { + newStartPos--; + if (styler.StyleAt(newStartPos) == SCE_RAKU_DEFAULT) + break; + } + } + + // Backtrack to start of line before SCE_RAKU_HEREDOC_Q? 
+ if (initStyle == SCE_RAKU_HEREDOC_Q || initStyle == SCE_RAKU_HEREDOC_QQ) { + while (newStartPos > 0) { + if (IsANewLine(styler.SafeGetCharAt(newStartPos - 1))) + break; // Stop if previous char is a new line + newStartPos--; + } + } + + // Re-calculate (any) changed startPos, length and initStyle state + if (newStartPos < startPos) { + initStyle = SCE_RAKU_DEFAULT; + length += startPos - newStartPos; + startPos = newStartPos; + } + + // init StyleContext + StyleContext sc(startPos, length, initStyle, styler); + + // StyleContext Loop + for (; sc.More(); sc.Forward()) { + lengthToEnd = (length - (sc.currentPos - startPos)); // end of range + + /* *** Determine if the current state should terminate ************** * + * Everything within the 'switch' statement processes characters up + * until the end of a syntax highlight section / state. + * ****************************************************************** */ + switch (sc.state) { + case SCE_RAKU_OPERATOR: + sc.SetState(SCE_RAKU_DEFAULT); + break; // FIXME: better valid operator sequences needed? 
+ case SCE_RAKU_COMMENTLINE: + if (sc.atLineEnd) { + sc.SetState(SCE_RAKU_DEFAULT); + } + break; + case SCE_RAKU_COMMENTEMBED: + if ((len = LengthToDelimiter(sc, dpEmbeded, lengthToEnd)) >= 0) { + sc.Forward(len); // Move to end delimiter + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + break; + case SCE_RAKU_POD: + if (sc.atLineStart && sc.Match("=end pod")) { + sc.Forward(8); + sc.SetState(SCE_RAKU_DEFAULT); + } + break; + case SCE_RAKU_STRING: + + // Process the string for variables: move to end delimiter + if ((len = LengthToDelimiter(sc, dpString, lengthToEnd)) >= 0) { + if (dpString.interpol) { + ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); + } else { + sc.Forward(len); + } + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + break; + case SCE_RAKU_STRING_Q: + case SCE_RAKU_STRING_QQ: + case SCE_RAKU_STRING_Q_LANG: + + // No string: previous char was the delimiter + if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { + sc.SetState(SCE_RAKU_DEFAULT); + } + + // Process the string for variables: move to end delimiter + else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { + + // set (any) heredoc delimiter string + if (lastAdverbs.InList("to")) { + GetRelativeString(sc, -1, len - dpRegQ.count, hereDelim); + hereState = SCE_RAKU_HEREDOC_Q; // default heredoc state + } + + // select variable identifiers + if (sc.state == SCE_RAKU_STRING_QQ || lastAdverbs.InList("qq")) { + ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); + hereState = SCE_RAKU_HEREDOC_QQ; // potential heredoc state + } else { + sc.Forward(len); + } + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + break; + case SCE_RAKU_HEREDOC_Q: + case SCE_RAKU_HEREDOC_QQ: + if ((len = LengthToEndHeredoc(sc, styler, lengthToEnd, hereDelim.c_str())) >= 0) { + // select variable identifiers + if (sc.state == SCE_RAKU_HEREDOC_QQ) { + 
ProcessStringVars(sc, len, SCE_RAKU_STRING_VAR); + } else { + sc.Forward(len); + } + sc.SetState(SCE_RAKU_DEFAULT); + } else { + sc.Forward(lengthToEnd); // no end delimiter found + } + hereDelim.clear(); // clear heredoc delimiter + break; + case SCE_RAKU_REGEX: + // account for typeDetect = RAKUTYPE_REGEX_S/TR/Y + while (sc.state == SCE_RAKU_REGEX) { + + // No string: previous char was the delimiter + if (dpRegQ.count == 1 && dpRegQ.isCloser(sc.chPrev)) { + if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) + continue; + sc.SetState(SCE_RAKU_DEFAULT); + break; + } + + // Process the string for variables: move to end delimiter + else if ((len = LengthToDelimiter(sc, dpRegQ, lengthToEnd)) >= 0) { + ProcessStringVars(sc, len, SCE_RAKU_REGEX_VAR); + if (ProcessRegexTwinCapture(sc, lengthToEnd, typeDetect, dpRegQ)) + continue; + sc.SetState(SCE_RAKU_DEFAULT); + break; + } else { + sc.Forward(lengthToEnd); // no end delimiter found + break; + } + } + break; + case SCE_RAKU_NUMBER: + if (sc.ch == '.') { + if (sc.chNext == '.') { // '..' is an operator + sc.SetState(SCE_RAKU_OPERATOR); + sc.Forward(); + if (sc.chNext == '.') // '...' 
is also an operator + sc.Forward(); + break; + } else if (numState > RAKUNUM_FLOAT_EXP + && (cntDecimal < 1 || numState == RAKUNUM_VERSION)) { + cntDecimal++; + sc.Forward(); + } else { + sc.SetState(SCE_RAKU_DEFAULT); + break; // too many decimal places + } + } + switch (numState) { + case RAKUNUM_BINARY: + if (!IsNumberChar(sc.ch, 2)) + sc.SetState(SCE_RAKU_DEFAULT); + break; + case RAKUNUM_OCTAL: + if (!IsNumberChar(sc.ch, 8)) + sc.SetState(SCE_RAKU_DEFAULT); + break; + case RAKUNUM_HEX: + if (!IsNumberChar(sc.ch, 16)) + sc.SetState(SCE_RAKU_DEFAULT); + break; + case RAKUNUM_DECIMAL: + case RAKUNUM_VERSION: + if (!IsNumberChar(sc.ch)) + sc.SetState(SCE_RAKU_DEFAULT); + } + break; + case SCE_RAKU_WORD: + case SCE_RAKU_FUNCTION: + case SCE_RAKU_TYPEDEF: + case SCE_RAKU_ADVERB: + sc.SetState(SCE_RAKU_DEFAULT); + break; + case SCE_RAKU_MU: + case SCE_RAKU_POSITIONAL: + case SCE_RAKU_ASSOCIATIVE: + case SCE_RAKU_CALLABLE: + case SCE_RAKU_IDENTIFIER: + case SCE_RAKU_GRAMMAR: + case SCE_RAKU_CLASS: + sc.SetState(SCE_RAKU_DEFAULT); + break; + } + + /* *** Determine if a new state should be entered ******************* * + * Everything below here identifies the beginning of a state, all or part + * of the characters within this state are processed here, the rest are + * completed above in the terminate state section. 
+ * ****************************************************************** */ + if (sc.state == SCE_RAKU_DEFAULT) { + + // --- Single line comment + if (sc.ch == '#') { + sc.SetState(SCE_RAKU_COMMENTLINE); + } + + // --- POD block + else if (sc.atLineStart && sc.Match("=begin pod")) { + sc.SetState(SCE_RAKU_POD); + sc.Forward(10); + } + + // --- String (normal) + else if (sc.chPrev != '\\' && (IsValidQuoteOpener(sc.ch, dpString, RAKUDELIM_QUOTE))) { + sc.SetState(SCE_RAKU_STRING); + } + + // --- String (Q Language) ---------------------------------------- + // - https://docs.raku.org/language/quoting + // - Q :adverb :adverb //; + // - q,qx,qw,qq,qqx,qqw,qqww :adverb :adverb //; + else if (IsQLangStartAtScPos(sc, typeDetect, lengthToEnd)) { + int state = SCE_RAKU_STRING_Q_LANG; + Sci_Position forward = 1; // single char ident (default) + if (typeDetect > RAKUTYPE_QLANG) { + state = SCE_RAKU_STRING_Q; + if (typeDetect == RAKUTYPE_STR_WQ) + forward = 0; // no char ident + } + if (typeDetect > RAKUTYPE_STR_Q) { + if (typeDetect == RAKUTYPE_STR_QQ) + state = SCE_RAKU_STRING_QQ; + forward++; // two char ident + } + if (typeDetect > RAKUTYPE_STR_QQ) + forward++; // three char ident + if (typeDetect == RAKUTYPE_STR_QQWW) + forward++; // four char ident + + // Proceed: check for a valid character after statement + if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_QLANG) { + sc.SetState(state); + sc.Forward(forward); + lastAdverbs.Clear(); + + // Process: adverbs / opening delimiter / adverbs after delim + if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, + lastAdverbs, dpRegQ)) + sc.SetState(state); + } + } + + // --- Regex (rx/s/m/tr/y) ---------------------------------------- + // - https://docs.raku.org/language/regexes + else if ((IsRegexStartAtScPos(sc, typeDetect, setOperator) || regexIdent.InList(wordLast.c_str()))) { + if (typeDetect == -1) { // must be a regex identifier word + wordLast.clear(); + typeDetect = RAKUTYPE_REGEX; + } + 
Sci_Position forward = 0; // no ident (RAKUTYPE_REGEX, RAKUTYPE_REGEX_NORM) + if (typeDetect > 0 && typeDetect != RAKUTYPE_REGEX) + forward++; // single char ident + if (typeDetect > RAKUTYPE_REGEX) + forward++; // two char ident + + // Proceed: check for a valid character after statement + if (IsValidRegOrQAdjacent(sc.GetRelative(forward)) || typeDetect == RAKUTYPE_REGEX_NORM) { + sc.SetState(SCE_RAKU_REGEX); + sc.Forward(forward); + lastAdverbs.Clear(); + + // Process: adverbs / opening delimiter / adverbs after delim + if (ProcessValidRegQlangStart(sc, lengthToEnd, typeDetect, + lastAdverbs, dpRegQ)) + sc.SetState(SCE_RAKU_REGEX); + } + } + + // --- Numbers ---------------------------------------------------- + else if (IsValidIdentPrecede(sc.chPrev) && (IsNumberChar(sc.ch) + || (sc.ch == 'v' && IsNumberChar(sc.chNext) && wordLast == "use"))) { + numState = RAKUNUM_DECIMAL; // default: decimal (base 10) + cntDecimal = 0; + sc.SetState(SCE_RAKU_NUMBER); + if (sc.ch == 'v') // forward past 'v' + sc.Forward(); + if (wordLast == "use") { // package version number + numState = RAKUNUM_VERSION; + } else if (sc.ch == '0') { // other type of number + switch (sc.chNext) { + case 'b': // binary (base 2) + numState = RAKUNUM_BINARY; + break; + case 'o': // octal (base 8) + numState = RAKUNUM_OCTAL; + break; + case 'x': // hexadecimal (base 16) + numState = RAKUNUM_HEX; + } + if (numState != RAKUNUM_DECIMAL) + sc.Forward(); // forward to number type char + } + } + + // --- Keywords / functions / types / barewords ------------------- + else if ((sc.currentPos == 0 || sc.atLineStart || IsValidIdentPrecede(sc.chPrev)) + && IsWordStartChar(sc.ch)) { + len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)); + if (keywords.InList(s)) { + sc.SetState(SCE_RAKU_WORD); // Keywords + } else if(functions.InList(s)) { + sc.SetState(SCE_RAKU_FUNCTION); // Functions + } else if(typesBasic.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (basic) + } else 
if(typesComposite.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (composite) + } else if(typesDomainSpecific.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (domain-specific) + } else if(typesExceptions.InList(s)) { + sc.SetState(SCE_RAKU_TYPEDEF); // Types (exceptions) + } else { + if (wordLast == "class") + sc.SetState(SCE_RAKU_CLASS); // a Class ident + else if (wordLast == "grammar") + sc.SetState(SCE_RAKU_GRAMMAR); // a Grammar ident + else + sc.SetState(SCE_RAKU_IDENTIFIER); // Bareword + identLast = s; // save identifier + } + if (adverbLast == "sym") { // special adverb ":sym" + sc.SetState(SCE_RAKU_IDENTIFIER); // treat as identifier + identLast = s; // save identifier + } + if (sc.state != SCE_RAKU_IDENTIFIER) + wordLast = s; // save word + sc.Forward(len - 1); // ...forward past word + } + + // --- Adverbs ---------------------------------------------------- + else if (sc.ch == ':' && IsWordStartChar(sc.chNext)) { + len = LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s), 1); + if (adverbs.InList(s)) { + sc.SetState(SCE_RAKU_ADVERB); // Adverbs (begin with ':') + adverbLast = s; // save word + sc.Forward(len); // ...forward past word (less offset: 1) + } + } + + // --- Identifiers: $mu / @positional / %associative / &callable -- + // see: https://docs.raku.org/language/variables + else if (setSigil.Contains(sc.ch) && (setTwigil.Contains(sc.chNext) + || setSpecialVar.Contains(sc.chNext) + || IsWordStartChar(sc.chNext))) { + + // State based on sigil + switch (sc.ch) { + case '$': sc.SetState(SCE_RAKU_MU); + break; + case '@': sc.SetState(SCE_RAKU_POSITIONAL); + break; + case '%': sc.SetState(SCE_RAKU_ASSOCIATIVE); + break; + case '&': sc.SetState(SCE_RAKU_CALLABLE); + } + const int state = sc.state; + sc.Forward(); + // 0: no delimiter, -1: special var (no identifier follows), > 0: closing delimiter to look for + // int rather than char: -1 must stay negative where plain char is unsigned + int ch_delim = 0; + if (setSpecialVar.Contains(sc.ch) + && !setWord.Contains(sc.chNext)) { // Process Special Var + ch_delim = -1; + } else if (setTwigil.Contains(sc.ch)) { // Process Twigil + sc.SetState(SCE_RAKU_OPERATOR); + 
if (sc.ch == '<' && setWord.Contains(sc.chNext)) + ch_delim = '>'; + sc.Forward(); + sc.SetState(state); + } + + // Process (any) identifier + if (ch_delim >= 0) { + sc.Forward(LengthToNonWordChar(sc, lengthToEnd, s, sizeof(s)) - 1); + if (ch_delim > 0 && sc.chNext == ch_delim) { + sc.Forward(); + sc.SetState(SCE_RAKU_OPERATOR); + } + identLast = s; // save identifier + } + } + + // --- Operators -------------------------------------------------- + else if (IsOperatorChar(sc.ch)) { + // FIXME: better valid operator sequences needed? + sc.SetState(SCE_RAKU_OPERATOR); + } + + // --- Heredoc: begin --------------------------------------------- + else if (sc.atLineEnd && !hereDelim.empty()) { + sc.SetState(hereState); + } + + // Reset words: on operator semicolon OR '}' (end of statement) + if (sc.state == SCE_RAKU_OPERATOR && (sc.ch == ';' || sc.ch == '}')) { + wordLast.clear(); + identLast.clear(); + adverbLast.clear(); + } + } + + /* *** Determine if an "embedded comment" is to be entered ********** * + * This type of embedded comment section, or multi-line comment comes + * after a normal comment has begun... e.g: #`[ ... ] + * ****************************************************************** */ + else if (sc.state == SCE_RAKU_COMMENTLINE && sc.chPrev == '#' && sc.ch == '`') { + if (IsBracketOpenChar(sc.chNext)) { + sc.Forward(); // Condition met for "embedded comment" + dpEmbeded.opener = sc.ch; + + // Find the opposite (terminating) closing bracket (if any) + dpEmbeded.closer[0] = GetBracketCloseChar(dpEmbeded.opener); + if (dpEmbeded.closer[0] > 0) { // Enter "embedded comment" + + // Find multiple opening character occurrences + dpEmbeded.count = GetRepeatCharCount(sc, dpEmbeded.opener, lengthToEnd); + sc.SetState(SCE_RAKU_COMMENTEMBED); + sc.Forward(dpEmbeded.count - 1); // incremented in the next loop + } + } + } + } + + // And we're done... 
+ sc.Complete(); +} + +/* + * LexerRaku::Lex + * - Main fold method + * NOTE: although Raku uses and supports UNICODE characters, we're only looking + * at normal chars here, using 'SafeGetCharAt' - for folding purposes + * that is all we need. + */ +#define RAKU_HEADFOLD_SHIFT 4 +#define RAKU_HEADFOLD_MASK 0xF0 +void SCI_METHOD LexerRaku::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) { + + // init LexAccessor / return if fold option is off + if (!options.fold) return; + LexAccessor styler(pAccess); + + // init char and line positions + const Sci_PositionU endPos = startPos + length; + Sci_Position lineCurrent = styler.GetLine(startPos); + + // Backtrack to last SCE_RAKU_DEFAULT line + if (startPos > 0 && lineCurrent > 0) { + while (lineCurrent > 0 && styler.StyleAt(startPos) != SCE_RAKU_DEFAULT) { + lineCurrent--; + startPos = styler.LineStart(lineCurrent); + } + lineCurrent = styler.GetLine(startPos); + } + Sci_PositionU lineStart = startPos; + Sci_PositionU lineStartNext = styler.LineStart(lineCurrent + 1); + + // init line folding level + int levelPrev = SC_FOLDLEVELBASE; + if (lineCurrent > 0) + levelPrev = styler.LevelAt(lineCurrent - 1) >> 16; + int levelCurrent = levelPrev; + + // init char and style variables + char chNext = styler[startPos]; + int stylePrev = styler.StyleAt(startPos - 1); + int styleNext = styler.StyleAt(startPos); + int styleNextStartLine = styler.StyleAt(lineStartNext); + int visibleChars = 0; + bool wasCommentMulti = false; + + // main loop + for (Sci_PositionU i = startPos; i < endPos; i++) { + + // next char, style and flags + const char ch = chNext; + chNext = styler.SafeGetCharAt(i + 1); + const int style = styleNext; + styleNext = styler.StyleAt(i + 1); + const bool atEOL = i == (lineStartNext - 1); + const bool atLineStart = i == lineStart; + + // --- Comments / Multi-line / POD ------------------------------------ + if (options.foldComment) { + + // Multi-line + if 
(options.foldCommentMultiline) { + if (style == SCE_RAKU_COMMENTLINE && atLineStart && ch == '#' && chNext == '`' + && styleNextStartLine == SCE_RAKU_COMMENTEMBED) { + levelCurrent++; + wasCommentMulti = true; // don't confuse line comments + } else if (style == SCE_RAKU_COMMENTEMBED && atLineStart + && styleNextStartLine != SCE_RAKU_COMMENTEMBED) { + levelCurrent--; + } + } + + // Line comments + if (!wasCommentMulti && atEOL && stylePrev == SCE_RAKU_COMMENTLINE + && IsCommentLine(lineCurrent, styler)) { + if (!IsCommentLine(lineCurrent - 1, styler) + && IsCommentLine(lineCurrent + 1, styler)) + levelCurrent++; + else if (IsCommentLine(lineCurrent - 1, styler) + && !IsCommentLine(lineCurrent + 1, styler)) + levelCurrent--; + } + + // POD + if (options.foldCommentPOD && atLineStart && style == SCE_RAKU_POD) { + if (styler.Match(i, "=begin")) + levelCurrent++; + else if (styler.Match(i, "=end")) + levelCurrent--; + } + } + + // --- Code block ----------------------------------------------------- + if (style == SCE_RAKU_OPERATOR) { + if (ch == '{') { + if (levelCurrent < levelPrev) levelPrev--; + levelCurrent++; + } else if (ch == '}') { + levelCurrent--; + } + } + + // --- at end of line / range / apply fold ---------------------------- + if (atEOL) { + int level = levelPrev; + + // set level flags + level |= levelCurrent << 16; + if (visibleChars == 0 && options.foldCompact) + level |= SC_FOLDLEVELWHITEFLAG; + if ((levelCurrent > levelPrev) && (visibleChars > 0)) + level |= SC_FOLDLEVELHEADERFLAG; + if (level != styler.LevelAt(lineCurrent)) { + styler.SetLevel(lineCurrent, level); + } + lineCurrent++; + lineStart = lineStartNext; + lineStartNext = styler.LineStart(lineCurrent + 1); + styleNextStartLine = styler.StyleAt(lineStartNext); + levelPrev = levelCurrent; + visibleChars = 0; + wasCommentMulti = false; + } + + // increment visibleChars / set previous char + if (!isspacechar(ch)) + visibleChars++; + stylePrev = style; + } + + // Done: set real level of the 
next line + int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK; + styler.SetLevel(lineCurrent, levelPrev | flagsNext); +} + +/*----------------------------------------------------------------------------* + * --- Scintilla: LexerModule --- + *----------------------------------------------------------------------------*/ + +LexerModule lmRaku(SCLEX_RAKU, LexerRaku::LexerFactoryRaku, "raku", rakuWordLists); diff --git a/lexilla/src/Lexilla.cxx b/lexilla/src/Lexilla.cxx index 1e01598f6..2cc1ed7c9 100644 --- a/lexilla/src/Lexilla.cxx +++ b/lexilla/src/Lexilla.cxx @@ -121,6 +121,7 @@ extern LexerModule lmPS; extern LexerModule lmPureBasic; extern LexerModule lmPython; extern LexerModule lmR; +extern LexerModule lmRaku; extern LexerModule lmREBOL; extern LexerModule lmRegistry; extern LexerModule lmRuby; @@ -263,6 +264,7 @@ void AddEachLexer() { catalogueLexilla.AddLexerModule(&lmPureBasic); catalogueLexilla.AddLexerModule(&lmPython); catalogueLexilla.AddLexerModule(&lmR); + catalogueLexilla.AddLexerModule(&lmRaku); catalogueLexilla.AddLexerModule(&lmREBOL); catalogueLexilla.AddLexerModule(&lmRegistry); catalogueLexilla.AddLexerModule(&lmRuby); diff --git a/lexilla/src/deps.mak b/lexilla/src/deps.mak index 4fb265559..ae10b42c4 100644 --- a/lexilla/src/deps.mak +++ b/lexilla/src/deps.mak @@ -1170,6 +1170,20 @@ LexR.o: \ ../../lexlib/StyleContext.h \ ../../lexlib/CharacterSet.h \ ../../lexlib/LexerModule.h +LexRaku.o: \ + ../../lexers/LexRaku.cxx \ + ../../include/ILexer.h \ + ../../include/Sci_Position.h \ + ../../include/Scintilla.h \ + ../../include/SciLexer.h \ + ../../lexlib/WordList.h \ + ../../lexlib/LexAccessor.h \ + ../../lexlib/StyleContext.h \ + ../../lexlib/CharacterSet.h \ + ../../lexlib/CharacterCategory.h \ + ../../lexlib/LexerModule.h \ + ../../lexlib/OptionSet.h \ + ../../lexlib/DefaultLexer.h LexRebol.o: \ ../../lexers/LexRebol.cxx \ ../../include/ILexer.h \ diff --git a/lexilla/src/lexilla.mak b/lexilla/src/lexilla.mak index 
fd14b9ac3..2a7a839c8 100644 --- a/lexilla/src/lexilla.mak +++ b/lexilla/src/lexilla.mak @@ -143,6 +143,7 @@ LEX_OBJS=\ $(DIR_O)\LexPS.obj \ $(DIR_O)\LexPython.obj \ $(DIR_O)\LexR.obj \ + $(DIR_O)\LexRaku.obj \ $(DIR_O)\LexRebol.obj \ $(DIR_O)\LexRegistry.obj \ $(DIR_O)\LexRuby.obj \ diff --git a/lexilla/src/nmdeps.mak b/lexilla/src/nmdeps.mak index bd56ef9cd..50b7b3a30 100644 --- a/lexilla/src/nmdeps.mak +++ b/lexilla/src/nmdeps.mak @@ -1170,6 +1170,20 @@ $(DIR_O)/LexR.obj: \ ../../lexlib/StyleContext.h \ ../../lexlib/CharacterSet.h \ ../../lexlib/LexerModule.h +$(DIR_O)/LexRaku.obj: \ + ../../lexers/LexRaku.cxx \ + ../../include/ILexer.h \ + ../../include/Sci_Position.h \ + ../../include/Scintilla.h \ + ../../include/SciLexer.h \ + ../../lexlib/WordList.h \ + ../../lexlib/LexAccessor.h \ + ../../lexlib/StyleContext.h \ + ../../lexlib/CharacterSet.h \ + ../../lexlib/CharacterCategory.h \ + ../../lexlib/LexerModule.h \ + ../../lexlib/OptionSet.h \ + ../../lexlib/DefaultLexer.h $(DIR_O)/LexRebol.obj: \ ../../lexers/LexRebol.cxx \ ../../include/ILexer.h \ diff --git a/lexilla/test/examples/raku/SciTE.properties b/lexilla/test/examples/raku/SciTE.properties new file mode 100644 index 000000000..065af1363 --- /dev/null +++ b/lexilla/test/examples/raku/SciTE.properties @@ -0,0 +1,113 @@ +lexer.*.p6=raku +# Keywords (base) +keywords.$(file.patterns.raku)=BEGIN CATCH CHECK CONTROL END ENTER EVAL FIRST \ + INIT KEEP LAST LEAVE NEXT POST PRE START TEMP UNDO after also andthen as \ + async augment bag before but category circumfix class cmp complex constant \ + contend default defer div does dynamic else elsif enum eq eqv extra fail \ + fatal ff fff for gather gcd ge given grammar gt handles has if infix is lcm \ + le leave leg let lift loop lt macro make maybe method mix mod module multi \ + ne not o only oo or orelse orwith postcircumfix postfix prefix proto regex \ + repeat require return-rw returns role rule size_t slang start str submethod \ + subset supersede take temp 
term token trusts try unit unless until when \ + where while with without x xor xx +# Keywords (functions) +keywords2.$(file.patterns.raku)=ACCEPTS AT-KEY EVALFILE EXISTS-KEY Filetests \ + IO STORE abs accept acos acosec acosech acosh acotan acotanh alarm and \ + antipairs asec asech asin asinh atan atan2 atanh base bind binmode bless \ + break caller ceiling chars chdir chmod chomp chop chr chroot chrs cis close \ + closedir codes comb conj connect contains continue cos cosec cosech cosh \ + cotan cotanh crypt dbm defined die do dump each elems eof exec exists exit \ + exp expmod fc fcntl fileno flat flip flock floor fmt fork formats functions \ + get getc getpeername getpgrp getppid getpriority getsock gist glob gmtime \ + goto grep hyper import index int invert ioctl is-prime iterator join keyof \ + keys kill kv last lazy lc lcfirst lines link list listen local localtime \ + lock log log10 lsb lstat map match mkdir msb msg my narrow new next no of \ + open ord ords our pack package pairs path pick pipe polymod pop pos pred \ + print printf prototype push quoting race rand read readdir readline readlink \ + readpipe recv redo ref rename requires reset return reverse rewinddir rindex \ + rmdir roots round samecase say scalar sec sech seek seekdir select semctl \ + semget semop send set setpgrp setpriority setsockopt shift shm shutdown sign \ + sin sinh sleep sockets sort splice split sprintf sqrt srand stat state study \ + sub subst substr substr-rw succ symlink sys syscall system syswrite tan tanh \ + tc tclc tell telldir tie time times trans trim trim-leading trim-trailing \ + truncate uc ucfirst unimatch uniname uninames uniprop uniprops unival unlink \ + unpack unpolar unshift untie use utime values wait waitpid wantarray warn \ + wordcase words write +# Keywords (types) +keywords3.$(file.patterns.raku)=AST Any Block Bool CallFrame Callable Code \ + Collation Compiler Complex ComplexStr Cool CurrentThreadScheduler Date \ + DateTime Dateish Distribution 
Distribution::Hash Distribution::Locally \ + Distribution::Path Duration Encoding Encoding::Registry Endian FatRat \ + ForeignCode HyperSeq HyperWhatever Instant Int IntStr Junction Label \ + Lock::Async Macro Method Mu Nil Num NumStr Numeric ObjAt Parameter Perl \ + PredictiveIterator Proxy RaceSeq Rat RatStr Rational Real Routine \ + Routine::WrapHandle Scalar Sequence Signature Str StrDistance Stringy Sub \ + Submethod Telemetry Telemetry::Instrument::Thread \ + Telemetry::Instrument::ThreadPool Telemetry::Instrument::Usage \ + Telemetry::Period Telemetry::Sampler UInt ValueObjAt Variable Version \ + Whatever WhateverCode atomicint bit bool buf buf1 buf16 buf2 buf32 buf4 \ + buf64 buf8 int int1 int16 int2 int32 int4 int64 int8 long longlong num \ + num32 num64 rat rat1 rat16 rat2 rat32 rat4 rat64 rat8 uint uint1 uint16 \ + uint2 uint32 uint4 uint64 uint8 utf16 utf32 utf8 +# Keywords (types composite) +keywords4.$(file.patterns.raku)=Array Associative Bag BagHash Baggy Blob Buf \ + Capture Enumeration Hash Iterable Iterator List Map Mix MixHash Mixy NFC NFD \ + NFKC NFKD Pair Positional PositionalBindFailover PseudoStash QuantHash Range \ + Seq Set SetHash Setty Slip Stash Uni utf8 +# Keywords (types domain specific) +keywords5.$(file.patterns.raku)=Attribute Cancellation Channel CompUnit \ + CompUnit::Repository CompUnit::Repository::FileSystem \ + CompUnit::Repository::Installation Distro Grammar IO IO::ArgFiles \ + IO::CatHandle IO::Handle IO::Notification IO::Path IO::Path::Cygwin \ + IO::Path::QNX IO::Path::Unix IO::Path::Win32 IO::Pipe IO::Socket \ + IO::Socket::Async IO::Socket::INET IO::Spec IO::Spec::Cygwin \ + IO::Spec::QNX IO::Spec::Unix IO::Spec::Win32 IO::Special Kernel Lock \ + Match Order Pod::Block Pod::Block::Code Pod::Block::Comment \ + Pod::Block::Declarator Pod::Block::Named Pod::Block::Para Pod::Block::Table \ + Pod::Defn Pod::FormattingCode Pod::Heading Pod::Item Proc Proc::Async \ + Promise Regex Scheduler Semaphore Supplier 
Supplier::Preserving Supply \ + Systemic Tap Thread ThreadPoolScheduler VM +# Keywords (types domain exceptions) +keywords6.$(file.patterns.raku)=Backtrace Backtrace::Frame CX::Done CX::Emit \ + CX::Last CX::Next CX::Proceed CX::Redo CX::Return CX::Succeed CX::Take \ + CX::Warn Exception Failure X::AdHoc X::Anon::Augment X::Anon::Multi \ + X::Assignment::RO X::Attribute::NoPackage X::Attribute::Package \ + X::Attribute::Required X::Attribute::Undeclared X::Augment::NoSuchType \ + X::Bind X::Bind::NativeType X::Bind::Slice X::Caller::NotDynamic \ + X::Channel::ReceiveOnClosed X::Channel::SendOnClosed X::Comp \ + X::Composition::NotComposable X::Constructor::Positional X::Control \ + X::ControlFlow X::ControlFlow::Return X::DateTime::TimezoneClash \ + X::Declaration::Scope X::Declaration::Scope::Multi X::Does::TypeObject \ + X::Dynamic::NotFound X::Eval::NoSuchLang X::Export::NameClash X::IO \ + X::IO::Chdir X::IO::Chmod X::IO::Copy X::IO::Cwd X::IO::Dir X::IO::DoesNotExist \ + X::IO::Link X::IO::Mkdir X::IO::Move X::IO::Rename X::IO::Rmdir \ + X::IO::Symlink X::IO::Unlink X::Inheritance::NotComposed \ + X::Inheritance::Unsupported X::Method::InvalidQualifier X::Method::NotFound \ + X::Method::Private::Permission X::Method::Private::Unqualified \ + X::Mixin::NotComposable X::NYI X::NoDispatcher X::Numeric::Real \ + X::OS X::Obsolete X::OutOfRange X::Package::Stubbed X::Parameter::Default \ + X::Parameter::MultipleTypeConstraints X::Parameter::Placeholder \ + X::Parameter::Twigil X::Parameter::WrongOrder X::Phaser::Multiple \ + X::Phaser::PrePost X::Placeholder::Block X::Placeholder::Mainline \ + X::Pod X::Proc::Async X::Proc::Async::AlreadyStarted X::Proc::Async::BindOrUse \ + X::Proc::Async::CharsOrBytes X::Proc::Async::MustBeStarted \ + X::Proc::Async::OpenForWriting X::Proc::Async::TapBeforeSpawn \ + X::Proc::Unsuccessful X::Promise::CauseOnlyValidOnBroken X::Promise::Vowed \ + X::Redeclaration X::Role::Initialization X::Scheduler::CueInNaNSeconds \ + 
X::Seq::Consumed X::Sequence::Deduction X::Signature::NameClash \ + X::Signature::Placeholder X::Str::Numeric X::StubCode X::Syntax \ + X::Syntax::Augment::WithoutMonkeyTyping X::Syntax::Comment::Embedded \ + X::Syntax::Confused X::Syntax::InfixInTermPosition X::Syntax::Malformed \ + X::Syntax::Missing X::Syntax::NegatedPair X::Syntax::NoSelf \ + X::Syntax::Number::RadixOutOfRange X::Syntax::P5 X::Syntax::Perl5Var \ + X::Syntax::Regex::Adverb X::Syntax::Regex::SolitaryQuantifier \ + X::Syntax::Reserved X::Syntax::Self::WithoutObject \ + X::Syntax::Signature::InvocantMarker X::Syntax::Term::MissingInitializer \ + X::Syntax::UnlessElse X::Syntax::Variable::Match X::Syntax::Variable::Numeric \ + X::Syntax::Variable::Twigil X::Temporal X::Temporal::InvalidFormat \ + X::TypeCheck X::TypeCheck::Assignment X::TypeCheck::Binding \ + X::TypeCheck::Return X::TypeCheck::Splice X::Undeclared +# Keywords (adverbs) +keywords7.$(file.patterns.raku)=D a array b backslash c closure delete double \ + exec exists f function h hash heredoc k kv p q qq quotewords s scalar single \ + sym to v val w words ww x diff --git a/lexilla/test/examples/raku/x.p6 b/lexilla/test/examples/raku/x.p6 new file mode 100644 index 000000000..0cbdb6a57 --- /dev/null +++ b/lexilla/test/examples/raku/x.p6 @@ -0,0 +1,54 @@ +use v6; + +# Normal single line comment +my Int $i = 0; +my Rat $r = 3.142; +my Str $s = "Hello, world! \$i == $i and \$r == $r"; +say $s; + +#`{{ +*** This is a multi-line comment *** +}} + +my @array = #`[[ inline comment ]] <f fo foo food>; +my %hash = ( AAA => 1, BBB => 2 ); + +say q[This back\slash stays]; +say q[This back\\slash stays]; # Identical output +say Q:q!Just a literal "\n" here!; + +=begin pod +POD Documentation... 
+=end pod + +say qq:to/END/; +A multi-line +string with interpolated vars: $i, $r +END + +sub function { + return q:to/END/; +Here is +some multi-line +string +END +} + +my $func = &function; +say $func(); + +grammar Calculator { + token TOP { <calc-op> } + proto rule calc-op {*} + rule calc-op:sym<add> { <num> '+' <num> } + rule calc-op:sym<sub> { <num> '-' <num> } + token num { \d+ } +} + +class Calculations { + method TOP ($/) { make $<calc-op>.made; } + method calc-op:sym<add> ($/) { make [+] $<num>; } + method calc-op:sym<sub> ($/) { make [-] $<num>; } +} + +say Calculator.parse('2 + 3', actions => Calculations).made; diff --git a/lexilla/test/examples/raku/x.p6.styled b/lexilla/test/examples/raku/x.p6.styled new file mode 100644 index 000000000..9bef97940 --- /dev/null +++ b/lexilla/test/examples/raku/x.p6.styled @@ -0,0 +1,54 @@ +{20}use{0} {16}v6{18};{0} + +{2}# Normal single line comment{0} +{20}my{0} {22}Int{0} {23}$i{0} {18}={0} {16}0{18};{0} +{20}my{0} {22}Rat{0} {23}$r{0} {18}={0} {16}3.142{18};{0} +{20}my{0} {22}Str{0} {23}$s{0} {18}={0} {8}"Hello, world! \$i == {12}$i{8} and \$r == {12}$r{8}"{18};{0} +{20}say{0} {23}$s{18};{0} + +{2}#`{3}{{ +*** This is a multi-line comment *** +}}{0} + +{20}my{0} {24}@array{0} {18}={0} {2}#`{3}[[ inline comment ]]{0} {9}<f fo foo food>{18};{0} +{20}my{0} {25}%hash{0} {18}={0} {18}({0} {21}AAA{0} {18}=>{0} {16}1{18},{0} {21}BBB{0} {18}=>{0} {16}2{0} {18});{0} + +{20}say{0} {9}q[This back\slash stays]{18};{0} +{20}say{0} {9}q[This back\\slash stays]{18};{0} {2}# Identical output{0} +{20}say{0} {11}Q{15}:q{11}!Just a literal "\n" here!{18};{0} + +{4}=begin pod +POD Documentation... 
+=end pod{0} + +{20}say{0} {10}qq{15}:to{10}/END/{18};{7} +A multi-line +string with interpolated vars: {12}$i{7}, {12}$r{7} +END{0} + +{20}sub{0} {21}function{0} {18}{{0} + {20}return{0} {9}q{15}:to{9}/END/{18};{6} +Here is +some multi-line +string +END{0} +{18}}{0} + +{20}my{0} {23}$func{0} {18}={0} {26}&function{18};{0} +{20}say{0} {23}$func{18}();{0} + +{19}grammar{0} {27}Calculator{0} {18}{{0} + {19}token{0} {21}TOP{0} {13}{ <calc-op> }{0} + {19}proto{0} {19}rule{0} {21}calc-op{0} {13}{*}{0} + {19}rule{0} {21}calc-op{15}:sym{18}<{21}add{18}>{0} {13}{ <num> '+' <num> }{0} + {19}rule{0} {21}calc-op{15}:sym{18}<{21}sub{18}>{0} {13}{ <num> '-' <num> }{0} + {19}token{0} {21}num{0} {13}{ \d+ }{0} +{18}}{0} + +{19}class{0} {28}Calculations{0} {18}{{0} + {19}method{0} {21}TOP{0} {18}({23}$/{18}){0} {18}{{0} {19}make{0} {23}${18}<{23}calc-op{18}>.{21}made{18};{0} {18}}{0} + {19}method{0} {21}calc-op{15}:sym{18}<{21}add{18}>{0} {18}({23}$/{18}){0} {18}{{0} {21}make{0} {18}[+]{0} {23}${18}<{23}num{18}>;{0} {18}}{0} + {19}method{0} {21}calc-op{15}:sym{18}<{21}sub{18}>{0} {18}({23}$/{18}){0} {18}{{0} {21}make{0} {18}[-]{0} {23}${18}<{23}num{18}>;{0} {18}}{0} +{18}}{0} + +{20}say{0} {21}Calculator{18}.{21}parse{18}({8}'2 + 3'{18},{0} {21}actions{0} {18}=>{0} {21}Calculations{18}).{21}made{18};{0} diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx index e3fcddafb..50254de8b 100644 --- a/src/Catalogue.cxx +++ b/src/Catalogue.cxx @@ -156,6 +156,7 @@ int Scintilla_LinkLexers() { LINK_LEXER(lmPureBasic); LINK_LEXER(lmPython); LINK_LEXER(lmR); + LINK_LEXER(lmRaku); LINK_LEXER(lmREBOL); LINK_LEXER(lmRegistry); LINK_LEXER(lmRuby); diff --git a/win32/deps.mak b/win32/deps.mak index ef38f88e2..f3a6771d2 100644 --- a/win32/deps.mak +++ b/win32/deps.mak @@ -1642,6 +1642,20 @@ LexR.o: \ ../lexlib/StyleContext.h \ ../lexlib/CharacterSet.h \ ../lexlib/LexerModule.h +LexRaku.o: \ + ../lexers/LexRaku.cxx \ + ../include/ILexer.h \ + ../include/Sci_Position.h \ + ../include/Scintilla.h \ + 
../include/SciLexer.h \ + ../lexlib/WordList.h \ + ../lexlib/LexAccessor.h \ + ../lexlib/StyleContext.h \ + ../lexlib/CharacterSet.h \ + ../lexlib/CharacterCategory.h \ + ../lexlib/LexerModule.h \ + ../lexlib/OptionSet.h \ + ../lexlib/DefaultLexer.h LexRebol.o: \ ../lexers/LexRebol.cxx \ ../include/ILexer.h \ diff --git a/win32/nmdeps.mak b/win32/nmdeps.mak index 753bc6463..643615584 100644 --- a/win32/nmdeps.mak +++ b/win32/nmdeps.mak @@ -1642,6 +1642,20 @@ $(DIR_O)/LexR.obj: \ ../lexlib/StyleContext.h \ ../lexlib/CharacterSet.h \ ../lexlib/LexerModule.h +$(DIR_O)/LexRaku.obj: \ + ../lexers/LexRaku.cxx \ + ../include/ILexer.h \ + ../include/Sci_Position.h \ + ../include/Scintilla.h \ + ../include/SciLexer.h \ + ../lexlib/WordList.h \ + ../lexlib/LexAccessor.h \ + ../lexlib/StyleContext.h \ + ../lexlib/CharacterSet.h \ + ../lexlib/CharacterCategory.h \ + ../lexlib/LexerModule.h \ + ../lexlib/OptionSet.h \ + ../lexlib/DefaultLexer.h $(DIR_O)/LexRebol.obj: \ ../lexers/LexRebol.cxx \ ../include/ILexer.h \ diff --git a/win32/scintilla.mak b/win32/scintilla.mak index fb5e11867..4a652f426 100644 --- a/win32/scintilla.mak +++ b/win32/scintilla.mak @@ -189,6 +189,7 @@ LEX_OBJS=\ $(DIR_O)\LexPS.obj \ $(DIR_O)\LexPython.obj \ $(DIR_O)\LexR.obj \ + $(DIR_O)\LexRaku.obj \ $(DIR_O)\LexRebol.obj \ $(DIR_O)\LexRegistry.obj \ $(DIR_O)\LexRuby.obj \ |