Feature: [feature-requests:#1008]. Lexer added for KVIrc script.

From OmegaPhil.
author: Neil <nyamatongwe@gmail.com> 2013-08-21 14:50:48 +1000
committer: Neil <nyamatongwe@gmail.com> 2013-08-21 14:50:48 +1000
commit: 44f44d7ab98c7cf019fd42d9b34ddb19802413f7 (patch)
tree: e6ef95a0c5c830e2dc44099fbf2057cc2e0c9cb1
parent: 8e6ddfd4ed7a72190796d138557ad3eb5da7b0db (diff)
download: scintilla-mirror-44f44d7ab98c7cf019fd42d9b34ddb19802413f7.tar.gz
6 files changed, 512 insertions, 0 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index 97ff11f58..bc80371a6 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -429,6 +429,7 @@
 	<td>Brian Griffin</td>
 	<td>Özgür Emir</td>
 	<td>Neomi</td>
+	<td>OmegaPhil</td>
     </tr>
     </table>
     <p>
@@ -459,6 +460,10 @@
 	<a href="http://sourceforge.net/p/scintilla/feature-requests/1007/">Feature #1007.</a>
 	</li>
 	<li>
+	Lexer added for KVIrc script.
+	<a href="http://sourceforge.net/p/scintilla/feature-requests/1008/">Feature #1008.</a>
+	</li>
+	<li>
 	Bash lexer fixed quoted HereDoc delimiters.
 	<a href="http://sourceforge.net/p/scintilla/bugs/1500/">Bug #1500</a>.
 	</li>
diff --git a/include/SciLexer.h b/include/SciLexer.h
index 18cdb984c..bd127557f 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -122,6 +122,7 @@
 #define SCLEX_VISUALPROLOG 107
 #define SCLEX_LITERATEHASKELL 108
 #define SCLEX_STTXT 109
+#define SCLEX_KVIRC 110
 #define SCLEX_AUTOMATIC 1000
 #define SCE_P_DEFAULT 0
 #define SCE_P_COMMENTLINE 1
@@ -1647,6 +1648,19 @@
 #define SCE_STTXT_DATETIME 16
 #define SCE_STTXT_VARS 17
 #define SCE_STTXT_PRAGMAS 18
+#define SCE_KVIRC_DEFAULT 0
+#define SCE_KVIRC_COMMENT 1
+#define SCE_KVIRC_COMMENTBLOCK 2
+#define SCE_KVIRC_STRING 3
+#define SCE_KVIRC_WORD 4
+#define SCE_KVIRC_KEYWORD 5
+#define SCE_KVIRC_FUNCTION_KEYWORD 6
+#define SCE_KVIRC_FUNCTION 7
+#define SCE_KVIRC_VARIABLE 8
+#define SCE_KVIRC_NUMBER 9
+#define SCE_KVIRC_OPERATOR 10
+#define SCE_KVIRC_STRING_FUNCTION 11
+#define SCE_KVIRC_STRING_VARIABLE 12
 /* --Autogenerated -- end of section automatically generated from Scintilla.iface */
 
 #endif
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index 7909f2084..a2170501f 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -2599,6 +2599,7 @@ val SCLEX_OSCRIPT=106
 val SCLEX_VISUALPROLOG=107
 val SCLEX_LITERATEHASKELL=108
 val SCLEX_STTXT=109
+val SCLEX_KVIRC=110
 
 # When a lexer specifies its language as SCLEX_AUTOMATIC it receives a
 # value assigned in sequence from SCLEX_AUTOMATIC+1.
@@ -4324,6 +4325,21 @@ val SCE_STTXT_IDENTIFIER=15
 val SCE_STTXT_DATETIME=16
 val SCE_STTXT_VARS=17
 val SCE_STTXT_PRAGMAS=18
+# Lexical states for SCLEX_KVIRC
+lex KVIrc=SCLEX_KVIRC SCE_KVIRC_
+val SCE_KVIRC_DEFAULT=0
+val SCE_KVIRC_COMMENT=1
+val SCE_KVIRC_COMMENTBLOCK=2
+val SCE_KVIRC_STRING=3
+val SCE_KVIRC_WORD=4
+val SCE_KVIRC_KEYWORD=5
+val SCE_KVIRC_FUNCTION_KEYWORD=6
+val SCE_KVIRC_FUNCTION=7
+val SCE_KVIRC_VARIABLE=8
+val SCE_KVIRC_NUMBER=9
+val SCE_KVIRC_OPERATOR=10
+val SCE_KVIRC_STRING_FUNCTION=11
+val SCE_KVIRC_STRING_VARIABLE=12
 
 # Events
 
diff --git a/lexers/LexKVIrc.cxx b/lexers/LexKVIrc.cxx
new file mode 100644
index 000000000..958a2cf67
--- /dev/null
+++ b/lexers/LexKVIrc.cxx
@@ -0,0 +1,473 @@
+// Scintilla source code edit control
+/** @file LexKVIrc.cxx
+ ** Lexer for KVIrc script.
+ **/
+// Copyright 2013 by OmegaPhil <OmegaPhil+scintilla@gmail.com>, based in
+// part from LexPython Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
+// and LexCmake Copyright 2007 by Cristian Adam <cristian [dot] adam [at] gmx [dot] net>
+
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "ILexer.h"
+#include "Scintilla.h"
+#include "SciLexer.h"
+
+#include "WordList.h"
+#include "LexAccessor.h"
+#include "Accessor.h"
+#include "StyleContext.h"
+#include "CharacterSet.h"
+#include "LexerModule.h"
+
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+
+/* KVIrc Script syntactic rules: http://www.kvirc.net/doc/doc_syntactic_rules.html */
+
+/* Utility functions */
+static inline bool IsAWordChar(int ch) {
+
+    /* ASCII alphanumerics and '_' continue a word; '.' is accepted as the
+     * keyword list includes module names, and ':' for alias namespaces */
+    return (ch < 0x80) && (isalnum(ch) || ch == '_' || ch == '.'
+            || ch == ':');
+}
+static inline bool IsAWordStart(int ch) {
+
+    /* ASCII alphanumeric or '_' only - functions (start with '$') are handled separately */
+    return (ch < 0x80) && (isalnum(ch) || ch == '_' );
+}
+
+/* Lexer entry point: colourises the range given by startPos/length, resuming in
+ * initStyle. keywordlists[0] = keywords, keywordlists[1] = function keywords */
+static void ColouriseKVIrcDoc(unsigned int startPos, int length,
+                              int initStyle, WordList *keywordlists[],
+                              Accessor &styler)
+{
+    /* Fetching style context */
+    StyleContext sc(startPos, length, initStyle, styler);
+
+    /* Accessing keywords and function-marking keywords */
+    WordList &keywords = *keywordlists[0];
+    WordList &functionKeywords = *keywordlists[1];
+
+    /* Looping for all characters - only automatically moving forward
+     * when asked for (transitions leaving strings and keywords do this
+     * already) */
+    bool next = true;
+    for( ; sc.More(); next ? sc.Forward() : (void)0 )
+    {
+        /* Resetting next */
+        next = true;
+
+        /* Dealing with different states */
+        switch (sc.state)
+        {
+            case SCE_KVIRC_DEFAULT:
+
+                /* Detecting single-line comments
+                 * Unfortunately KVIrc script allows raw '#<channel
+                 * name>' to be used, and appending # to an array returns
+                 * its length...
+                 * Going for a compromise where single line comments not
+                 * starting on a newline are allowed in all cases except
+                 * when they are preceded by an opening bracket or comma
+                 * (this will probably be the most common style a valid
+                 * string-less channel name will be used with), with the
+                 * array length case included
+                 */
+                if (sc.ch == '#' &&
+                    (sc.atLineStart ||
+                     (sc.chPrev != '(' && sc.chPrev != ',' &&
+                      sc.chPrev != ']')))
+                {
+                    sc.SetState(SCE_KVIRC_COMMENT);
+                    break;
+                }
+
+                /* Detecting multi-line comments - the '*' of the opener
+                 * is consumed here so that it cannot also be read as the
+                 * start of the closing delimiter */
+                if (sc.Match('/', '*'))
+                {
+                    sc.SetState(SCE_KVIRC_COMMENTBLOCK);
+                    sc.Forward();
+                    break;
+                }
+
+                /* Detecting strings */
+                if (sc.ch == '"')
+                {
+                    sc.SetState(SCE_KVIRC_STRING);
+                    break;
+                }
+
+                /* Detecting functions */
+                if (sc.ch == '$')
+                {
+                    sc.SetState(SCE_KVIRC_FUNCTION);
+                    break;
+                }
+
+                /* Detecting variables */
+                if (sc.ch == '%')
+                {
+                    sc.SetState(SCE_KVIRC_VARIABLE);
+                    break;
+                }
+
+                /* Detecting numbers - isdigit is unsafe as it does not
+                 * validate, use CharacterSet.h functions */
+                if (IsADigit(sc.ch))
+                {
+                    sc.SetState(SCE_KVIRC_NUMBER);
+                    break;
+                }
+
+                /* Detecting words */
+                if (IsAWordStart(sc.ch) && IsAWordChar(sc.chNext))
+                {
+                    sc.SetState(SCE_KVIRC_WORD);
+                    sc.Forward();
+                    break;
+                }
+
+                /* Detecting operators */
+                if (isoperator(sc.ch))
+                {
+                    sc.SetState(SCE_KVIRC_OPERATOR);
+                    break;
+                }
+
+                break;
+
+            case SCE_KVIRC_COMMENT:
+
+                /* Breaking out of single line comment when a newline
+                 * is introduced */
+                if (sc.ch == '\r' || sc.ch == '\n')
+                {
+                    sc.SetState(SCE_KVIRC_DEFAULT);
+                    break;
+                }
+
+                break;
+
+            case SCE_KVIRC_COMMENTBLOCK:
+
+                /* Detecting end of multi-line comment */
+                if (sc.Match('*', '/'))
+                {
+                    // Moving the current position forward two characters
+                    // so that '*/' is included in the comment
+                    sc.Forward(2);
+                    sc.SetState(SCE_KVIRC_DEFAULT);
+
+                    /* Comment has been exited and the current position
+                     * moved forward, yet the new current character
+                     * has yet to be defined - loop without moving
+                     * forward again */
+                    next = false;
+                    break;
+                }
+
+                break;
+
+            case SCE_KVIRC_STRING:
+
+                /* Detecting end of string - closing speechmarks */
+                if (sc.ch == '"')
+                {
+                    /* Allowing escaped speechmarks to pass */
+                    if (sc.chPrev == '\\')
+                        break;
+
+                    /* Moving the current position forward to capture the
+                     * terminating speechmarks, and ending string */
+                    sc.ForwardSetState(SCE_KVIRC_DEFAULT);
+
+                    /* String has been exited and the current position
+                     * moved forward, yet the new current character
+                     * has yet to be defined - loop without moving
+                     * forward again */
+                    next = false;
+                    break;
+                }
+
+                /* Functions and variables are now highlighted in strings
+                 * Detecting functions */
+                if (sc.ch == '$')
+                {
+                    /* Allowing escaped functions to pass */
+                    if (sc.chPrev == '\\')
+                        break;
+
+                    sc.SetState(SCE_KVIRC_STRING_FUNCTION);
+                    break;
+                }
+
+                /* Detecting variables */
+                if (sc.ch == '%')
+                {
+                    /* Allowing escaped variables to pass */
+                    if (sc.chPrev == '\\')
+                        break;
+
+                    sc.SetState(SCE_KVIRC_STRING_VARIABLE);
+                    break;
+                }
+
+                /* Breaking out of a string when a newline is introduced */
+                if (sc.ch == '\r' || sc.ch == '\n')
+                {
+                    /* Allowing escaped newlines */
+                    if (sc.chPrev == '\\')
+                        break;
+
+                    sc.SetState(SCE_KVIRC_DEFAULT);
+                    break;
+                }
+
+                break;
+
+            case SCE_KVIRC_FUNCTION:
+            case SCE_KVIRC_VARIABLE:
+
+                /* Detecting the end of a function/variable (word) */
+                if (!IsAWordChar(sc.ch))
+                {
+                    sc.SetState(SCE_KVIRC_DEFAULT);
+
+                    /* Word has been exited yet the current character
+                     * has yet to be defined - loop without moving
+                     * forward again */
+                    next = false;
+                    break;
+                }
+
+                break;
+
+            case SCE_KVIRC_STRING_FUNCTION:
+            case SCE_KVIRC_STRING_VARIABLE:
+
+                /* A function or variable in a string
+                 * Detecting the end of a function/variable (word) */
+                if (!IsAWordChar(sc.ch))
+                {
+                    sc.SetState(SCE_KVIRC_STRING);
+
+                    /* Word has been exited yet the current character
+                     * has yet to be defined - loop without moving
+                     * forward again */
+                    next = false;
+                    break;
+                }
+
+                break;
+
+            case SCE_KVIRC_NUMBER:
+
+                /* Detecting the end of a number */
+                if (!IsADigit(sc.ch))
+                {
+                    sc.SetState(SCE_KVIRC_DEFAULT);
+
+                    /* Number has been exited yet the current character
+                     * has yet to be defined - loop without moving
+                     * forward */
+                    next = false;
+                    break;
+                }
+
+                break;
+
+            case SCE_KVIRC_OPERATOR:
+
+                /* Because '%' is an operator but is also the marker for
+                 * a variable, I need to always treat operators as single
+                 * character strings and therefore redo their detection
+                 * after every character */
+                sc.SetState(SCE_KVIRC_DEFAULT);
+
+                /* Operator has been exited yet the current character
+                 * has yet to be defined - loop without moving
+                 * forward */
+                next = false;
+                break;
+
+            case SCE_KVIRC_WORD:
+
+                /* Detecting the end of a word */
+                if (!IsAWordChar(sc.ch))
+                {
+                    /* Checking if the word was actually a keyword -
+                     * fetching the current word, NULL-terminated like
+                     * the keyword list */
+                    char s[100];
+                    int wordLen = sc.currentPos - styler.GetStartSegment();
+                    if (wordLen > 99)
+                        wordLen = 99;  /* Include '\0' in buffer */
+                    int i;
+                    for( i = 0; i < wordLen; ++i )
+                    {
+                        s[i] = styler.SafeGetCharAt( styler.GetStartSegment() + i );
+                    }
+                    s[wordLen] = '\0';
+
+                    /* Actually detecting keywords and fixing the state */
+                    if (keywords.InList(s))
+                    {
+                        /* ChangeState restyles the pending word; the
+                         * SetState call further down commits it */
+                        sc.ChangeState(SCE_KVIRC_KEYWORD);
+                    }
+                    else if (functionKeywords.InList(s))
+                    {
+                        // Detecting function keywords and fixing the state
+                        sc.ChangeState(SCE_KVIRC_FUNCTION_KEYWORD);
+                    }
+
+                    /* Transitioning to default and committing the previous
+                     * word state */
+                    sc.SetState(SCE_KVIRC_DEFAULT);
+
+                    /* Word has been exited yet the current character
+                     * has yet to be defined - loop without moving
+                     * forward again */
+                    next = false;
+                    break;
+                }
+
+                break;
+        }
+    }
+
+    /* Indicating processing is complete */
+    sc.Complete();
+}
+
+static void FoldKVIrcDoc(unsigned int startPos, int length, int /*initStyle - unused*/,
+                      WordList *[], Accessor &styler)
+{
+    /* Based on CMake's folder */
+    
+    /* Exiting if folding isn't enabled */
+    if ( styler.GetPropertyInt("fold") == 0 )
+        return;
+
+    /* Obtaining current line number */
+    int currentLine = styler.GetLine(startPos);
+
+    /* Obtaining starting character - indentation is done on a line basis,
+     * not character */
+    unsigned int safeStartPos = styler.LineStart( currentLine );
+
+    /* Initialising current level - this is defined as indentation level
+     * in the low 12 bits, with flag bits in the upper four bits.
+     * It looks like two indentation states are maintained in the returned
+     * 32-bit value - 'nextLevel' in the most-significant bits, 'currentLevel'
+     * in the least-significant bits. Since the next level is the most
+     * up to date, this must refer to the current state of indentation.
+     * So the code bitshifts the old current level out of existence to
+     * get at the actual current state of indentation
+     * Based on the LexerCPP.cxx line 958 comment */
+    int currentLevel = SC_FOLDLEVELBASE;
+    if (currentLine > 0)
+        currentLevel = styler.LevelAt(currentLine - 1) >> 16;
+    int nextLevel = currentLevel;
+
+    // Looping for characters in range, from the start of the first line
+    for (unsigned int i = safeStartPos; i < startPos + length; ++i)
+    {
+        /* Folding occurs after syntax highlighting, meaning Scintilla
+         * already knows where the comments are
+         * Fetching the current state */
+        int state = styler.StyleAt(i) & 31;
+
+        switch( styler.SafeGetCharAt(i) )
+        {
+            case '{':
+
+                /* Indenting only when the braces are not contained in
+                 * a comment */
+                if (state != SCE_KVIRC_COMMENT &&
+                    state != SCE_KVIRC_COMMENTBLOCK)
+                    ++nextLevel;
+                break;
+
+            case '}':
+
+                /* Outdenting only when the braces are not contained in
+                 * a comment */
+                if (state != SCE_KVIRC_COMMENT &&
+                    state != SCE_KVIRC_COMMENTBLOCK)
+                    --nextLevel;
+                break;
+
+            case '\n':
+            case '\r':
+
+                /* Preparing indentation information to return - combining
+                 * current and next level data */
+                int lev = currentLevel | nextLevel << 16;
+
+                /* If the next level increases the indent level, mark the
+                 * current line as a fold point - current level data is
+                 * in the least significant bits */
+                if (nextLevel > currentLevel )
+                    lev |= SC_FOLDLEVELHEADERFLAG;
+
+                /* Storing the level only when it differs from the old one */
+                if (lev != styler.LevelAt(currentLine))
+                    styler.SetLevel(currentLine, lev);
+
+                /* Moving on to the next line, which starts at nextLevel */
+                ++currentLine;
+                currentLevel = nextLevel;
+
+                /* Dealing with problematic Windows newlines -
+                 * skipping the '\n' of a "\r\n" pair so that the pair
+                 * is counted as a single line end */
+                if (styler.SafeGetCharAt(i) == '\r' &&
+                    styler.SafeGetCharAt(i + 1) == '\n')
+                    ++i;
+                break;
+        }
+    }
+
+    /* The range has been consumed - the line reached at this point has
+     * not had its level written by the loop, so it is prepared here in
+     * the same way, combining current and next level data */
+    int lev = currentLevel | nextLevel << 16;
+
+    /* If the next level increases the indent level, mark the current
+     * line as a fold point - current level data is in the least
+     * significant bits */
+    if (nextLevel > currentLevel )
+        lev |= SC_FOLDLEVELHEADERFLAG;
+
+    /* Storing the level only when it differs from the old one */
+    if (lev != styler.LevelAt(currentLine))
+        styler.SetLevel(currentLine, lev);
+}
+
+/* Zero-terminated word list names; ColouriseKVIrcDoc reads keywordlists[0] and [1] */
+static const char *const kvircWordListDesc[] = {
+	"primary",
+	"function_keywords",
+	0
+};
+
+
+/* Lexer module "kvirc" for SCLEX_KVIRC: colouriser, folder and word list names */
+LexerModule lmKVIrc(SCLEX_KVIRC, ColouriseKVIrcDoc, "kvirc", FoldKVIrcDoc,
+                    kvircWordListDesc);
diff --git a/src/Catalogue.cxx b/src/Catalogue.cxx
index af07fe57f..2e9e6381b 100644
--- a/src/Catalogue.cxx
+++ b/src/Catalogue.cxx
@@ -121,6 +121,7 @@ int Scintilla_LinkLexers() {
 	LINK_LEXER(lmHTML);
 	LINK_LEXER(lmInno);
 	LINK_LEXER(lmKix);
+	LINK_LEXER(lmKVIrc);
 	LINK_LEXER(lmLatex);
 	LINK_LEXER(lmLISP);
 	LINK_LEXER(lmLiterateHaskell);
diff --git a/win32/scintilla.mak b/win32/scintilla.mak
index 9d3e4ee7b..825c48fe0 100644
--- a/win32/scintilla.mak
+++ b/win32/scintilla.mak
@@ -128,6 +128,7 @@ LEXOBJS=\
 	$(DIR_O)\LexHTML.obj \
 	$(DIR_O)\LexInno.obj \
 	$(DIR_O)\LexKix.obj \
+	$(DIR_O)\LexKVIrc.obj \
 	$(DIR_O)\LexLaTeX.obj \
 	$(DIR_O)\LexLisp.obj \
 	$(DIR_O)\LexLout.obj \
@@ -390,6 +391,8 @@ $(DIR_O)\LexInno.obj: ..\lexers\LexInno.cxx $(LEX_HEADERS)
 
 $(DIR_O)\LexKix.obj: ..\lexers\LexKix.cxx $(LEX_HEADERS)
 
+$(DIR_O)\LexKVIrc.obj: ..\lexers\LexKVIrc.cxx $(LEX_HEADERS)
+
 $(DIR_O)\LexLaTeX.obj: ..\lexers\LexLaTeX.cxx $(LEX_HEADERS)
 
 $(DIR_O)\LexLisp.obj: ..\lexers\LexLisp.cxx $(LEX_HEADERS)
author	Neil <nyamatongwe@gmail.com>	2013-08-21 14:50:48 +1000
committer	Neil <nyamatongwe@gmail.com>	2013-08-21 14:50:48 +1000
commit	44f44d7ab98c7cf019fd42d9b34ddb19802413f7 (patch)
tree	e6ef95a0c5c830e2dc44099fbf2057cc2e0c9cb1
parent	8e6ddfd4ed7a72190796d138557ad3eb5da7b0db (diff)
download	scintilla-mirror-44f44d7ab98c7cf019fd42d9b34ddb19802413f7.tar.gz