From 8fe23071ec3f82bf2b602f2ba5edee0cf6bc6fa3 Mon Sep 17 00:00:00 2001 From: Neil Date: Mon, 17 Jul 2017 14:21:40 +1000 Subject: Documentation for style metadata. --- doc/ScintillaDoc.html | 110 +++++++++++--------------- doc/StyleMetadata.html | 204 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+), 65 deletions(-) create mode 100644 doc/StyleMetadata.html diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html index a04205387..c669d3c40 100644 --- a/doc/ScintillaDoc.html +++ b/doc/ScintillaDoc.html @@ -6763,7 +6763,7 @@ sptr_t CallScintilla(unsigned int iMessage, uptr_t wParam, sptr_t lParam){ purpose would exhaust the number of allowed styles quickly. This is alleviated by substyles which allow the application to determine how many sets of identifiers to allocate for each purpose. - Lexers have to explicitly support this feature by implementing the methods in ILexerWithSubStyles.

+ Lexers have to explicitly support this feature by implementing particular methods.

SCI_GETSUBSTYLEBASES(<unused>, char *styles NUL-terminated) → int
Fill styles with a byte for each style that can be split into substyles.

@@ -6797,7 +6797,7 @@ sptr_t CallScintilla(unsigned int iMessage, uptr_t wParam, sptr_t lParam){

Lexer Objects

-

Lexers are programmed as objects that implement the ILexer interface and that interact +

Lexers are programmed as objects that implement the ILexer4 interface and that interact with the document they are lexing through the IDocument interface. Previously lexers were defined by providing lexing and folding functions but creating an object to handle the interaction of a lexer with a document allows the lexer to store state information that @@ -6805,48 +6805,36 @@ sptr_t CallScintilla(unsigned int iMessage, uptr_t wParam, sptr_t lParam){ or variable declarations and style these depending on their role.

A set of helper classes allows older lexers defined by functions to be used in Scintilla.

-

ILexer

+

ILexer4

-class ILexer {
+class ILexer4 {
public:
-    virtual - int SCI_METHOD - Version() -const = - 0;
-    virtual - void SCI_METHOD - Release() -= 0;
-    virtual -const -char -* -SCI_METHOD PropertyNames() - = 0;
-    virtual - int SCI_METHOD PropertyType(const char *name) = 0;
-    virtual - const char * SCI_METHOD DescribeProperty(const char *name) = 0;
-    virtual - Sci_Position SCI_METHOD - PropertySet(const char *key, const char *val) = 0;
-    virtual - const char * SCI_METHOD - DescribeWordListSets() = 0;
-    virtual - Sci_Position SCI_METHOD - WordListSet(int n, const char *wl) = 0;
-    virtual - void SCI_METHOD - Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) = 0;
-    virtual - void SCI_METHOD - Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) = 0;
-    virtual - void * SCI_METHOD - PrivateCall(int operation, void *pointer) = 0;
+        virtual int SCI_METHOD Version() const = 0;
+        virtual void SCI_METHOD Release() = 0;
+        virtual const char * SCI_METHOD PropertyNames() = 0;
+        virtual int SCI_METHOD PropertyType(const char *name) = 0;
+        virtual const char * SCI_METHOD DescribeProperty(const char *name) = 0;
+        virtual Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) = 0;
+        virtual const char * SCI_METHOD DescribeWordListSets() = 0;
+        virtual Sci_Position SCI_METHOD WordListSet(int n, const char *wl) = 0;
+        virtual void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) = 0;
+        virtual void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position lengthDoc, int initStyle, IDocument *pAccess) = 0;
+        virtual void * SCI_METHOD PrivateCall(int operation, void *pointer) = 0;
+        virtual int SCI_METHOD LineEndTypesSupported() = 0;
+        virtual int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) = 0;
+        virtual int SCI_METHOD SubStylesStart(int styleBase) = 0;
+        virtual int SCI_METHOD SubStylesLength(int styleBase) = 0;
+        virtual int SCI_METHOD StyleFromSubStyle(int subStyle) = 0;
+        virtual int SCI_METHOD PrimaryStyleFromStyle(int style) = 0;
+        virtual void SCI_METHOD FreeSubStyles() = 0;
+        virtual void SCI_METHOD SetIdentifiers(int style, const char *identifiers) = 0;
+        virtual int SCI_METHOD DistanceToSecondaryStyles() = 0;
+        virtual const char * SCI_METHOD GetSubStyleBases() = 0;
+        virtual int SCI_METHOD NamedStyles() = 0;
+        virtual const char * SCI_METHOD NameOfStyle(int style) = 0;
+        virtual const char * SCI_METHOD TagsOfStyle(int style) = 0;
+        virtual const char * SCI_METHOD DescriptionOfStyle(int style) = 0;
};
@@ -6855,6 +6843,12 @@ The types Sci_Position and Sci_PositionU are used for With Scintilla 4, 64-bit builds define these as 64-bit types to allow future implementation of documents larger than 2 GB.

+

+Methods that return strings as const char * are not required to maintain separate allocations indefinitely: +lexer implementations may own a single buffer that is reused for each call. +Callers should make an immediate copy of returned strings. +

+

The return values from PropertySet and WordListSet are used to indicate whether the change requires performing lexing or folding over any of the document. It is the position at which to restart lexing and folding or -1 @@ -6864,7 +6858,7 @@ optimisation could be to remember where a setting first affects the document and

Version returns an enumerated value specifying which version of the interface is implemented: -lvOriginal for ILexer and lvSubStyles for ILexerWithSubStyles.

+lvRelease4 for ILexer4. Prior to Scintilla 4.0 different values were possible.

Release is called to destroy the lexer object.

@@ -6882,29 +6876,15 @@ needs to be folded as this allowed fixing up the last line from the previous fol The new approach allows the lexer to decide whether to backtrack or to handle this more efficiently.

-

ILexerWithSubStyles

- -

-To allow lexers to report which line ends they support, and to support substyles, -Ilexer is extended to ILexerWithSubStyles. +

NamedStyles, NameOfStyle, +TagsOfStyle, and DescriptionOfStyle +are used to provide information on the set of styles used by this lexer. +NameOfStyle is the C-language identifier like "SCE_LUA_COMMENT". +TagsOfStyle is a set of tags describing the style in a standardized way like "literal string multiline raw". +A set of common tags and conventions for combining them is described here. +DescriptionOfStyle is an English description of the style like "Function or method name definition".

-
-class ILexerWithSubStyles : public ILexer {
-public:
-        virtual int SCI_METHOD LineEndTypesSupported() = 0;
-        virtual int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) = 0;
-        virtual int SCI_METHOD SubStylesStart(int styleBase) = 0;
-        virtual int SCI_METHOD SubStylesLength(int styleBase) = 0;
-        virtual int SCI_METHOD StyleFromSubStyle(int subStyle) = 0;
-        virtual int SCI_METHOD PrimaryStyleFromStyle(int style) = 0;
-        virtual void SCI_METHOD FreeSubStyles() = 0;
-        virtual void SCI_METHOD SetIdentifiers(int style, const char *identifiers) = 0;
-        virtual int SCI_METHOD DistanceToSecondaryStyles() = 0;
-        virtual const char * SCI_METHOD GetSubStyleBases() = 0;
-};
-
-

IDocument

@@ -6967,8 +6947,8 @@ The pWidth argument can be NULL if the caller doe bytes in the character.

-

The ILexer, ILexerWithSubStyles, and IDocument interfaces may be -expanded in the future with extended versions (ILexer2...). +

The ILexer4 and IDocument interfaces may be +expanded in the future with extended versions (ILexer5...). The Version method indicates which interface is implemented and thus which methods may be called.

diff --git a/doc/StyleMetadata.html b/doc/StyleMetadata.html new file mode 100644 index 000000000..79742be24 --- /dev/null +++ b/doc/StyleMetadata.html @@ -0,0 +1,204 @@ + + + + + + + + + + Scintilla Style Metadata + + + + + + + + + +
+ Scintilla icon + + Scintilla +
+

+ Language Types +

+

+ Scintilla contains lexers for various types of languages: +

+

+

+ Some languages can be used in different ways. JavaScript is a programming language but also + the basis of JSON data files. Similarly, + Lisp S-expressions can be used for both source code and data. +

+

+ Each language type has common elements such as identifiers in programming languages. + These common elements should be identified so that languages can be displayed with common + styles for these elements. + Style tags are used for this purpose in Scintilla. +

+

+ Style Tags +

+

+ Every style has a list of tags where a tag is a lower-case word containing only the common ASCII letters 'a'-'z' + such as "comment" or "operator". +

+

+ Tags are ordered from most important to least important. +

+

+ While applications may assign visual attributes for tag lists in many different ways, one reasonable technique is to + apply tag-specific attributes in reverse order so that earlier and more important tags override less important tags. + For example, the tag list "error comment documentation keyword" with + a set of tag attributes
+ { comment=fore:green,back:very-light-green,font:Serif documentation=fore:light-green error=strikethrough keyword=bold }
+ could be rendered as
+ bold,fore:light-green,back:very-light-green,font:Serif,strikethrough. +

+

+ Alternative renderings could check for multi-tag combinations like + { comment.documentation=fore:light-green comment.line=dark-green comment=green }. +

+

+ Commonly, a tag list will contain an optional embedded language; optional statuses; a base type; and a set of type modifiers:
+ embedded-language? status* base-type modifiers* +

+

Embedded language

+

+ The embedded language may be a source (client | server) followed by a language name + (javascript | php | python | basic). + This may be extended in the future with other programming languages and style-definition languages like CSS. +

+

Status

+

+ The statuses may be (error | unused | predefined | inactive).
+ The error status is used for lexical statuses that indicate errors in the source code such as unterminated quoted strings.
+ The unused status may indicate a gap in the lexical states, possibly because an old lexical class is no longer used or an upcoming lexical class may fill that position.
+ The predefined status indicates a style in the range 32 to 39 that is used for non-lexical purposes in Scintilla.
+ The inactive status is used for text that is not currently interpreted such as C++ code that is contained within a '#if 0' preprocessor block. +

+

Basic Types

+

+ The basic types for programming languages are (default | operator | keyword | identifier | literal | comment | preprocessor | label).
+ The default type is commonly used for spaces and tabs between tokens although it may cover other characters in some languages. +

+

+ Assembler languages add (instruction | register) to the basic types from programming languages.
+

+

+ The basic types for markup languages are (default | tag | attribute | comment | preprocessor).
+

+

+ The basic types for data languages are (default | key | data | comment).
+

+

Comments

+

+ Programming languages may differentiate between line and stream comments and treat documentation comments as distinct from other comments. + Documentation comments may be marked up with documentation keywords.
+ The additional attributes commonly used are (line | documentation | keyword | taskmarker). +

+

Literals

+

+ Programming and assembler languages contain a rich set of literals including numbers like 7 and 3.89e23; "string\n"; and nullptr + and differentiating between these is often wanted.
+ The common literal types are (numeric | boolean | string | regex | date | time | uuid | nil | compound).
+ Numeric literal types are subdivided into (integer | real).
+ String literal types may add (perhaps multiple) further attributes from (heredoc | character | escapesequence | interpolated | multiline | raw).
+

+

+ An escape sequence within an interpolated heredoc may thus be literal string heredoc escapesequence. +

+

+ List of known tags +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
attributeMarkup attribute
basicEmbedded Basic
booleanTrue or false literal
characterSingle character literal as opposed to a string literal
clientScript executed on client
commentThe standard comment type in a language: may be stream or line
compoundLiteral containing multiple subliterals such as a tuple or complex number
dataA value in a data file
dateLiteral representing a date such as '19/November/1975'
defaultStarting state commonly also used for white space
documentationComment that can be extracted into documentation
errorState indicating an invalid or erroneous element
escapesequenceParts of a string that are not literal such as '\t' for tab in C
heredocLengthy text literal marked by a word at both ends
identifierName that identifies an object or class of object
inactiveCode that is not currently interpreted
instructionMnemonic in assembler languages like 'addc'
integerNumeric literal with no fraction or exponent like '738'
interpolatedString that can contain expressions
javascriptEmbedded JavaScript
keyElement which allows finding associated data
keywordReserved word with special meaning like 'while'
labelDestination for jumps in programming and assembler languages
lineDifferentiates between stream comments and line comments in languages that have both
literalFixed value in source code
multilineDifferentiates between single line and multiline elements, commonly strings
nilLiteral for the null pointer such as nullptr in C++ or NULL in C
numericLiteral number like '16'
operatorPunctuation character such as '&' or '['
phpEmbedded PHP
predefinedStyle in the range 32 to 39 that is used for non-lexical purposes
preprocessorElement that is recognized in an early stage of translation
pythonEmbedded Python
rawString type that avoids interpretation: may be used for regular expressions in languages without a specific regex type
realNumeric literal which may have a fraction or exponent like '3.84e-15'
regexRegular expression literal like '^[a-z]+'
registerCPU register in assembler languages
serverScript executed on server
stringSequence of characters
tagMarkup tag like '<br />'
taskmarkerWord in comment that marks future work like 'FIXME'
timeLiteral representing a time such as '9:34:31'
unusedStyle that is not currently used
uuidUniversally unique identifier often used in interface definition files which may look like '{098f2470-bae0-11cd-b579-08002b30bfeb}'
+

+ Extension +

+

+ Each element in this scheme may be extended in the future. This may be done by revising this document to provide a common approach to new features. + Individual lexers may also choose to expose unique language features through new tags. +

+

+ Translation +

+

+ Tags could be exposed directly in user interfaces or configuration languages. + However, an application may also translate these to match its naming schema. + Capitalization and punctuation could be different (like Here-Doc instead of heredoc), + terminology changed ("constant" instead of "literal"), + or human language changed from English to Chinese or Spanish. +

+

+ Starting from a common set of tags makes these modifications tractable. +

+

+ Open issues +

+

+ The C++ lexer (for example) has inactive states and dynamically allocated substyles. + These should be exposed through the metadata mechanism but are not currently. +

+ + -- cgit v1.2.3