Diffstat (limited to 'lexlua')
-rw-r--r--  lexlua/html2.lua    |  147
-rw-r--r--  lexlua/lexer2.lua   | 1723
-rw-r--r--  lexlua/mumps.lua    |  112
-rw-r--r--  lexlua/ps.lua.orig  |  167
4 files changed, 0 insertions, 2149 deletions
diff --git a/lexlua/html2.lua b/lexlua/html2.lua
deleted file mode 100644
index ad1bd9c87..000000000
--- a/lexlua/html2.lua
+++ /dev/null
@@ -1,147 +0,0 @@
--- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
--- HTML LPeg lexer.
-
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
-
-local lexer = l.new('html')
-
--- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-lexer:add_rule('whitespace', ws)
-
--- Comments.
-lexer:add_rule('comment',
- token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1))
-
--- Doctype.
-lexer:add_rule('doctype', token('doctype', '<!' * word_match('doctype', true) *
- (l.any - '>')^1 * '>'))
-lexer:add_style('doctype', l.STYLE_COMMENT)
-
--- Elements.
-local known_element = token('element', '<' * P('/')^-1 * word_match([[
- a abbr address area article aside audio b base bdi bdo blockquote body
- br button canvas caption cite code col colgroup content data datalist dd
- decorator del details dfn div dl dt element em embed fieldset figcaption
- figure footer form h1 h2 h3 h4 h5 h6 head header hr html i iframe img input
- ins kbd keygen label legend li link main map mark menu menuitem meta meter
- nav noscript object ol optgroup option output p param pre progress q rp rt
- ruby s samp script section select shadow small source spacer span strong
- style sub summary sup table tbody td template textarea tfoot th thead time
- title tr track u ul var video wbr
-]], true))
-lexer:add_style('element', l.STYLE_KEYWORD)
-local unknown_element = token('unknown_element', '<' * P('/')^-1 * l.word)
-lexer:add_style('unknown_element', l.STYLE_KEYWORD..',italics')
-local element = known_element + unknown_element
-lexer:add_rule('element', element)
-
--- Closing tags.
-local tag_close = token('element', P('/')^-1 * '>')
-lexer:add_rule('tag_close', tag_close)
-
--- Attributes.
-local known_attribute = token('attribute', word_match([[
- accept accept-charset accesskey action align alt async autocomplete autofocus
- autoplay bgcolor border buffered challenge charset checked cite class code
- codebase color cols colspan content contenteditable contextmenu controls
- coords data data- datetime default defer dir dirname disabled download
- draggable dropzone enctype for form headers height hidden high href hreflang
- http-equiv icon id ismap itemprop keytype kind label lang language list
- loop low manifest max maxlength media method min multiple name novalidate
- open optimum pattern ping placeholder poster preload pubdate radiogroup
- readonly rel required reversed role rows rowspan sandbox scope scoped
- seamless selected shape size sizes span spellcheck src srcdoc srclang
- start step style summary tabindex target title type usemap value width wrap
-]], true) + ((P('data-') + 'aria-') * (l.alnum + '-')^1))
-lexer:add_style('attribute', l.STYLE_TYPE)
-local unknown_attribute = token('unknown_attribute', l.word)
-lexer:add_style('unknown_attribute', l.STYLE_TYPE..',italics')
-local attribute = (known_attribute + unknown_attribute) * #(l.space^0 * '=')
-lexer:add_rule('attribute', attribute)
-
--- TODO: performance is terrible on large files.
-local in_tag = P(function(input, index)
-  local before = input:sub(1, index - 1)
-  local s, e = before:find('<[^>]-$'), before:find('>[^<]-$')
-  if s and e then return s > e and index or nil end
-  if s then return index end
-  return input:find('^[^<]->', index) and index or nil
-end)
-
--- Equals.
-local equals = token(l.OPERATOR, '=') --* in_tag
---lexer:add_rule('equals', equals)
-
--- Strings.
-local sq_str = l.delimited_range("'")
-local dq_str = l.delimited_range('"')
-local string = #S('\'"') * l.last_char_includes('=') *
- token(l.STRING, sq_str + dq_str)
-lexer:add_rule('string', string)
-
--- Numbers.
-lexer:add_rule('number', #l.digit * l.last_char_includes('=') *
- token(l.NUMBER, l.digit^1 * P('%')^-1)) --* in_tag)
-
--- Entities.
-lexer:add_rule('entity', token('entity', '&' * (l.any - l.space - ';')^1 * ';'))
-lexer:add_style('entity', l.STYLE_COMMENT)
-
--- Fold points.
-lexer:add_fold_point('element', '<', '</')
-lexer:add_fold_point('element', '<', '/>')
-lexer:add_fold_point('unknown_element', '<', '</')
-lexer:add_fold_point('unknown_element', '<', '/>')
-lexer:add_fold_point(l.COMMENT, '<!--', '-->')
-
--- Tags that start embedded languages.
-lexer.embed_start_tag = element *
- (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 *
- ws^-1 * tag_close
-lexer.embed_end_tag = element * tag_close
-
--- Embedded CSS.
-local css = l.load('css')
-local style_element = word_match('style', true)
-local css_start_rule = #(P('<') * style_element *
-                         ('>' + P(function(input, index)
-  if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then
-    return index
-  end
-end))) * lexer.embed_start_tag -- <style type="text/css">
-local css_end_rule = #('</' * style_element * ws^-1 * '>') *
- lexer.embed_end_tag -- </style>
-lexer:embed(css, css_start_rule, css_end_rule)
-
--- Embedded JavaScript.
-local js = l.load('javascript')
-local script_element = word_match('script', true)
-local js_start_rule = #(P('<') * script_element *
-                        ('>' + P(function(input, index)
-  if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then
-    return index
-  end
-end))) * lexer.embed_start_tag -- <script type="text/javascript">
-local js_end_rule = #('</' * script_element * ws^-1 * '>') *
- lexer.embed_end_tag -- </script>
-local js_line_comment = '//' * (l.nonnewline_esc - js_end_rule)^0
-local js_block_comment = '/*' * (l.any - '*/' - js_end_rule)^0 * P('*/')^-1
-js:modify_rule('comment', token(l.COMMENT, js_line_comment + js_block_comment))
-lexer:embed(js, js_start_rule, js_end_rule)
-
--- Embedded CoffeeScript.
-local cs = l.load('coffeescript')
-local script_element = word_match('script', true)
-local cs_start_rule = #(P('<') * script_element * P(function(input, index)
-  if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then
-    return index
-  end
-end)) * lexer.embed_start_tag -- <script type="text/coffeescript">
-local cs_end_rule = #('</' * script_element * ws^-1 * '>') *
- lexer.embed_end_tag -- </script>
-lexer:embed(cs, cs_start_rule, cs_end_rule)
-
-return lexer
diff --git a/lexlua/lexer2.lua b/lexlua/lexer2.lua
deleted file mode 100644
index b32240aab..000000000
--- a/lexlua/lexer2.lua
+++ /dev/null
@@ -1,1723 +0,0 @@
--- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
-
-local M = {}
-
---[=[ This comment is for LuaDoc.
----
--- Lexes Scintilla documents and source code with Lua and LPeg.
---
--- ## Overview
---
--- Lexers highlight the syntax of source code. Scintilla (the editing component
--- behind [Textadept][] and [SciTE][]) traditionally uses static, compiled C++
--- lexers which are notoriously difficult to create and/or extend. On the other
--- hand, Lua makes it easy to rapidly create new lexers, extend existing
--- ones, and embed lexers within one another. Lua lexers tend to be more
--- readable than C++ lexers too.
---
--- Lexers are Parsing Expression Grammars, or PEGs, composed with the Lua
--- [LPeg library][]. The following table comes from the LPeg documentation and
--- summarizes all you need to know about constructing basic LPeg patterns. This
--- module provides convenience functions for creating and working with other
--- more advanced patterns and concepts.
---
--- Operator | Description
--- ---------------------|------------
--- `lpeg.P(string)` | Matches `string` literally.
--- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ characters.
--- `lpeg.S(string)` | Matches any character in set `string`.
--- `lpeg.R("`_`xy`_`")` | Matches any character between range `x` and `y`.
--- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`.
--- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`.
--- `patt1 * patt2` | Matches `patt1` followed by `patt2`.
--- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice).
--- `patt1 - patt2` | Matches `patt1` if `patt2` does not match.
--- `-patt` | Equivalent to `("" - patt)`.
--- `#patt` | Matches `patt` but consumes no input.
---
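--- For illustration, a simple ASCII identifier pattern could be built from
--- these operators (it is roughly what the `lexer.word` pattern described
--- later provides):
---
--- local word = (lpeg.R('AZ', 'az') + '_') * (lpeg.R('AZ', 'az', '09') + '_')^0
---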
--- The first part of this document deals with rapidly constructing a simple
--- lexer. The next part deals with more advanced techniques, such as custom
--- coloring and embedding lexers within one another. Following that is a
--- discussion about code folding, or being able to tell Scintilla which code
--- blocks are "foldable" (temporarily hideable from view). After that are
--- instructions on how to use LPeg lexers with the aforementioned Textadept and
--- SciTE editors. Finally there are comments on lexer performance and
--- limitations.
---
--- [LPeg library]: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html
--- [Textadept]: http://foicica.com/textadept
--- [SciTE]: http://scintilla.org/SciTE.html
---
--- ## Lexer Basics
---
--- The *lexers/* directory contains all lexers, including your new one. Before
--- attempting to write one from scratch though, first determine if your
--- programming language is similar to any of the 100+ languages supported. If
--- so, you may be able to copy and modify that lexer, saving some time and
--- effort. The filename of your lexer should be the name of your programming
--- language in lower case followed by a *.lua* extension. For example, a new Lua
--- lexer has the name *lua.lua*.
---
--- Note: Try to refrain from using one-character language names like "c", "d",
--- or "r". For example, Scintillua uses "ansi_c", "dmd", and "rstats",
--- respectively.
---
--- ### New Lexer Template
---
--- There is a *lexers/template.txt* file that contains a simple template for a
--- new lexer. Feel free to use it, replacing the '?'s with the name of your
--- lexer:
---
--- -- ? LPeg lexer.
---
--- local l = require('lexer')
--- local token, word_match = l.token, l.word_match
--- local P, R, S = lpeg.P, lpeg.R, lpeg.S
---
--- local lexer = l.new('?')
---
--- -- Whitespace.
--- local ws = token(l.WHITESPACE, l.space^1)
--- lexer:add_rule('whitespace', ws)
---
--- return lexer
---
--- The first 3 lines of code simply define often used convenience variables. The
--- fourth and last lines [define](#lexer.new) and return the lexer object
--- Scintilla uses; they are very important and must be part of every lexer. The
--- fifth line defines something called a "token", an essential building block of
--- lexers. You will learn about tokens shortly. The sixth line defines a lexer
--- grammar rule, which you will learn about later, as well as token styles.
--- Note, however, the `local` prefix in front of variables, which is needed
--- so as not to affect Lua's global environment. All in all, this is a minimal,
--- working lexer that you can build on.
---
--- ### Tokens
---
--- Take a moment to think about your programming language's structure. What kind
--- of key elements does it have? In the template shown earlier, one predefined
--- element all languages have is whitespace. Your language probably also has
--- elements like comments, strings, and keywords. Lexers refer to these elements
--- as "tokens". Tokens are the fundamental "building blocks" of lexers. Lexers
--- break down source code into tokens for coloring, which results in the syntax
--- highlighting familiar to you. It is up to you how specific your lexer is when
--- it comes to tokens. Perhaps only distinguishing between keywords and
--- identifiers is necessary, or maybe recognizing constants and built-in
--- functions, methods, or libraries is desirable. The Lua lexer, for example,
--- defines 11 tokens: whitespace, keywords, built-in functions, constants,
--- built-in libraries, identifiers, strings, comments, numbers, labels, and
--- operators. Even though constants, built-in functions, and built-in libraries
--- are subsets of identifiers, Lua programmers find it helpful for the lexer to
--- distinguish between them all. It is perfectly acceptable to just recognize
--- keywords and identifiers.
---
--- In a lexer, tokens consist of a token name and an LPeg pattern that matches a
--- sequence of characters recognized as an instance of that token. Create tokens
--- using the [`lexer.token()`]() function. Let us examine the "whitespace" token
--- defined in the template shown earlier:
---
--- local ws = token(l.WHITESPACE, l.space^1)
---
--- At first glance, the first argument does not appear to be a string name and
--- the second argument does not appear to be an LPeg pattern. Perhaps you
--- expected something like:
---
--- local ws = token('whitespace', S('\t\v\f\n\r ')^1)
---
--- The `lexer` (`l`) module actually provides a convenient list of common token
--- names and common LPeg patterns for you to use. Token names include
--- [`lexer.DEFAULT`](), [`lexer.WHITESPACE`](), [`lexer.COMMENT`](),
--- [`lexer.STRING`](), [`lexer.NUMBER`](), [`lexer.KEYWORD`](),
--- [`lexer.IDENTIFIER`](), [`lexer.OPERATOR`](), [`lexer.ERROR`](),
--- [`lexer.PREPROCESSOR`](), [`lexer.CONSTANT`](), [`lexer.VARIABLE`](),
--- [`lexer.FUNCTION`](), [`lexer.CLASS`](), [`lexer.TYPE`](), [`lexer.LABEL`](),
--- [`lexer.REGEX`](), and [`lexer.EMBEDDED`](). Patterns include
--- [`lexer.any`](), [`lexer.ascii`](), [`lexer.extend`](), [`lexer.alpha`](),
--- [`lexer.digit`](), [`lexer.alnum`](), [`lexer.lower`](), [`lexer.upper`](),
--- [`lexer.xdigit`](), [`lexer.cntrl`](), [`lexer.graph`](), [`lexer.print`](),
--- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](),
--- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](),
--- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](),
--- [`lexer.float`](), and [`lexer.word`](). You may use your own token names if
--- none of the above fit your language, but an advantage to using predefined
--- token names is that your lexer's tokens will inherit the universal syntax
--- highlighting color theme used by your text editor.
---
--- #### Example Tokens
---
--- So, how might you define other tokens like keywords, comments, and strings?
--- Here are some examples.
---
--- **Keywords**
---
--- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered
--- choices, use another convenience function: [`lexer.word_match()`](). It is
--- much easier and more efficient to write word matches like:
---
--- local keyword = token(l.KEYWORD, l.word_match[[
--- keyword_1 keyword_2 ... keyword_n
--- ]])
---
--- local case_insensitive_keyword = token(l.KEYWORD, l.word_match([[
--- KEYWORD_1 keyword_2 ... KEYword_n
--- ]], true))
---
--- local hyphened_keyword = token(l.KEYWORD, l.word_match[[
--- keyword-1 keyword-2 ... keyword-n
--- ]])
---
--- **Comments**
---
--- Line-style comments with one or more prefix characters are easy to express with LPeg:
---
--- local shell_comment = token(l.COMMENT, '#' * l.nonnewline^0)
--- local c_line_comment = token(l.COMMENT, '//' * l.nonnewline_esc^0)
---
--- The comments above start with a '#' or "//" and go to the end of the line.
--- The second comment recognizes the next line also as a comment if the current
--- line ends with a '\' escape character.
---
--- C-style "block" comments with a start and end delimiter are also easy to
--- express:
---
--- local c_comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1)
---
--- This comment starts with a "/\*" sequence and contains anything up to and
--- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer
--- can recognize unfinished comments as comments and highlight them properly.
---
--- **Strings**
---
--- It is tempting to think that a string is not much different from the block
--- comment shown above in that both have start and end delimiters:
---
--- local dq_str = '"' * (l.any - '"')^0 * P('"')^-1
--- local sq_str = "'" * (l.any - "'")^0 * P("'")^-1
--- local simple_string = token(l.STRING, dq_str + sq_str)
---
--- However, most programming languages allow escape sequences in strings such
--- that a sequence like `\"` in a double-quoted string indicates that the
--- `"` is not the end of the string. The above token incorrectly matches
--- such a string. Instead, use the [`lexer.delimited_range()`]() convenience
--- function.
---
--- local dq_str = l.delimited_range('"')
--- local sq_str = l.delimited_range("'")
--- local string = token(l.STRING, dq_str + sq_str)
---
--- In this case, the lexer treats '\' as an escape character in a string
--- sequence.
---
--- **Numbers**
---
--- Most programming languages have the same format for integer and float tokens,
--- so it might be as simple as using a couple of predefined LPeg patterns:
---
--- local number = token(l.NUMBER, l.float + l.integer)
---
--- However, some languages allow postfix characters on integers.
---
--- local integer = P('-')^-1 * (l.dec_num * S('lL')^-1)
--- local number = token(l.NUMBER, l.float + l.hex_num + integer)
---
--- Your language may need other tweaks, but it is up to you how fine-grained you
--- want your highlighting to be. After all, you are not writing a compiler or
--- interpreter!
---
--- ### Rules
---
--- Programming languages have grammars, which specify valid token structure. For
--- example, comments usually cannot appear within a string. Grammars consist of
--- rules, which are simply combinations of tokens. Recall from the lexer
--- template the [`lexer:add_rule()`]() call, which adds a rule to the lexer's
--- grammar:
---
--- lexer:add_rule('whitespace', ws)
---
--- Each rule has an associated name, but rule names are completely arbitrary and
--- serve only to identify and distinguish between different rules. Rule order is
--- important: if text does not match the first rule added to the grammar, the
--- lexer tries to match the second rule added, and so on. Right now this lexer
--- simply matches whitespace tokens under a rule named "whitespace".
---
--- To illustrate the importance of rule order, here is an example of a
--- simplified Lua lexer:
---
--- lexer:add_rule('whitespace', token(l.WHITESPACE, ...))
--- lexer:add_rule('keyword', token(l.KEYWORD, ...))
--- lexer:add_rule('identifier', token(l.IDENTIFIER, ...))
--- lexer:add_rule('string', token(l.STRING, ...))
--- lexer:add_rule('comment', token(l.COMMENT, ...))
--- lexer:add_rule('number', token(l.NUMBER, ...))
--- lexer:add_rule('label', token(l.LABEL, ...))
--- lexer:add_rule('operator', token(l.OPERATOR, ...))
---
--- Note how identifiers come after keywords. In Lua, as with most programming
--- languages, the characters allowed in keywords and identifiers are in the same
--- set (alphanumerics plus underscores). If the lexer added the "identifier"
--- rule before the "keyword" rule, all keywords would match identifiers and thus
--- incorrectly highlight as identifiers instead of keywords. The same idea
--- applies to function, constant, etc. tokens that you may want to distinguish
--- between: their rules should come before identifiers.
---
--- So what about text that does not match any rules? For example in Lua, the '!'
--- character is meaningless outside a string or comment. Normally the lexer
--- skips over such text. If instead you want to highlight these "syntax errors",
--- add an additional end rule:
---
--- lexer:add_rule('whitespace', ws)
--- ...
--- lexer:add_rule('error', token(l.ERROR, l.any))
---
--- This identifies and highlights any character not matched by an existing
--- rule as a `lexer.ERROR` token.
---
--- Even though the rules defined in the examples above contain a single token,
--- rules may consist of multiple tokens. For example, a rule for an HTML tag
--- could consist of a tag token followed by an arbitrary number of attribute
--- tokens, allowing the lexer to highlight all tokens separately. That rule
--- might look something like this:
---
--- lexer:add_rule('tag', tag_start * (ws * attributes)^0 * tag_end^-1)
---
--- Note however that lexers with complex rules like these are more prone to lose
--- track of their state, especially if they span multiple lines.
---
--- ### Summary
---
--- Lexers primarily consist of tokens and grammar rules. At your disposal are a
--- number of convenience patterns and functions for rapidly creating a lexer. If
--- you choose to use predefined token names for your tokens, you do not have to
--- define how the lexer highlights them. The tokens will inherit the default
--- syntax highlighting color theme your editor uses.
---
--- ## Advanced Techniques
---
--- ### Styles and Styling
---
--- The most basic form of syntax highlighting is assigning different colors to
--- different tokens. Instead of highlighting with just colors, Scintilla allows
--- for more rich highlighting, or "styling", with different fonts, font sizes,
--- font attributes, and foreground and background colors, just to name a few.
--- The unit of this rich highlighting is called a "style". Styles are simply
--- strings of comma-separated property settings. By default, lexers associate
--- predefined token names like `lexer.WHITESPACE`, `lexer.COMMENT`,
--- `lexer.STRING`, etc. with particular styles as part of a universal color
--- theme. These predefined styles include [`lexer.STYLE_CLASS`](),
--- [`lexer.STYLE_COMMENT`](), [`lexer.STYLE_CONSTANT`](),
--- [`lexer.STYLE_ERROR`](), [`lexer.STYLE_EMBEDDED`](),
--- [`lexer.STYLE_FUNCTION`](), [`lexer.STYLE_IDENTIFIER`](),
--- [`lexer.STYLE_KEYWORD`](), [`lexer.STYLE_LABEL`](), [`lexer.STYLE_NUMBER`](),
--- [`lexer.STYLE_OPERATOR`](), [`lexer.STYLE_PREPROCESSOR`](),
--- [`lexer.STYLE_REGEX`](), [`lexer.STYLE_STRING`](), [`lexer.STYLE_TYPE`](),
--- [`lexer.STYLE_VARIABLE`](), and [`lexer.STYLE_WHITESPACE`](). Like with
--- predefined token names and LPeg patterns, you may define your own styles. At
--- their core, styles are just strings, so you may create new ones and/or modify
--- existing ones. Each style consists of the following comma-separated settings:
---
--- Setting | Description
--- ---------------|------------
--- font:_name_ | The name of the font the style uses.
--- size:_int_ | The size of the font the style uses.
--- [not]bold | Whether or not the font face is bold.
--- weight:_int_ | The weight or boldness of a font, between 1 and 999.
--- [not]italics | Whether or not the font face is italic.
--- [not]underlined| Whether or not the font face is underlined.
--- fore:_color_ | The foreground color of the font face.
--- back:_color_ | The background color of the font face.
--- [not]eolfilled | Does the background color extend to the end of the line?
--- case:_char_ | The case of the font ('u': upper, 'l': lower, 'm': normal).
--- [not]visible | Whether or not the text is visible.
--- [not]changeable| Whether the text is changeable or read-only.
---
--- Specify font colors in either "#RRGGBB" format, "0xBBGGRR" format, or the
--- decimal equivalent of the latter. As with token names, LPeg patterns, and
--- styles, there is a set of predefined color names, but they vary depending on
--- the current color theme in use. Therefore, it is generally not a good idea to
--- manually define colors within styles in your lexer since they might not fit
--- into a user's chosen color theme. Try to refrain from even using predefined
--- colors in a style because that color may be theme-specific. Instead, the best
--- practice is to either use predefined styles or derive new color-agnostic
--- styles from predefined ones. For example, Lua "longstring" tokens use the
--- existing `lexer.STYLE_STRING` style instead of defining a new one.
---
--- #### Example Styles
---
--- Defining styles is pretty straightforward. An empty style that inherits the
--- default theme settings is simply an empty string:
---
--- local style_nothing = ''
---
--- A similar style but with a bold font face looks like this:
---
--- local style_bold = 'bold'
---
--- If you want the same style, but also with an italic font face, define the new
--- style in terms of the old one:
---
--- local style_bold_italic = style_bold..',italics'
---
--- This allows you to derive new styles from predefined ones without having to
--- rewrite them. This operation leaves the old style unchanged. Thus if you
--- had a "static variable" token whose style you wanted to base off of
--- `lexer.STYLE_VARIABLE`, it would probably look like:
---
--- local style_static_var = l.STYLE_VARIABLE..',italics'
---
--- The color theme files in the *lexers/themes/* folder give more examples of
--- style definitions.
---
--- ### Token Styles
---
--- Lexers use the [`lexer:add_style()`]() function to assign styles to
--- particular tokens. Recall the token definition and rule from the lexer template:
---
--- local ws = token(l.WHITESPACE, l.space^1)
--- lexer:add_rule('whitespace', ws)
---
--- Why is a style not assigned to the `lexer.WHITESPACE` token? As mentioned
--- earlier, lexers automatically associate tokens that use predefined token
--- names with a particular style. Only tokens with custom token names need
--- manual style associations. As an example, consider a custom whitespace token:
---
--- local ws = token('custom_whitespace', l.space^1)
---
--- Assigning a style to this token looks like:
---
--- lexer:add_style('custom_whitespace', l.STYLE_WHITESPACE)
---
--- Do not confuse token names with rule names. They are completely different
--- entities. In the example above, the lexer associates the "custom_whitespace"
--- token with the existing style for `lexer.WHITESPACE` tokens. If instead you
--- prefer to color the background of whitespace a shade of grey, it might look
--- like:
---
--- local custom_style = l.STYLE_WHITESPACE..',back:$(color.grey)'
--- lexer:add_style('custom_whitespace', custom_style)
---
--- Notice that the lexer performs Scintilla/SciTE-style "$()" property expansion.
--- You may also use "%()". Remember to refrain from assigning specific colors in
--- styles, but in this case, all user color themes probably define the
--- "color.grey" property.
---
--- ### Line Lexers
---
--- By default, lexers match the arbitrary chunks of text passed to them by
--- Scintilla. These chunks may be a full document, only the visible part of a
--- document, or even just portions of lines. Some lexers need to match whole
--- lines. For example, a lexer for the output of a file "diff" needs to know if
--- the line started with a '+' or '-' and then style the entire line
--- accordingly. To indicate that your lexer matches by line, create the lexer
--- with an extra parameter:
---
--- local lexer = l.new('?', {lex_by_line = true})
---
--- Now the input text for the lexer is a single line at a time. Keep in mind
--- that line lexers do not have the ability to look ahead at subsequent lines.
---
--- ### Embedded Lexers
---
--- Lexers embed within one another very easily, requiring minimal effort. In the
--- following sections, the lexer being embedded is called the "child" lexer and
--- the lexer a child is being embedded in is called the "parent". For example,
--- consider an HTML lexer and a CSS lexer. Each lexer stands alone for styling
--- its respective HTML or CSS files. However, CSS can be embedded inside
--- HTML. In this specific case, the CSS lexer is the "child" lexer with the HTML
--- lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This
--- sounds a lot like the case with CSS, but there is a subtle difference: PHP
--- _embeds itself into_ HTML while CSS is _embedded in_ HTML. This fundamental
--- difference results in two types of embedded lexers: a parent lexer that
--- embeds other child lexers in it (like HTML embedding CSS), and a child lexer
--- that embeds itself into a parent lexer (like PHP embedding itself in HTML).
---
--- #### Parent Lexer
---
--- Before embedding a child lexer into a parent lexer, the parent lexer needs to
--- load the child lexer. This is done with the [`lexer.load()`]() function. For
--- example, loading the CSS lexer within the HTML lexer looks like:
---
--- local css = l.load('css')
---
--- The next part of the embedding process is telling the parent lexer when to
--- switch over to the child lexer and when to switch back. The lexer refers to
--- these indications as the "start rule" and "end rule", respectively, and are
--- just LPeg patterns. Continuing with the HTML/CSS example, the transition from
--- HTML to CSS is when the lexer encounters a "style" tag with a "type"
--- attribute whose value is "text/css":
---
--- local css_tag = P('<style') * P(function(input, index)
---   if input:find('^[^>]+type="text/css"', index) then
---     return index
---   end
--- end)
---
--- This pattern looks for the beginning of a "style" tag and searches its
--- attribute list for the text "`type="text/css"`". (In this simplified example,
--- the Lua pattern does not allow whitespace around the '=' nor does it
--- recognize single quotes as valid.) If there is a match, the
--- functional pattern returns a value instead of `nil`. In this case, the value
--- returned does not matter because we ultimately want to style the "style" tag
--- as an HTML tag, so the actual start rule looks like this:
---
--- local css_start_rule = #css_tag * tag
---
--- Now that the parent knows when to switch to the child, it needs to know when
--- to switch back. In the case of HTML/CSS, the switch back occurs when the
--- lexer encounters an ending "style" tag, though the lexer should still style
--- the tag as an HTML tag:
---
--- local css_end_rule = #P('</style>') * tag
---
--- Once the parent loads the child lexer and defines the child's start and end
--- rules, it embeds the child with the [`lexer:embed()`]() function:
---
--- lexer:embed(css, css_start_rule, css_end_rule)
---
--- #### Child Lexer
---
--- The process for instructing a child lexer to embed itself into a parent is
--- very similar to embedding a child into a parent: first, load the parent lexer
--- into the child lexer with the [`lexer.load()`]() function and then create
--- start and end rules for the child lexer. However, in this case, call
--- [`lexer:embed()`]() with switched arguments. For example, in the PHP lexer:
---
--- local html = l.load('html')
--- local php_start_rule = token('php_tag', '<?php ')
--- local php_end_rule = token('php_tag', '?>')
--- lexer:add_style('php_tag', l.STYLE_EMBEDDED)
--- html:embed(lexer, php_start_rule, php_end_rule)
---
--- ### Lexers with Complex State
---
--- A vast majority of lexers are not stateful and can operate on any chunk of
--- text in a document. However, there may be rare cases where a lexer does need
--- to keep track of some sort of persistent state. Rather than using `lpeg.P`
--- function patterns that set state variables, it is recommended to make use of
--- Scintilla's built-in, per-line state integers via [`lexer.line_state`](). It
--- was designed to accommodate up to 32 bit flags for tracking state.
--- [`lexer.line_from_position()`]() will return the line for any position given
--- to an `lpeg.P` function pattern. (Any positions derived from that position
--- argument will also work.)
---
--- Writing stateful lexers is beyond the scope of this document.
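---
--- Still, purely as an illustrative sketch, a function pattern that records a
--- flag in the current line's state might look like the following (the
--- `IN_HEREDOC` flag and the surrounding names are invented for this example):
---
--- local IN_HEREDOC = 1 -- hypothetical bit flag
--- local heredoc_start = P(function(input, index)
---   local line = l.line_from_position(index)
---   l.line_state[line] = IN_HEREDOC -- remember the state for this line
---   return index
--- end)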
---
--- ## Code Folding
---
--- When reading source code, it is occasionally helpful to temporarily hide
--- blocks of code like functions, classes, comments, etc. This is the concept of
--- "folding". In the Textadept and SciTE editors for example, little indicators
--- in the editor margins appear next to code that can be folded at places called
--- "fold points". When the user clicks an indicator, the editor hides the code
--- associated with the indicator until the user clicks the indicator again. The
--- lexer specifies these fold points and what code exactly to fold.
---
--- The fold points for most languages occur on keywords or character sequences.
--- Examples of fold keywords are "if" and "end" in Lua and examples of fold
--- character sequences are '{', '}', "/\*", and "\*/" in C for code block and
--- comment delimiters, respectively. However, these fold points cannot occur
--- just anywhere. For example, lexers should not recognize fold keywords that
--- appear within strings or comments. The [`lexer:add_fold_point()`]() function
--- allows you to conveniently define fold points with such granularity. For
--- example, consider C:
---
--- lexer:add_fold_point(l.OPERATOR, '{', '}')
--- lexer:add_fold_point(l.COMMENT, '/*', '*/')
---
--- The first call states that any '{' or '}' that the lexer recognizes as
--- a `lexer.OPERATOR` token is a fold point. Likewise, the second call
--- states that any "/\*" or "\*/" that the lexer recognizes as part of a
--- `lexer.COMMENT` token is a fold point. The lexer does not consider any
--- occurrences of these characters outside their defined tokens (such as in a
--- string) as fold points. How do you specify fold keywords? Here is an example
--- for Lua:
---
--- lexer:add_fold_point(l.KEYWORD, 'if', 'end')
--- lexer:add_fold_point(l.KEYWORD, 'do', 'end')
--- lexer:add_fold_point(l.KEYWORD, 'function', 'end')
--- lexer:add_fold_point(l.KEYWORD, 'repeat', 'until')
---
--- If your lexer has case-insensitive keywords as fold points, simply add a
--- `case_insensitive_fold_points = true` option to [`lexer.new()`](), and
--- specify keywords in lower case.
---
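--- For example, a sketch of such a lexer might look like:
---
--- local lexer = l.new('?', {case_insensitive_fold_points = true})
--- lexer:add_fold_point(l.KEYWORD, 'if', 'end') -- "If"/"End" fold too
---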
--- If your lexer needs to do some additional processing in order to determine if
--- a token is a fold point, pass a function that returns an integer to
--- `lexer:add_fold_point()`. Returning `1` indicates the token is a beginning
--- fold point and returning `-1` indicates the token is an ending fold point.
--- Returning `0` indicates the token is not a fold point. For example:
---
--- local function fold_strange_token(text, pos, line, s, symbol)
---   if ... then
---     return 1 -- beginning fold point
---   elseif ... then
---     return -1 -- ending fold point
---   end
---   return 0
--- end
---
--- lexer:add_fold_point('strange_token', '|', fold_strange_token)
---
--- Any time the lexer encounters a '|' that is a "strange_token", it calls the
--- `fold_strange_token` function to determine if '|' is a fold point. The lexer
--- calls these functions with the following arguments: the text to identify fold
--- points in, the beginning position of the current line in the text to fold,
--- the current line's text, the position in the current line the fold point text
--- starts at, and the fold point text itself.
---
--- ### Fold by Indentation
---
--- Some languages have significant whitespace and/or no delimiters that indicate
--- fold points. If your lexer falls into this category and you would like to
--- mark fold points based on changes in indentation, create the lexer with a
--- `fold_by_indentation = true` option:
---
--- local lexer = l.new('?', {fold_by_indentation = true})
---
--- ## Using Lexers
---
--- ### Textadept
---
--- Put your lexer in your *~/.textadept/lexers/* directory so you do not
--- overwrite it when upgrading Textadept. Also, lexers in this directory
--- override default lexers. Thus, Textadept loads a user *lua* lexer instead of
--- the default *lua* lexer. This is convenient for tweaking a default lexer to
--- your liking. Then add a [file type][] for your lexer if necessary.
---
--- [file type]: textadept.file_types.html
---
--- ### SciTE
---
--- Create a *.properties* file for your lexer and `import` it in either your
--- *SciTEUser.properties* or *SciTEGlobal.properties*. The contents of the
--- *.properties* file should contain:
---
--- file.patterns.[lexer_name]=[file_patterns]
--- lexer.$(file.patterns.[lexer_name])=[lexer_name]
---
--- where `[lexer_name]` is the name of your lexer (minus the *.lua* extension)
--- and `[file_patterns]` is a set of file extensions to use your lexer for.
---
--- Please note that Lua lexers ignore any styling information in *.properties*
--- files. Your theme file in the *lexers/themes/* directory contains styling
--- information.
---
--- ## Considerations
---
--- ### Performance
---
--- There might be some slight overhead when initializing a lexer, but loading a
--- file from disk into Scintilla is usually more expensive. On modern computer
--- systems, I see no difference in speed between LPeg lexers and Scintilla's C++
--- ones. Optimize lexers for speed by re-arranging `lexer:add_rule()` calls so
--- that the most common rules match first. Do keep in mind that order matters
--- for similar rules.
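---
--- For instance, in a hypothetical language where comments and strings are the
--- most common tokens, a reordering might look like the following (keywords
--- must still precede identifiers, since those rules overlap):
---
--- lexer:add_rule('whitespace', ws)
--- lexer:add_rule('comment', comment)
--- lexer:add_rule('string', string)
--- lexer:add_rule('keyword', keyword)
--- lexer:add_rule('identifier', identifier)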
---
--- In some cases, folding may be far more expensive than lexing, particularly
--- in lexers with a lot of potential fold points. If your lexer is exhibiting
--- signs of slowness, try disabling folding in your text editor first. If that
--- speeds things up, you can try reducing the number of fold points you added,
--- overriding `lexer:fold()` with your own implementation, or simply eliminating
--- folding support from your lexer.
---
--- ### Limitations
---
--- Embedded preprocessor languages like PHP cannot completely embed in their
--- parent languages in that the parent's tokens do not support start and end
--- rules. This mostly goes unnoticed, but code like
---
--- <div id="<?php echo $id; ?>">
---
--- will not style correctly.
---
--- ### Troubleshooting
---
--- Errors in lexers can be tricky to debug. Lexers print Lua errors to
--- `io.stderr` and `_G.print()` statements to `io.stdout`. Running your editor
--- from a terminal is the easiest way to see errors as they occur.
---
--- ### Risks
---
--- Poorly written lexers have the ability to crash Scintilla (and thus its
--- containing application), so unsaved data might be lost. However, I have only
--- observed these crashes in early lexer development, when syntax errors or
--- pattern errors are present. Once the lexer actually starts styling text
--- (either correctly or incorrectly, it does not matter), I have not observed
--- any crashes.
---
--- ### Acknowledgements
---
--- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list
--- that inspired me, and thanks to Roberto Ierusalimschy for LPeg.
---
--- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html
--- @field path (string)
--- The path used to search for a lexer to load.
--- Identical in format to Lua's `package.path` string.
--- The default value is `package.path`.
--- @field DEFAULT (string)
--- The token name for default tokens.
--- @field WHITESPACE (string)
--- The token name for whitespace tokens.
--- @field COMMENT (string)
--- The token name for comment tokens.
--- @field STRING (string)
--- The token name for string tokens.
--- @field NUMBER (string)
--- The token name for number tokens.
--- @field KEYWORD (string)
--- The token name for keyword tokens.
--- @field IDENTIFIER (string)
--- The token name for identifier tokens.
--- @field OPERATOR (string)
--- The token name for operator tokens.
--- @field ERROR (string)
--- The token name for error tokens.
--- @field PREPROCESSOR (string)
--- The token name for preprocessor tokens.
--- @field CONSTANT (string)
--- The token name for constant tokens.
--- @field VARIABLE (string)
--- The token name for variable tokens.
--- @field FUNCTION (string)
--- The token name for function tokens.
--- @field CLASS (string)
--- The token name for class tokens.
--- @field TYPE (string)
--- The token name for type tokens.
--- @field LABEL (string)
--- The token name for label tokens.
--- @field REGEX (string)
--- The token name for regex tokens.
--- @field STYLE_CLASS (string)
--- The style typically used for class definitions.
--- @field STYLE_COMMENT (string)
--- The style typically used for code comments.
--- @field STYLE_CONSTANT (string)
--- The style typically used for constants.
--- @field STYLE_ERROR (string)
--- The style typically used for erroneous syntax.
--- @field STYLE_FUNCTION (string)
--- The style typically used for function definitions.
--- @field STYLE_KEYWORD (string)
--- The style typically used for language keywords.
--- @field STYLE_LABEL (string)
--- The style typically used for labels.
--- @field STYLE_NUMBER (string)
--- The style typically used for numbers.
--- @field STYLE_OPERATOR (string)
--- The style typically used for operators.
--- @field STYLE_REGEX (string)
--- The style typically used for regular expression strings.
--- @field STYLE_STRING (string)
--- The style typically used for strings.
--- @field STYLE_PREPROCESSOR (string)
--- The style typically used for preprocessor statements.
--- @field STYLE_TYPE (string)
--- The style typically used for static types.
--- @field STYLE_VARIABLE (string)
--- The style typically used for variables.
--- @field STYLE_WHITESPACE (string)
--- The style typically used for whitespace.
--- @field STYLE_EMBEDDED (string)
--- The style typically used for embedded code.
--- @field STYLE_IDENTIFIER (string)
--- The style typically used for identifier words.
--- @field STYLE_DEFAULT (string)
--- The style all styles are based off of.
--- @field STYLE_LINENUMBER (string)
--- The style used for all margins except fold margins.
--- @field STYLE_BRACELIGHT (string)
--- The style used for highlighted brace characters.
--- @field STYLE_BRACEBAD (string)
--- The style used for unmatched brace characters.
--- @field STYLE_CONTROLCHAR (string)
--- The style used for control characters.
--- Color attributes are ignored.
--- @field STYLE_INDENTGUIDE (string)
--- The style used for indentation guides.
--- @field STYLE_CALLTIP (string)
--- The style used by call tips if [`buffer.call_tip_use_style`]() is set.
--- Only the font name, size, and color attributes are used.
--- @field STYLE_FOLDDISPLAYTEXT (string)
--- The style used for fold display text.
--- @field any (pattern)
--- A pattern that matches any single character.
--- @field ascii (pattern)
--- A pattern that matches any ASCII character (codes 0 to 127).
--- @field extend (pattern)
--- A pattern that matches any ASCII extended character (codes 0 to 255).
--- @field alpha (pattern)
--- A pattern that matches any alphabetic character ('A'-'Z', 'a'-'z').
--- @field digit (pattern)
--- A pattern that matches any digit ('0'-'9').
--- @field alnum (pattern)
--- A pattern that matches any alphanumeric character ('A'-'Z', 'a'-'z',
--- '0'-'9').
--- @field lower (pattern)
--- A pattern that matches any lower case character ('a'-'z').
--- @field upper (pattern)
--- A pattern that matches any upper case character ('A'-'Z').
--- @field xdigit (pattern)
--- A pattern that matches any hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f').
--- @field cntrl (pattern)
--- A pattern that matches any control character (ASCII codes 0 to 31).
--- @field graph (pattern)
--- A pattern that matches any graphical character ('!' to '~').
--- @field print (pattern)
--- A pattern that matches any printable character (' ' to '~').
--- @field punct (pattern)
--- A pattern that matches any punctuation character ('!' to '/', ':' to '@',
--- '[' to '`', '{' to '~').
--- @field space (pattern)
--- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n',
--- '\r', space).
--- @field newline (pattern)
--- A pattern that matches any set of end of line characters.
--- @field nonnewline (pattern)
--- A pattern that matches any single, non-newline character.
--- @field nonnewline_esc (pattern)
--- A pattern that matches any single, non-newline character or any set of end
--- of line characters escaped with '\'.
--- @field dec_num (pattern)
--- A pattern that matches a decimal number.
--- @field hex_num (pattern)
--- A pattern that matches a hexadecimal number.
--- @field oct_num (pattern)
--- A pattern that matches an octal number.
--- @field integer (pattern)
--- A pattern that matches either a decimal, hexadecimal, or octal number.
--- @field float (pattern)
--- A pattern that matches a floating point number.
--- @field word (pattern)
--- A pattern that matches a typical word. Words begin with a letter or
--- underscore and consist of alphanumeric and underscore characters.
--- @field FOLD_BASE (number)
--- The initial (root) fold level.
--- @field FOLD_BLANK (number)
--- Flag indicating that the line is blank.
--- @field FOLD_HEADER (number)
--- Flag indicating the line is a fold point.
--- @field fold_level (table, Read-only)
--- Table of fold level bit-masks for line numbers starting from zero.
--- Fold level masks are composed of an integer level combined with any of the
--- following bits:
---
--- * `lexer.FOLD_BASE`
--- The initial fold level.
--- * `lexer.FOLD_BLANK`
--- The line is blank.
--- * `lexer.FOLD_HEADER`
--- The line is a header, or fold point.
--- @field indent_amount (table, Read-only)
--- Table of indentation amounts in character columns, for line numbers
--- starting from zero.
--- @field line_state (table)
--- Table of integer line states for line numbers starting from zero.
--- Line states can be used by lexers for keeping track of persistent states.
--- @field property (table)
--- Map of key-value string pairs.
--- @field property_expanded (table, Read-only)
--- Map of key-value string pairs with `$()` and `%()` variable replacement
--- performed in values.
--- @field property_int (table, Read-only)
--- Map of key-value pairs with values interpreted as numbers, or `0` if not
--- found.
--- @field style_at (table, Read-only)
--- Table of style names at positions in the buffer starting from 1.
-module('lexer')]=]
-
-local lpeg = require('lpeg')
-local lpeg_P, lpeg_R, lpeg_S, lpeg_V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
-local lpeg_Ct, lpeg_Cc, lpeg_Cp = lpeg.Ct, lpeg.Cc, lpeg.Cp
-local lpeg_Cmt, lpeg_C = lpeg.Cmt, lpeg.C
-local lpeg_match = lpeg.match
-
-M.path = package.path
-
-if not package.searchpath then
-  -- Searches for the given *name* in the given *path*.
-  -- This is an implementation of Lua 5.2's `package.searchpath()` function for
-  -- Lua 5.1.
-  function package.searchpath(name, path)
-    local tried = {}
-    for part in path:gmatch('[^;]+') do
-      local filename = part:gsub('%?', name)
-      local f = io.open(filename, 'r')
-      if f then
-        f:close()
-        return filename
-      end
-      tried[#tried + 1] = string.format("no file '%s'", filename)
-    end
-    return nil, table.concat(tried, '\n')
-  end
-end
-
-local string_upper = string.upper
--- Default styles.
-local default = {
-  'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword',
-  'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable',
-  'function', 'class', 'type', 'label', 'regex', 'embedded'
-}
-for i = 1, #default do
-  local name, upper_name = default[i], string_upper(default[i])
-  M[upper_name], M['STYLE_'..upper_name] = name, '$(style.'..name..')'
-end
--- Predefined styles.
-local predefined = {
-  'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar',
-  'indentguide', 'calltip', 'folddisplaytext'
-}
-for i = 1, #predefined do
-  local name, upper_name = predefined[i], string_upper(predefined[i])
-  M[upper_name], M['STYLE_'..upper_name] = name, '$(style.'..name..')'
-end
-
----
--- Adds pattern *rule* identified by string *id* to the ordered list of rules
--- for lexer *lexer*.
--- @param lexer The lexer to add the given rule to.
--- @param id The id associated with this rule. It does not have to be the same
--- as the name passed to `token()`.
--- @param rule The LPeg pattern of the rule.
--- @see modify_rule
--- @name add_rule
-function M.add_rule(lexer, id, rule)
-  if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent
-  if not lexer._RULES then
-    lexer._RULES = {}
-    -- Contains an ordered list (by numerical index) of rule names. This is used
-    -- in conjunction with lexer._RULES for building _TOKENRULE.
-    lexer._RULEORDER = {}
-  end
-  lexer._RULES[id] = rule
-  lexer._RULEORDER[#lexer._RULEORDER + 1] = id
-  lexer:build_grammar()
-end
-
----
--- Replaces in lexer *lexer* the existing rule identified by string *id* with
--- pattern *rule*.
--- @param lexer The lexer to modify.
--- @param id The id associated with this rule.
--- @param rule The LPeg pattern of the rule.
--- @name modify_rule
-function M.modify_rule(lexer, id, rule)
-  if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent
-  lexer._RULES[id] = rule
-  lexer:build_grammar()
-end
-
----
--- Associates string *token_name* in lexer *lexer* with Scintilla style string
--- *style*.
--- Style strings are comma-separated property settings. Available property
--- settings are:
---
--- * `font:name`: Font name.
--- * `size:int`: Font size.
--- * `bold` or `notbold`: Whether or not the font face is bold.
--- * `weight:int`: Font weight (between 1 and 999).
--- * `italics` or `notitalics`: Whether or not the font face is italic.
--- * `underlined` or `notunderlined`: Whether or not the font face is
--- underlined.
--- * `fore:color`: Font face foreground color in "#RRGGBB" or 0xBBGGRR format.
--- * `back:color`: Font face background color in "#RRGGBB" or 0xBBGGRR format.
--- * `eolfilled` or `noteolfilled`: Whether or not the background color
--- extends to the end of the line.
--- * `case:char`: Font case ('u' for uppercase, 'l' for lowercase, and 'm' for
--- mixed case).
--- * `visible` or `notvisible`: Whether or not the text is visible.
--- * `changeable` or `notchangeable`: Whether or not the text is changeable or
--- read-only.
---
--- Property settings may also contain "$(property.name)" expansions for
--- properties defined in Scintilla, theme files, etc.
--- @param lexer The lexer to add a style to.
--- @param token_name The name of the token to associate with the style.
--- @param style A style string for Scintilla.
--- @usage lexer:add_style('longstring', l.STYLE_STRING)
--- @usage lexer:add_style('deprecated_function', l.STYLE_FUNCTION..',italics')
--- @usage lexer:add_style('visible_ws',
--- l.STYLE_WHITESPACE..',back:$(color.grey)')
--- @name add_style
-function M.add_style(lexer, token_name, style)
-  local num_styles = lexer._numstyles
-  if num_styles == 32 then num_styles = num_styles + 8 end -- skip predefined
-  if num_styles >= 255 then print('Too many styles defined (255 MAX)') end
-  lexer._TOKENSTYLES[token_name], lexer._numstyles = num_styles, num_styles + 1
-  lexer._EXTRASTYLES[token_name] = style
-  -- If the lexer is a proxy or a child that embedded itself, copy this style to
-  -- the parent lexer.
-  if lexer._lexer then lexer._lexer:add_style(token_name, style) end
-end
-
----
--- Adds to lexer *lexer* a fold point whose beginning and end tokens are string
--- *token_name* tokens with string content *start_symbol* and *end_symbol*,
--- respectively.
--- In the event that *start_symbol* may or may not be a fold point depending on
--- context, and that additional processing is required, *end_symbol* may be a
--- function that ultimately returns `1` (indicating a beginning fold point),
--- `-1` (indicating an ending fold point), or `0` (indicating no fold point).
--- That function is passed the following arguments:
---
--- * `text`: The text being processed for fold points.
--- * `pos`: The position in *text* of the beginning of the line currently
--- being processed.
--- * `line`: The text of the line currently being processed.
--- * `s`: The position of *start_symbol* in *line*.
--- * `symbol`: *start_symbol* itself.
--- @param lexer The lexer to add a fold point to.
--- @param token_name The token name of text that indicates a fold point.
--- @param start_symbol The text that indicates the beginning of a fold point.
--- @param end_symbol Either the text that indicates the end of a fold point, or
--- a function that returns whether or not *start_symbol* is a beginning fold
--- point (1), an ending fold point (-1), or not a fold point at all (0).
--- @usage lexer:add_fold_point(l.OPERATOR, '{', '}')
--- @usage lexer:add_fold_point(l.KEYWORD, 'if', 'end')
--- @usage lexer:add_fold_point(l.COMMENT, '#', l.fold_line_comments('#'))
--- @usage lexer:add_fold_point('custom', function(text, pos, line, s, symbol)
--- ... end)
--- @name add_fold_point
-function M.add_fold_point(lexer, token_name, start_symbol, end_symbol)
-  if not lexer._FOLDPOINTS then lexer._FOLDPOINTS = {_SYMBOLS = {}} end
-  local symbols = lexer._FOLDPOINTS._SYMBOLS
-  if not symbols[start_symbol] then
-    symbols[#symbols + 1], symbols[start_symbol] = start_symbol, true
-  end
-  if not lexer._FOLDPOINTS[token_name] then
-    lexer._FOLDPOINTS[token_name] = {}
-  end
-  if type(end_symbol) == 'string' then
-    if not symbols[end_symbol] then
-      symbols[#symbols + 1], symbols[end_symbol] = end_symbol, true
-    end
-    lexer._FOLDPOINTS[token_name][start_symbol] = 1
-    lexer._FOLDPOINTS[token_name][end_symbol] = -1
-  else
-    lexer._FOLDPOINTS[token_name][start_symbol] = end_symbol -- function or int
-  end
-  -- If the lexer is a proxy or a child that embedded itself, copy this fold
-  -- point to the parent lexer.
-  if lexer._lexer then
-    lexer._lexer:add_fold_point(token_name, start_symbol, end_symbol)
-  end
-end
-
--- (Re)constructs `lexer._TOKENRULE`.
-local function join_tokens(lexer)
-  local patterns, order = lexer._RULES, lexer._RULEORDER
-  local token_rule = patterns[order[1]]
-  for i = 2, #order do token_rule = token_rule + patterns[order[i]] end
-  lexer._TOKENRULE = token_rule + M.token(M.DEFAULT, M.any)
-  return lexer._TOKENRULE
-end
-
--- Metatable for Scintillua grammars.
--- These grammars are just tables ultimately passed to `lpeg.P()`.
-local grammar_mt = {__index = {
-  -- Adds lexer *lexer* and any of its embedded lexers to this grammar.
-  -- @param lexer The lexer to add.
-  add_lexer = function(self, lexer)
-    local token_rule = lexer:join_tokens()
-    for i = 1, #lexer._CHILDREN do
-      local child = lexer._CHILDREN[i]
-      if child._CHILDREN then self:add_lexer(child) end
-      local rules = child._EMBEDDEDRULES[lexer._NAME]
-      local rules_token_rule = self['__'..child._NAME] or rules.token_rule
-      self[child._NAME] = (-rules.end_rule * rules_token_rule)^0 *
-                          rules.end_rule^-1 * lpeg_V(lexer._NAME)
-      local embedded_child = '_'..child._NAME
-      self[embedded_child] = rules.start_rule *
-                             (-rules.end_rule * rules_token_rule)^0 *
-                             rules.end_rule^-1
-      token_rule = lpeg_V(embedded_child) + token_rule
-    end
-    self['__'..lexer._NAME] = token_rule -- can contain embedded lexer rules
-    self[lexer._NAME] = token_rule^0
-  end
-}}
-
--- (Re)constructs `lexer._GRAMMAR`.
--- @param initial_rule The name of the rule to start lexing with. The default
--- value is `lexer._NAME`. Multilang lexers use this to start with a child
--- rule if necessary.
-local function build_grammar(lexer, initial_rule)
-  if not lexer._RULES then return end
-  if lexer._CHILDREN then
-    if not initial_rule then initial_rule = lexer._NAME end
-    local grammar = setmetatable({initial_rule}, grammar_mt)
-    grammar:add_lexer(lexer)
-    lexer._INITIALRULE = initial_rule
-    lexer._GRAMMAR = lpeg_Ct(lpeg_P(grammar))
-  else
-    lexer._GRAMMAR = lpeg_Ct(lexer:join_tokens()^0)
-  end
-end
-
----
--- Embeds child lexer *child* in parent lexer *lexer* using patterns
--- *start_rule* and *end_rule*, which signal the beginning and end of the
--- embedded lexer, respectively.
--- @param lexer The parent lexer.
--- @param child The child lexer.
--- @param start_rule The pattern that signals the beginning of the embedded
--- lexer.
--- @param end_rule The pattern that signals the end of the embedded lexer.
--- @usage html:embed(css, css_start_rule, css_end_rule)
--- @usage html:embed(lexer, php_start_rule, php_end_rule) -- from php lexer
--- @name embed
-function M.embed(lexer, child, start_rule, end_rule)
-  if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent
-  -- Add child rules.
-  if not child._EMBEDDEDRULES then child._EMBEDDEDRULES = {} end
-  if not child._RULES then error('Cannot embed lexer with no rules') end
-  child._EMBEDDEDRULES[lexer._NAME] = {
-    ['start_rule'] = start_rule,
-    token_rule = child:join_tokens(),
-    ['end_rule'] = end_rule
-  }
-  if not lexer._CHILDREN then lexer._CHILDREN = {} end
-  local children = lexer._CHILDREN
-  children[#children + 1] = child
-  -- Add child styles.
-  for token, style in pairs(child._EXTRASTYLES) do
-    lexer:add_style(token, style)
-  end
-  -- Add child fold symbols.
-  if child._FOLDPOINTS then
-    for token_name, symbols in pairs(child._FOLDPOINTS) do
-      if token_name ~= '_SYMBOLS' then
-        for symbol, v in pairs(symbols) do
-          lexer:add_fold_point(token_name, symbol, v)
-        end
-      end
-    end
-  end
-  lexer:build_grammar()
-  child._lexer = lexer -- use parent's tokens if child is embedding itself
-end
-
----
--- Lexes a chunk of text *text* (that has an initial style number of
--- *init_style*) using lexer *lexer*, returning a table of token names and
--- positions.
--- @param lexer The lexer to lex text with.
--- @param text The text in the buffer to lex.
--- @param init_style The current style. Multiple-language lexers use this to
--- determine which language to start lexing in.
--- @return table of token names and positions.
--- @name lex
-function M.lex(lexer, text, init_style)
- if not lexer._GRAMMAR then return {M.DEFAULT, #text + 1} end
- if not lexer._LEXBYLINE then
- -- For multilang lexers, build a new grammar whose initial_rule is the
- -- current language.
- if lexer._CHILDREN then
- for style, style_num in pairs(lexer._TOKENSTYLES) do
- if style_num == init_style then
- local lexer_name = style:match('^(.+)_whitespace') or lexer._NAME
- if lexer._INITIALRULE ~= lexer_name then
- lexer:build_grammar(lexer_name)
- end
- break
- end
- end
- end
- return lpeg_match(lexer._GRAMMAR, text)
- else
- local tokens = {}
- local function append(tokens, line_tokens, offset)
- for i = 1, #line_tokens, 2 do
- tokens[#tokens + 1] = line_tokens[i]
- tokens[#tokens + 1] = line_tokens[i + 1] + offset
- end
- end
- local offset = 0
- local grammar = lexer._GRAMMAR
- for line in text:gmatch('[^\r\n]*\r?\n?') do
- local line_tokens = lpeg_match(grammar, line)
- if line_tokens then append(tokens, line_tokens, offset) end
- offset = offset + #line
- -- Use the default style to the end of the line if none was specified.
- if tokens[#tokens] ~= offset then
- tokens[#tokens + 1], tokens[#tokens + 2] = 'default', offset + 1
- end
- end
- return tokens
- end
-end
-
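Used as a stand-alone Lua library, lex() returns a flat array of alternating token names and end positions; each position is one past the last character of its token, since token() appends an lpeg.Cp() capture. A minimal sketch of consuming that table, assuming LPeg is installed and the script runs from the repository root so the lexers in lexlua/ are reachable:

package.path = 'lexlua/?.lua;' .. package.path  -- assumption: run from the repository root
local l = require('lexer')
l.path = 'lexlua/?.lua'  -- where load() will search for lexer files
local lua = l.load('lua')
local text = 'local answer = 42 -- comment\n'
local tokens = lua:lex(text)  -- init_style only matters for multi-language lexers
local s = 1
for i = 1, #tokens, 2 do
  local name, e = tokens[i], tokens[i + 1]
  print(name, string.format('%q', text:sub(s, e - 1)))
  s = e  -- note: whitespace is reported as 'lua_whitespace'; see load() below
end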
----
--- Determines fold points in a chunk of text *text* using lexer *lexer*,
--- returning a table of fold levels associated with line numbers.
--- *text* starts at position *start_pos* on line number *start_line* with a
--- beginning fold level of *start_level* in the buffer.
--- @param lexer The lexer to fold text with.
--- @param text The text in the buffer to fold.
--- @param start_pos The position in the buffer *text* starts at, starting at
--- zero.
--- @param start_line The line number *text* starts on.
--- @param start_level The fold level *text* starts on.
--- @return table of fold levels associated with line numbers.
--- @name fold
-function M.fold(lexer, text, start_pos, start_line, start_level)
- local folds = {}
- if text == '' then return folds end
- local fold = M.property_int['fold'] > 0
- local FOLD_BASE = M.FOLD_BASE
- local FOLD_HEADER, FOLD_BLANK = M.FOLD_HEADER, M.FOLD_BLANK
- if fold and lexer._FOLDPOINTS then
- local lines = {}
- for p, l in (text..'\n'):gmatch('()(.-)\r?\n') do
- lines[#lines + 1] = {p, l}
- end
- local fold_zero_sum_lines = M.property_int['fold.on.zero.sum.lines'] > 0
- local fold_points = lexer._FOLDPOINTS
- local fold_point_symbols = fold_points._SYMBOLS
- local style_at, fold_level = M.style_at, M.fold_level
- local line_num, prev_level = start_line, start_level
- local current_level = prev_level
- for i = 1, #lines do
- local pos, line = lines[i][1], lines[i][2]
- if line ~= '' then
- if lexer._CASEINSENSITIVEFOLDPOINTS then line = line:lower() end
- local level_decreased = false
- for j = 1, #fold_point_symbols do
- local symbol = fold_point_symbols[j]
- local word = not symbol:find('[^%w_]')
- local s, e = line:find(symbol, 1, true)
- while s and e do
- --if not word or line:find('^%f[%w_]'..symbol..'%f[^%w_]', s) then
- if not word or not ((s > 1 and line:find('^[%w_]', s - 1)) or
- line:find('^[%w_]', e + 1)) then
- local symbols = fold_points[style_at[start_pos + pos + s - 1]]
- local level = symbols and symbols[symbol]
- if type(level) == 'function' then
- level = level(text, pos, line, s, symbol)
- end
- if type(level) == 'number' then
- current_level = current_level + level
- if level < 0 and current_level < prev_level then
- -- Potential zero-sum line. If the level were to go back up on
- -- the same line, the line may be marked as a fold header.
- level_decreased = true
- end
- end
- end
- s = line:find(fold_point_symbols[j], s + 1, true)
- end
- end
- folds[line_num] = prev_level
- if current_level > prev_level then
- folds[line_num] = prev_level + FOLD_HEADER
- elseif level_decreased and current_level == prev_level and
- fold_zero_sum_lines then
- if line_num > start_line then
- folds[line_num] = prev_level - 1 + FOLD_HEADER
- else
- -- Typing within a zero-sum line.
- local level = fold_level[line_num - 1] - 1
- if level > FOLD_HEADER then level = level - FOLD_HEADER end
- if level > FOLD_BLANK then level = level - FOLD_BLANK end
- folds[line_num] = level + FOLD_HEADER
- current_level = current_level + 1
- end
- end
- if current_level < FOLD_BASE then current_level = FOLD_BASE end
- prev_level = current_level
- else
- folds[line_num] = prev_level + FOLD_BLANK
- end
- line_num = line_num + 1
- end
- elseif fold and (lexer._FOLDBYINDENTATION or
- M.property_int['fold.by.indentation'] > 0) then
- -- Indentation based folding.
- -- Calculate indentation per line.
- local indentation = {}
- for indent, line in (text..'\n'):gmatch('([\t ]*)([^\r\n]*)\r?\n') do
- indentation[#indentation + 1] = line ~= '' and #indent
- end
- -- Find the first non-blank line before start_line. If the current line is
- -- indented, make that previous line a header and update the levels of any
-  -- blank lines in between. If the current line is blank, match the level of
- -- the previous non-blank line.
- local current_level = start_level
- for i = start_line - 1, 0, -1 do
- local level = M.fold_level[i]
- if level >= FOLD_HEADER then level = level - FOLD_HEADER end
- if level < FOLD_BLANK then
- local indent = M.indent_amount[i]
- if indentation[1] and indentation[1] > indent then
- folds[i] = FOLD_BASE + indent + FOLD_HEADER
- for j = i + 1, start_line - 1 do
- folds[j] = start_level + FOLD_BLANK
- end
- elseif not indentation[1] then
- current_level = FOLD_BASE + indent
- end
- break
- end
- end
- -- Iterate over lines, setting fold numbers and fold flags.
- for i = 1, #indentation do
- if indentation[i] then
- current_level = FOLD_BASE + indentation[i]
- folds[start_line + i - 1] = current_level
- for j = i + 1, #indentation do
- if indentation[j] then
- if FOLD_BASE + indentation[j] > current_level then
- folds[start_line + i - 1] = current_level + FOLD_HEADER
- current_level = FOLD_BASE + indentation[j] -- for any blanks below
- end
- break
- end
- end
- else
- folds[start_line + i - 1] = current_level + FOLD_BLANK
- end
- end
- else
- -- No folding, reset fold levels if necessary.
- local current_line = start_line
- for _ in text:gmatch('\r?\n') do
- folds[current_line] = start_level
- current_line = current_line + 1
- end
- end
- return folds
-end
-
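fold() consumes fold points registered via lexer:add_fold_point(): the third argument is a level delta (+1 or -1) or a function returning one, as the legacy-table conversion and the fold_line_comments() usage further down show. A hypothetical C-like lexer might declare:

lexer:add_fold_point(l.OPERATOR, '{', 1)
lexer:add_fold_point(l.OPERATOR, '}', -1)
lexer:add_fold_point(l.COMMENT, '/*', 1)
lexer:add_fold_point(l.COMMENT, '*/', -1)
lexer:add_fold_point(l.COMMENT, '//', l.fold_line_comments('//'))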
----
--- Creates and returns a new lexer with the given name.
--- @param name The lexer's name.
--- @param opts Table of lexer options. Options currently supported:
--- * `lex_by_line`: Whether or not the lexer only processes whole lines of
--- text (instead of arbitrary chunks of text) at a time.
--- Line lexers cannot look ahead to subsequent lines.
--- The default value is `false`.
--- * `fold_by_indentation`: Whether or not the lexer defines no fold points of
---   its own and fold points should instead be calculated from changes in line
---   indentation.
--- The default value is `false`.
--- * `case_insensitive_fold_points`: Whether or not fold points added via
--- `lexer:add_fold_point()` ignore case.
--- The default value is `false`.
--- * `inherit`: Lexer to inherit from.
--- The default value is `nil`.
--- @usage l.new('rhtml', {inherit = l.load('html')})
--- @name new
-function M.new(name, opts)
- local lexer = {
- _NAME = assert(name, 'lexer name expected'),
- _LEXBYLINE = opts and opts['lex_by_line'],
- _FOLDBYINDENTATION = opts and opts['fold_by_indentation'],
- _CASEINSENSITIVEFOLDPOINTS = opts and opts['case_insensitive_fold_points'],
- _lexer = opts and opts['inherit']
- }
-
- -- Create the initial maps for token names to style numbers and styles.
- local token_styles = {}
- for i = 1, #default do token_styles[default[i]] = i - 1 end
- for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end
- lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default
- lexer._EXTRASTYLES = {}
-
- return setmetatable(lexer, {__index = {
- add_rule = M.add_rule, modify_rule = M.modify_rule, add_style = M.add_style,
- add_fold_point = M.add_fold_point, join_tokens = join_tokens,
- build_grammar = build_grammar, embed = M.embed, lex = M.lex, fold = M.fold
- }})
-end
-
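Tying new() to the rule, style, and pattern helpers, a complete lexer module written against this API could look like the following sketch; the 'myconf' language and its rules are made up for illustration:

local l = require('lexer')
local token = l.token
local S = lpeg.S  -- lpeg is provided by the Scintillua host; otherwise require('lpeg')

local lexer = l.new('myconf')
lexer:add_rule('whitespace', token(l.WHITESPACE, l.space^1))
lexer:add_rule('comment', token(l.COMMENT, '#' * l.nonnewline^0))
lexer:add_rule('string', token(l.STRING, l.delimited_range('"', true)))
lexer:add_rule('number', token(l.NUMBER, l.float + l.integer))
lexer:add_rule('keyword', token(l.KEYWORD, l.word_match([[true false on off]], true)))
lexer:add_rule('identifier', token(l.IDENTIFIER, l.word))
lexer:add_rule('operator', token(l.OPERATOR, S('=:[]')))
return lexer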
--- Legacy support for older lexers.
--- Processes the `lexer._rules`, `lexer._tokenstyles`, and `lexer._foldsymbols`
--- tables.
--- Since legacy lexers may be processed up to twice, ensure their default styles
--- and rules are not processed more than once.
-local function process_legacy_lexer(lexer)
- local function warn(msg) --[[io.stderr:write(msg, "\n")]] end
- if not lexer._LEGACY then
- lexer._LEGACY = true
- warn("lexers as tables are deprecated; use 'lexer.new()'")
- local token_styles = {}
- for i = 1, #default do token_styles[default[i]] = i - 1 end
- for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end
- lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default
- lexer._EXTRASTYLES = {}
- setmetatable(lexer, getmetatable(M.new('')))
- if lexer._rules then
- warn("lexer '_rules' table is deprecated; use 'add_rule()'")
- for i = 1, #lexer._rules do
- lexer:add_rule(lexer._rules[i][1], lexer._rules[i][2])
- end
- end
- end
- if lexer._tokenstyles then
- warn("lexer '_tokenstyles' table is deprecated; use 'add_style()'")
- for token, style in pairs(lexer._tokenstyles) do
- -- If this legacy lexer is being processed a second time, only add styles
- -- added since the first processing.
- if not lexer._TOKENSTYLES[token] then lexer:add_style(token, style) end
- end
- end
- if lexer._foldsymbols then
- warn("lexer '_foldsymbols' table is deprecated; use 'add_fold_point()'")
- for token_name, symbols in pairs(lexer._foldsymbols) do
- if type(symbols) == 'table' and token_name ~= '_patterns' then
- for symbol, v in pairs(symbols) do
- lexer:add_fold_point(token_name, symbol, v)
- end
- end
- end
- if lexer._foldsymbols._case_insensitive then
- lexer._CASEINSENSITIVEFOLDPOINTS = true
- end
- end
-end
-
-local lexers = {} -- cache of loaded lexers
----
--- Initializes or loads and returns the lexer of string name *name*.
--- Scintilla calls this function in order to load a lexer. Parent lexers also
--- call this function in order to load child lexers and vice-versa. The user
--- calls this function in order to load a lexer when using Scintillua as a Lua
--- library.
--- @param name The name of the lexing language.
--- @param alt_name The alternate name of the lexing language. This is useful for
--- embedding the same child lexer with multiple sets of start and end tokens.
--- @param cache Flag indicating whether or not to load lexers from the cache.
--- This should only be `true` when initially loading a lexer (e.g. not from
--- within another lexer for embedding purposes).
--- The default value is `false`.
--- @return lexer object
--- @name load
-function M.load(name, alt_name, cache)
- if cache and lexers[alt_name or name] then return lexers[alt_name or name] end
-
- -- When using Scintillua as a stand-alone module, the `property` and
- -- `property_int` tables do not exist (they are not useful). Create them in
-  -- order to prevent errors from occurring.
- if not M.property then
- M.property, M.property_int = {}, setmetatable({}, {
- __index = function(t, k) return tonumber(M.property[k]) or 0 end,
- __newindex = function() error('read-only property') end
- })
- end
-
- -- Load the language lexer with its rules, styles, etc.
- -- However, replace the default `WHITESPACE` style name with a unique
- -- whitespace style name (and then automatically add it afterwards), since
- -- embedded lexing relies on these unique whitespace style names. Note that
- -- loading embedded lexers changes `WHITESPACE` again, so when adding it
- -- later, do not reference the potentially incorrect value.
- M.WHITESPACE = (alt_name or name)..'_whitespace'
- local lexer = dofile(assert(package.searchpath(name, M.path)))
- assert(lexer, string.format("'%s.lua' did not return a lexer", name))
- if alt_name then lexer._NAME = alt_name end
- if not getmetatable(lexer) or lexer._LEGACY then
- -- A legacy lexer may need to be processed a second time in order to pick up
- -- any `_tokenstyles` or `_foldsymbols` added after `l.embed_lexer()`.
- process_legacy_lexer(lexer)
- if lexer._lexer and lexer._lexer._LEGACY then
- process_legacy_lexer(lexer._lexer) -- mainly for `_foldsymbols` edits
- end
- end
- lexer:add_style((alt_name or name)..'_whitespace', M.STYLE_WHITESPACE)
-
- -- If the lexer is a proxy or a child that embedded itself, set the parent to
- -- be the main lexer.
- if lexer._lexer then lexer = lexer._lexer end
-
- lexers[alt_name or name] = lexer
- return lexer
-end
-
--- The following are utility functions lexers will have access to.
-
--- Common patterns.
-M.any = lpeg_P(1)
-M.ascii = lpeg_R('\000\127')
-M.extend = lpeg_R('\000\255')
-M.alpha = lpeg_R('AZ', 'az')
-M.digit = lpeg_R('09')
-M.alnum = lpeg_R('AZ', 'az', '09')
-M.lower = lpeg_R('az')
-M.upper = lpeg_R('AZ')
-M.xdigit = lpeg_R('09', 'AF', 'af')
-M.cntrl = lpeg_R('\000\031')
-M.graph = lpeg_R('!~')
-M.print = lpeg_R(' ~')
-M.punct = lpeg_R('!/', ':@', '[`', '{~')
-M.space = lpeg_S('\t\v\f\n\r ')
-
-M.newline = lpeg_S('\r\n\f')^1
-M.nonnewline = 1 - M.newline
-M.nonnewline_esc = 1 - (M.newline + '\\') + '\\' * M.any
-
-M.dec_num = M.digit^1
-M.hex_num = '0' * lpeg_S('xX') * M.xdigit^1
-M.oct_num = '0' * lpeg_R('07')^1
-M.integer = lpeg_S('+-')^-1 * (M.hex_num + M.oct_num + M.dec_num)
-M.float = lpeg_S('+-')^-1 *
- ((M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0) *
- (lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 +
- (M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1))
-
-M.word = (M.alpha + '_') * (M.alnum + '_')^0
-
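These common patterns compose directly with LPeg operators into token rules; for example (illustrative only, not taken from any lexer in this tree), C-style comments and numeric literals could be expressed as:

local P = lpeg.P  -- provided by the Scintillua host
local line_comment = '//' * l.nonnewline^0
local block_comment = '/*' * (l.any - '*/')^0 * P('*/')^-1
local comment = l.token(l.COMMENT, line_comment + block_comment)
local number = l.token(l.NUMBER, l.float + l.integer)  -- integer already covers hex and octal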
----
--- Creates and returns a token pattern with token name *name* and pattern
--- *patt*.
--- If *name* is not a predefined token name, its style must be associated with
--- it via `lexer:add_style()` (or the legacy `_tokenstyles` table).
--- @param name The name of the token. If this name is not a predefined token
---   name, then a style needs to be associated with it via
---   `lexer:add_style()`.
--- @param patt The LPeg pattern associated with the token.
--- @return pattern
--- @usage local ws = token(l.WHITESPACE, l.space^1)
--- @usage local annotation = token('annotation', '@' * l.word)
--- @name token
-function M.token(name, patt)
- return lpeg_Cc(name) * patt * lpeg_Cp()
-end
-
----
--- Creates and returns a pattern that matches a range of text bounded by
--- *chars* characters.
--- This is a convenience function for matching more complicated delimited ranges
--- like strings with escape characters and balanced parentheses. *single_line*
--- indicates whether or not the range must be on a single line, *no_escape*
--- indicates whether or not to ignore '\' as an escape character, and *balanced*
--- indicates whether or not to handle balanced ranges like parentheses and
--- requires *chars* to be composed of two characters.
--- @param chars The character(s) that bound the matched range.
--- @param single_line Optional flag indicating whether or not the range must be
--- on a single line.
--- @param no_escape Optional flag indicating whether or not to ignore '\\' as
---   an escape character (i.e. the range end character cannot be escaped).
--- @param balanced Optional flag indicating whether or not to match a balanced
--- range, like the "%b" Lua pattern. This flag only applies if *chars*
--- consists of two different characters (e.g. "()").
--- @return pattern
--- @usage local dq_str_escapes = l.delimited_range('"')
--- @usage local dq_str_noescapes = l.delimited_range('"', false, true)
--- @usage local unbalanced_parens = l.delimited_range('()')
--- @usage local balanced_parens = l.delimited_range('()', false, false, true)
--- @see nested_pair
--- @name delimited_range
-function M.delimited_range(chars, single_line, no_escape, balanced)
- local s = chars:sub(1, 1)
- local e = #chars == 2 and chars:sub(2, 2) or s
- local range
- local b = balanced and s or ''
- local n = single_line and '\n' or ''
- if no_escape then
- local invalid = lpeg_S(e..n..b)
- range = M.any - invalid
- else
- local invalid = lpeg_S(e..n..b) + '\\'
- range = M.any - invalid + '\\' * M.any
- end
- if balanced and s ~= e then
- return lpeg_P{s * (range + lpeg_V(1))^0 * e}
- else
- return s * range^0 * lpeg_P(e)^-1
- end
-end
-
----
--- Creates and returns a pattern that matches pattern *patt* only at the
--- beginning of a line.
--- @param patt The LPeg pattern to match on the beginning of a line.
--- @return pattern
--- @usage local preproc = token(l.PREPROCESSOR, l.starts_line('#') *
--- l.nonnewline^0)
--- @name starts_line
-function M.starts_line(patt)
- return lpeg_Cmt(lpeg_C(patt), function(input, index, match, ...)
- local pos = index - #match
- if pos == 1 then return index, ... end
- local char = input:sub(pos - 1, pos - 1)
- if char == '\n' or char == '\r' or char == '\f' then return index, ... end
- end)
-end
-
----
--- Creates and returns a pattern that verifies that string set *s* contains the
--- first non-whitespace character behind the current match position.
--- @param s String character set like one passed to `lpeg.S()`.
--- @return pattern
--- @usage local regex = l.last_char_includes('+-*!%^&|=,([{') *
--- l.delimited_range('/')
--- @name last_char_includes
-function M.last_char_includes(s)
- s = '['..s:gsub('[-%%%[]', '%%%1')..']'
- return lpeg_P(function(input, index)
- if index == 1 then return index end
- local i = index
- while input:sub(i - 1, i - 1):match('[ \t\r\n\f]') do i = i - 1 end
- if input:sub(i - 1, i - 1):match(s) then return index end
- end)
-end
-
----
--- Returns a pattern that matches a balanced range of text that starts with
--- string *start_chars* and ends with string *end_chars*.
--- With single-character delimiters, this function is identical to
--- `delimited_range(start_chars..end_chars, false, true, true)`.
--- @param start_chars The string starting a nested sequence.
--- @param end_chars The string ending a nested sequence.
--- @return pattern
--- @usage local nested_comment = l.nested_pair('/*', '*/')
--- @see delimited_range
--- @name nested_pair
-function M.nested_pair(start_chars, end_chars)
- local s, e = start_chars, lpeg_P(end_chars)^-1
- return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e}
-end
-
----
--- Creates and returns a pattern that matches any single word in string *words*.
--- *case_insensitive* indicates whether or not to ignore case when matching
--- words.
--- This is a convenience function for simplifying a set of ordered choice word
--- patterns.
--- @param words A string list of words separated by spaces.
--- @param case_insensitive Optional boolean flag indicating whether or not the
--- word match is case-insensitive. The default value is `false`.
--- @param word_chars Unused legacy parameter.
--- @return pattern
--- @usage local keyword = token(l.KEYWORD, word_match[[foo bar baz]])
--- @usage local keyword = token(l.KEYWORD, word_match([[foo-bar foo-baz
--- bar-foo bar-baz baz-foo baz-bar]], true))
--- @name word_match
-function M.word_match(words, case_insensitive, word_chars)
- local word_list = {}
- if type(words) == 'table' then
- -- Legacy `word_match(word_list, word_chars, case_insensitive)` form.
- words = table.concat(words, ' ')
- word_chars, case_insensitive = case_insensitive, word_chars
- end
- for word in words:gmatch('%S+') do
- word_list[case_insensitive and word:lower() or word] = true
- for char in word:gmatch('[^%w_]') do
- if not (word_chars or ''):find(char, 1, true) then
- word_chars = (word_chars or '')..char
- end
- end
- end
- local chars = M.alnum + '_'
- if (word_chars or '') ~= '' then chars = chars + lpeg_S(word_chars) end
- return lpeg_Cmt(chars^1, function(input, index, word)
- if case_insensitive then word = word:lower() end
- return word_list[word] and index or nil
- end)
-end
-
--- Deprecated legacy function. Use `parent:embed()` instead.
--- Embeds child lexer *child* in parent lexer *parent* using patterns
--- *start_rule* and *end_rule*, which signal the beginning and end of the
--- embedded lexer, respectively.
--- @param parent The parent lexer.
--- @param child The child lexer.
--- @param start_rule The pattern that signals the beginning of the embedded
--- lexer.
--- @param end_rule The pattern that signals the end of the embedded lexer.
--- @usage l.embed_lexer(M, css, css_start_rule, css_end_rule)
--- @usage l.embed_lexer(html, M, php_start_rule, php_end_rule)
--- @usage l.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
--- @see embed
--- @name embed_lexer
-function M.embed_lexer(parent, child, start_rule, end_rule)
- if not getmetatable(parent) then process_legacy_lexer(parent) end
- if not getmetatable(child) then process_legacy_lexer(child) end
- parent:embed(child, start_rule, end_rule)
-end
-
--- Determines if the previous line is a comment.
--- This is used for determining if the current comment line is a fold point.
--- @param prefix The prefix string defining a comment.
--- @param text The text passed to a fold function.
--- @param pos The pos passed to a fold function.
--- @param line The line passed to a fold function.
--- @param s The s passed to a fold function.
-local function prev_line_is_comment(prefix, text, pos, line, s)
- local start = line:find('%S')
- if start < s and not line:find(prefix, start, true) then return false end
- local p = pos - 1
- if text:sub(p, p) == '\n' then
- p = p - 1
- if text:sub(p, p) == '\r' then p = p - 1 end
- if text:sub(p, p) ~= '\n' then
- while p > 1 and text:sub(p - 1, p - 1) ~= '\n' do p = p - 1 end
- while text:sub(p, p):find('^[\t ]$') do p = p + 1 end
- return text:sub(p, p + #prefix - 1) == prefix
- end
- end
- return false
-end
-
--- Determines if the next line is a comment.
--- This is used for determining if the current comment line is a fold point.
--- @param prefix The prefix string defining a comment.
--- @param text The text passed to a fold function.
--- @param pos The pos passed to a fold function.
--- @param line The line passed to a fold function.
--- @param s The s passed to a fold function.
-local function next_line_is_comment(prefix, text, pos, line, s)
- local p = text:find('\n', pos + s)
- if p then
- p = p + 1
- while text:sub(p, p):find('^[\t ]$') do p = p + 1 end
- return text:sub(p, p + #prefix - 1) == prefix
- end
- return false
-end
-
----
--- Returns a fold function (to be passed to `lexer:add_fold_point()`) that folds
--- consecutive line comments that start with string *prefix*.
--- @param prefix The prefix string defining a line comment.
--- @usage lexer:add_fold_point(l.COMMENT, '--', l.fold_line_comments('--'))
--- @usage lexer:add_fold_point(l.COMMENT, '//', l.fold_line_comments('//'))
--- @name fold_line_comments
-function M.fold_line_comments(prefix)
- local property_int = M.property_int
- return function(text, pos, line, s)
- if property_int['fold.line.comments'] == 0 then return 0 end
- if s > 1 and line:match('^%s*()') < s then return 0 end
- local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s)
- local next_line_comment = next_line_is_comment(prefix, text, pos, line, s)
- if not prev_line_comment and next_line_comment then return 1 end
- if prev_line_comment and not next_line_comment then return -1 end
- return 0
- end
-end
-
-M.property_expanded = setmetatable({}, {
- -- Returns the string property value associated with string property *key*,
- -- replacing any "$()" and "%()" expressions with the values of their keys.
- __index = function(t, key)
- return M.property[key]:gsub('[$%%]%b()', function(key)
- return t[key:sub(3, -2)]
- end)
- end,
- __newindex = function() error('read-only property') end
-})
-
---[[ The functions and fields below were defined in C.
-
----
--- Returns the line number of the line that contains position *pos*, which
--- starts from 1.
--- @param pos The position to get the line number of.
--- @return number
-local function line_from_position(pos) end
-]]
-
-return M
diff --git a/lexlua/mumps.lua b/lexlua/mumps.lua
deleted file mode 100644
index 8a7d7d8f1..000000000
--- a/lexlua/mumps.lua
+++ /dev/null
@@ -1,112 +0,0 @@
--- Copyright 2015-2018 Mitchell mitchell.att.foicica.com. See License.txt.
--- MUMPS (M) LPeg lexer.
-
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-
-local M = {_NAME = 'mumps'}
-
--- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, ';' * l.nonnewline_esc^0)
-
--- Strings.
-local string = token(l.STRING, l.delimited_range('"', true))
-
--- Numbers.
-local number = token(l.NUMBER, l.float + l.integer) -- TODO: float?
-
--- Keywords.
-local keyword = token(l.KEYWORD, word_match({
- -- Abbreviations.
- 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'q',
- 'r', 's', 'u', 'v', 'w', 'x',
- -- Full.
- 'break', 'close', 'do', 'else', 'for', 'goto', 'halt', 'hang', 'if', 'job',
- 'kill', 'lock', 'merge', 'new', 'open', 'quit', 'read', 'set', 'use', 'view',
- 'write', 'xecute',
- -- Cache- or GTM-specific.
- 'catch', 'continue', 'elseif', 'tcommit', 'throw', 'trollback', 'try',
- 'tstart', 'while',
-}, nil, true))
-
--- Functions.
-local func = token(l.FUNCTION, '$' * word_match({
- -- Abbreviations.
- 'a', 'c', 'd', 'e', 'f', 'fn', 'g', 'j', 'l', 'n', 'na', 'o', 'p', 'q', 'ql',
- 'qs', 'r', 're', 's', 'st', 't', 'tr', 'v',
- -- Full.
- 'ascii', 'char', 'data', 'extract', 'find', 'fnumber', 'get', 'justify',
- 'length', 'next', 'name', 'order', 'piece', 'query', 'qlength', 'qsubscript',
- 'random', 'reverse', 'select', 'stack', 'text', 'translate', 'view',
- -- Z function abbreviations.
- 'zd', 'zdh', 'zdt', 'zdth', 'zh', 'zt', 'zth', 'zu', 'zp',
- -- Z functions.
- 'zabs', 'zarccos', 'zarcsin', 'zarctan', 'zcos', 'zcot', 'zcsc', 'zdate',
- 'zdateh', 'zdatetime', 'zdatetimeh', 'zexp', 'zhex', 'zln', 'zlog', 'zpower',
- 'zsec', 'zsin', 'zsqr', 'ztan', 'ztime', 'ztimeh', 'zutil', 'zf', 'zprevious',
- -- Cache- or GTM-specific.
- 'bit', 'bitcount', 'bitfind', 'bitlogic', 'case', 'classmethod', 'classname',
- 'decimal', 'double', 'factor', 'i', 'increment', 'inumber', 'isobject',
- 'isvaliddouble', 'isvalidnum', 'li', 'list', 'lb', 'listbuild', 'ld',
- 'listdata', 'lf', 'listfind', 'lfs', 'listfromstring', 'lg', 'listget', 'll',
- 'listlength', 'listnext', 'ls', 'listsame', 'lts', 'listtostring', 'lv',
- 'listvalid', 'locate', 'match', 'method', 'nc', 'nconvert', 'normalize',
- 'now', 'num', 'number', 'parameter', 'prefetchoff', 'prefetchon', 'property',
- 'replace', 'sc', 'sconvert', 'sortbegin', 'sortend', 'wa', 'wascii', 'wc',
- 'wchar', 'we', 'wextract', 'wf', 'wfind', 'wiswide', 'wl', 'wlength', 'wre',
- 'wreverse', 'xecute'
-}, nil, true))
-
--- Variables.
-local variable = token(l.VARIABLE, '$' * l.word_match({
- -- Abbreviations.
- 'ec', 'es', 'et', 'h', 'i', 'j', 'k', 'p', 'q', 's', 'st', 't', 'tl',
- -- Full.
- 'device', 'ecode', 'estack', 'etrap', 'halt', 'horolog', 'io', 'job',
- 'namespace', 'principal', 'quit', 'roles', 'storage', 'stack', 'system',
- 'test', 'this', 'tlevel', 'username', 'x', 'y',
- -- Z variable abbreviations.
- 'za', 'zb', 'zc', 'ze', 'zh', 'zi', 'zj', 'zm', 'zn', 'zo', 'zp', 'zr', 'zs',
- 'zt', 'zts', 'ztz', 'zv',
- -- Z variables.
- 'zchild', 'zeof', 'zerror', 'zhorolog', 'zio', 'zjob', 'zmode', 'zname',
- 'znspace', 'zorder', 'zparent', 'zpi', 'zpos', 'zreference', 'zstorage',
- 'ztimestamp', 'ztimezone', 'ztrap', 'zversion',
-}, nil, true))
-
--- Function entity.
-local entity = token(l.LABEL, l.starts_line(('%' + l.alpha) * l.alnum^0))
-
--- Support functions.
-local support_function = '$$' * ('%' + l.alpha) * l.alnum^0 *
- (('%' + l.alpha) * l.alnum^0)^-1
-
--- Identifiers.
-local identifier = token(l.IDENTIFIER, l.alpha * l.alnum^0)
-
--- Operators.
-local operator = token(l.OPERATOR, S('+-/*<>!=_@#&|?:\\\',()[]'))
-
-M._rules = {
- {'whitespace', ws},
- {'keyword', keyword},
- {'variable', variable},
- {'identifier', identifier},
- {'string', string},
- {'comment', comment},
- {'number', number},
- {'operator', operator},
-}
-
-M._foldsymbols = {
- _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'},
- [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1},
- [l.OPERATOR] = {['{'] = 1, ['}'] = -1},
- [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')}
-}
-
-return M
diff --git a/lexlua/ps.lua.orig b/lexlua/ps.lua.orig
deleted file mode 100644
index c6a98faa9..000000000
--- a/lexlua/ps.lua.orig
+++ /dev/null
@@ -1,167 +0,0 @@
--- Copyright 2017 Marcio Baraco <marciorps@gmail.com>. See LICENSE.
--- Postscript LPeg lexer.
-
-local l = require('lexer')
-local token, word_match = l.token, l.word_match
-local P, R, S = lpeg.P, lpeg.R, lpeg.S
-
-local M = {_NAME = 'ps'}
-
--- Whitespace.
-local ws = token(l.WHITESPACE, l.space^1)
-
--- Comments.
-local comment = token(l.COMMENT, '%' * l.nonnewline^0)
-
--- Strings.
-local nested_string = l.delimited_range('()', false, false, true)
-local hex_string = P('<') * (l.xdigit + l.space)^0 * P('>')^-1
-local enc_string = P('<~') * (R('!u') + l.space)^0 * P('~>')
-local str = token(l.STRING, nested_string + hex_string + enc_string)
-
--- Numbers.
-local frac = (P('.') * l.digit^1)
-local expo = (S('eE') * S('+-')^-1 * l.digit^1)
-local decm = S('+-')^-1 * l.digit^1 * frac^-1 * expo^-1
-local radx = l.digit^-2 * '#' * l.alnum^1
--- TODO: Accept only chars that fit the radix, i.e. [01] for 2#, hex digits for 16#, and so on.
-local number = token(l.NUMBER, decm + radx)
-
--- PostScript allows almost all characters in names.
-local word = (l.graph - S('()<>[]{}/%'))^1
--- Names.
-local identifier = token(l.IDENTIFIER, word)
--- Deferred Names.
-local label = token(l.LABEL, '/' * word)
--- Immediately Evaluated Names.
-local preproc = token(l.PREPROCESSOR, '//' * word)
-
--- Object constructors.
-local operator = token(l.OPERATOR, S('[]{}=') + P('<<') + P('>>') + P('=='))
-
--- Operators:
--- + l.KEYWORD for basic ops
--- + l.FUNCTION for graphic ops
--- + l.CLASS for weird ps ops
-local keyword = token(l.KEYWORD, word_match{
- -- Control operators.
- 'exec', 'eexec', 'if', 'ifelse', 'for', 'repeat', 'loop', 'exit', 'stop',
- 'stopped', 'countexecstack', 'execstack', 'quit', 'start',
- -- Stack manipulation operators.
- 'pop', 'exch', 'dup', 'copy', 'index', 'roll', 'clear', 'count', 'mark',
- 'cleartomark', 'counttomark',
-  -- Array and string operators.
- 'array', 'string', 'length', 'get', 'put', 'getinterval', 'putinterval',
- 'aload', 'astore', 'packedarray', 'setpacking', 'currentpacking', 'forall',
- 'anchorsearch', 'search', 'token',
- -- Dictionary operators.
- 'dict', 'maxlength', 'begin', 'end', 'def', 'undef', 'load', 'store', 'known',
- 'where', 'currentdict', 'errordict', 'systemdict', 'userdict', 'globaldict',
- 'shareddict', 'statusdict', 'countdictstack', 'cleardictstack', 'dictstack',
- -- Type, attribute and conversion operators.
- 'type', 'cvlit', 'cvx', 'cvi', 'cvn', 'cvrs', 'cvs', 'cvr', 'xcheck',
- 'executeonly', 'noaccess', 'readonly', 'rcheck', 'wcheck',
- -- Arithmetic and math operators.
- 'add', 'div', 'idiv', 'mod', 'mul', 'sub', 'abs', 'neg', 'ceiling', 'floor',
- 'round', 'truncate', 'sqrt', 'atan', 'cos', 'sin', 'exp', 'ln', 'log', 'rand',
- 'srand', 'rrand',
- -- Relational, boolean and bitwise operators.
- 'eq', 'ne', 'ge', 'gt', 'le', 'lt', 'and', 'not', 'or', 'xor', 'true',
- 'false', 'bitshift',
- -- Coordinate system and matrix operators.
- 'matrix', 'initmatrix', 'identmatrix', 'defaultmatrix', 'currentmatrix',
- 'setmatrix', 'translate', 'scale', 'rotate', 'concat', 'concatmatrix',
- 'transform', 'dtransform', 'itransform', 'idtransform', 'invertmatrix',
-})
-local func = token(l.FUNCTION, word_match{
- -- Path construction operators.
- 'newpath', 'currentpoint', 'moveto', 'rmoveto', 'lineto', 'rlineto', 'arc',
- 'arcn', 'arct', 'arcto', 'curveto', 'rcurveto', 'closepath', 'flattenpath',
- 'reversepath', 'strokepath', 'ustrokepath', 'charpath', 'uappend', 'clippath',
- 'setbbox', 'pathbbox', 'pathforall', 'upath', 'ucache', 'initclip', 'clip',
- 'eoclip', 'rectclip',
- -- Glyph and font operators.
- 'definefont', 'composefont', 'undefinefont', 'findfont', 'scalefont',
- 'makefont', 'setfont', 'rootfont', 'currentfont', 'selectfont', 'show',
- 'ashow', 'widthshow', 'awidthshow', 'xshow', 'yshow', 'xyshow', 'glyphshow',
- 'stringwidth', 'cshow', 'kshow', 'findencoding', 'FontDirectory',
- 'GlobalFontDirectory', 'SharedFontDirectory', 'StandardEncoding',
- 'ISOLatin1Encoding', 'setcachedevice', 'setcachedevice2', 'setcharwidth',
- -- CID Font operators.
- 'addglyph', 'beginbfchar', 'beginbfrange', 'begincidchar', 'begincidrange',
- 'begincmap', 'begincodespacerange', 'beginnotdefchar', 'beginnotdefrange',
- 'beginrearrangedfont', 'beginusematrix', 'endbfchar', 'endbfrange',
- 'endcidchar', 'endcidrange', 'endcmap', 'endcodespacerange', 'endnotdefchar',
-  'endnotdefrange', 'endrearrangedfont', 'endusematrix', 'removeall',
- 'removeglyphs', 'StartData', 'usecmap', 'usefont',
- -- Painting operations.
- 'erasepage', 'stroke', 'fill', 'eofill', 'rectstroke', 'rectfill', 'ustroke',
- 'ufill', 'ueofill', 'shfill', 'image', 'imagemask', 'colorimage',
- -- Insideness testing operators.
- 'infill', 'ineofill', 'inufill', 'inueofill', 'instroke', 'inustroke',
- -- Form and pattern operators.
- 'makepattern', 'setpattern', 'execform',
- -- Graphics state operators.
- 'gsave', 'grestore', 'clipsave', 'cliprestore', 'grestoreall', 'initgraphics',
- 'gstate', 'setgstate', 'currentgstate', 'setlinewidth', 'currentlinewidth',
- 'setlinecap', 'currentlinecap', 'setlinejoin', 'currentlinejoin',
- 'setmiterlimit', 'currentmiterlimit', 'setstrokeadjust',
- 'currentstrokeadjust', 'setdash', 'currentdash', 'setcolorspace',
- 'currentcolorspace', 'setcolor', 'setgray', 'currentgray', 'sethsbcolor',
- 'currenthsbcolor', 'setrgbcolor', 'currentrgbcolor', 'setcmykcolor',
- 'currentcmykcolor', 'sethalftone', 'currenthalftone', 'setscreen',
- 'currentscreen', 'setcolorscreen', 'currentcolorscreen', 'settransfer',
- 'currenttransfer', 'setcolortransfer', 'currentcolortransfer',
- 'setblackgeneration', 'currentblackgeneration', 'setundercolorremoval',
- 'currentundercolorremoval', 'setcolorrendering', 'currentcolorrendering',
- 'setflat', 'currentflat', 'setoverprint', 'currentoverprint', 'setsmoothness',
- 'currentsmoothness', 'currentcolor',
- -- Device setup operators.
- 'showpage', 'copypage', 'setpagedevice', 'currentpagedevice', 'nulldevice',
- 'currenttrapparams', 'settrapparams', 'settrapzone',
-})
-local misc = token(l.CLASS, word_match{
- -- Miscellaneous operators
- 'defineresource', 'undefineresource', 'findresource', 'findcolorrendering',
- 'resourcestatus', 'resourceforall', 'GetHalftoneName', 'GetPageDeviceName',
- 'GetSubstituteCRD', 'save', 'restore', 'setglobal', 'setshared',
- 'currentglobal', 'gcheck', 'scheck', 'startjob', 'defineuserobject',
- 'execuserobject', 'undefineuserobject', 'UserObjects', 'bind', 'null',
- 'version', 'realtime', 'usertime', 'languagelevel', 'product', 'revision',
- 'serialnumber', 'executive', 'echo', 'prompt', 'setsystemparams',
- 'currentsystemparams', 'setuserparams', 'currentuserparams', 'setdevparams',
- 'currentdevparams', 'vmreclaim', 'setvmthreshold', 'vmstatus', 'cachestatus',
- 'setcachelimit', 'setcacheparams', 'currentcacheparams', 'setucacheparams',
- 'ucachestatus', 'currentshared', 'exitserver', 'serverdict',
- -- File operators
- 'file', 'filter', 'closefile', 'read', 'write', 'readhexstring',
- 'writehexstring', 'readstring', 'writestring', 'readline', 'bytesavailable',
- 'flush', 'flushfile', 'resetfile', 'status', 'run', 'currentfile',
- 'deletefile', 'renamefile', 'filenameforall', 'setfileposition',
- 'fileposition', 'print', 'stack', 'pstack', 'printobject', 'writeobject',
- 'setobjectformat', 'currentobjectformat',
- -- Errors.
- 'configurationerror', 'dictfull', 'dictstackoverflow', 'dictstackunderflow',
- 'execstackoverflow', 'handleerror', 'interrupt', 'invalidaccess',
- 'invalidexit', 'invalidfileaccess', 'invalidfont', 'invalidrestore',
- 'ioerror', 'limitcheck', 'nocurrentpoint', 'rangecheck', 'stackoverflow',
- 'stackunderflow', 'syntaxerror', 'timeout', 'typecheck', 'undefined',
- 'undefinedfilename', 'undefinedresource', 'undefinedresult', 'unmatchedmark',
- 'unregistered', 'VMerror',
-})
-
-M._rules = {
- {'whitespace', ws},
- {'comment', comment},
- {'number', number},
- {'preprocessor', preproc},
- {'label', label},
- {'keyword', keyword},
- {'function', func},
- {'class', misc},
- {'operator', operator},
- {'string', str},
- {'identifier', identifier},
-}
-
-return M