aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authormitchell <unknown>2020-04-25 16:26:31 -0400
committermitchell <unknown>2020-04-25 16:26:31 -0400
commitfad15f79b1230b3076be515d6894c8919562809b (patch)
tree72c848ef02c3331de5ca54eff7adaea3a9a6fb88
parent1fd02a367dec125c0b49dd9246a0928433866b96 (diff)
downloadscintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz
Reformatted Lua LPeg lexers and added new convenience functions and pattern.
`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`. `lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs. `lexer.number` replaces `lexer.float + lexer.integer`. Also added unit tests for lexer functions.
-rw-r--r--doc/LPegLexer.html233
-rw-r--r--lexlua/actionscript.lua13
-rw-r--r--lexlua/ada.lua10
-rw-r--r--lexlua/ansi_c.lua29
-rw-r--r--lexlua/antlr.lua9
-rw-r--r--lexlua/apdl.lua10
-rw-r--r--lexlua/apl.lua10
-rw-r--r--lexlua/applescript.lua12
-rw-r--r--lexlua/asm.lua19
-rw-r--r--lexlua/asp.lua2
-rw-r--r--lexlua/autoit.lua18
-rw-r--r--lexlua/awk.lua31
-rw-r--r--lexlua/bash.lua27
-rw-r--r--lexlua/batch.lua12
-rw-r--r--lexlua/bibtex.lua6
-rw-r--r--lexlua/boo.lua21
-rw-r--r--lexlua/caml.lua10
-rw-r--r--lexlua/chuck.lua12
-rw-r--r--lexlua/cmake.lua6
-rw-r--r--lexlua/coffeescript.lua18
-rw-r--r--lexlua/context.lua23
-rw-r--r--lexlua/cpp.lua25
-rw-r--r--lexlua/crystal.lua47
-rw-r--r--lexlua/csharp.lua23
-rw-r--r--lexlua/css.lua12
-rw-r--r--lexlua/dart.lua17
-rw-r--r--lexlua/desktop.lua19
-rw-r--r--lexlua/diff.lua6
-rw-r--r--lexlua/django.lua7
-rw-r--r--lexlua/dmd.lua44
-rw-r--r--lexlua/dockerfile.lua13
-rw-r--r--lexlua/dot.lua9
-rw-r--r--lexlua/eiffel.lua9
-rw-r--r--lexlua/elixir.lua78
-rw-r--r--lexlua/erlang.lua15
-rw-r--r--lexlua/faust.lua11
-rw-r--r--lexlua/fish.lua15
-rw-r--r--lexlua/forth.lua24
-rw-r--r--lexlua/fortran.lua20
-rw-r--r--lexlua/fsharp.lua18
-rw-r--r--lexlua/gap.lua7
-rw-r--r--lexlua/gettext.lua5
-rw-r--r--lexlua/gherkin.lua13
-rw-r--r--lexlua/glsl.lua24
-rw-r--r--lexlua/gnuplot.lua10
-rw-r--r--lexlua/go.lua13
-rw-r--r--lexlua/groovy.lua21
-rw-r--r--lexlua/gtkrc.lua11
-rw-r--r--lexlua/haskell.lua13
-rw-r--r--lexlua/html.lua33
-rw-r--r--lexlua/icon.lua10
-rw-r--r--lexlua/idl.lua13
-rw-r--r--lexlua/inform.lua9
-rw-r--r--lexlua/ini.lua14
-rw-r--r--lexlua/io_lang.lua14
-rw-r--r--lexlua/java.lua14
-rw-r--r--lexlua/javascript.lua18
-rw-r--r--lexlua/json.lua9
-rw-r--r--lexlua/latex.lua15
-rw-r--r--lexlua/ledger.lua19
-rw-r--r--lexlua/less.lua4
-rw-r--r--lexlua/lexer.lua128
-rw-r--r--lexlua/lilypond.lua5
-rw-r--r--lexlua/lisp.lua9
-rw-r--r--lexlua/litcoffee.lua2
-rw-r--r--lexlua/logtalk.lua2
-rw-r--r--lexlua/lua.lua26
-rw-r--r--lexlua/makefile.lua37
-rw-r--r--lexlua/man.lua15
-rw-r--r--lexlua/markdown.lua131
-rw-r--r--lexlua/matlab.lua17
-rw-r--r--lexlua/mediawiki.lua31
-rw-r--r--lexlua/moonscript.lua21
-rw-r--r--lexlua/myrddin.lua18
-rw-r--r--lexlua/nemerle.lua12
-rw-r--r--lexlua/nim.lua21
-rw-r--r--lexlua/nsis.lua11
-rw-r--r--lexlua/objective_c.lua13
-rw-r--r--lexlua/pascal.lua16
-rw-r--r--lexlua/perl.lua63
-rw-r--r--lexlua/php.lua18
-rw-r--r--lexlua/pico8.lua2
-rw-r--r--lexlua/pike.lua18
-rw-r--r--lexlua/pkgbuild.lua32
-rw-r--r--lexlua/powershell.lua11
-rw-r--r--lexlua/prolog.lua38
-rw-r--r--lexlua/props.lua14
-rw-r--r--lexlua/protobuf.lua10
-rw-r--r--lexlua/ps.lua8
-rw-r--r--lexlua/pure.lua6
-rw-r--r--lexlua/python.lua20
-rw-r--r--lexlua/rc.lua15
-rw-r--r--lexlua/rebol.lua14
-rw-r--r--lexlua/rest.lua97
-rw-r--r--lexlua/rexx.lua15
-rw-r--r--lexlua/rstats.lua10
-rw-r--r--lexlua/ruby.lua51
-rw-r--r--lexlua/rust.lua54
-rw-r--r--lexlua/sass.lua2
-rw-r--r--lexlua/scala.lua13
-rw-r--r--lexlua/scheme.lua8
-rw-r--r--lexlua/smalltalk.lua10
-rw-r--r--lexlua/sml.lua34
-rw-r--r--lexlua/snobol4.lua12
-rw-r--r--lexlua/sql.lua13
-rw-r--r--lexlua/taskpaper.lua10
-rw-r--r--lexlua/tcl.lua23
-rw-r--r--lexlua/template.txt9
-rw-r--r--lexlua/tex.lua8
-rw-r--r--lexlua/texinfo.lua33
-rw-r--r--lexlua/toml.lua39
-rw-r--r--lexlua/txt2tags.lua151
-rw-r--r--lexlua/vala.lua15
-rw-r--r--lexlua/vb.lua9
-rw-r--r--lexlua/vbscript.lua9
-rw-r--r--lexlua/vcard.lua23
-rw-r--r--lexlua/verilog.lua11
-rw-r--r--lexlua/vhdl.lua10
-rw-r--r--lexlua/wsf.lua17
-rw-r--r--lexlua/xml.lua26
-rw-r--r--lexlua/xtend.lua14
-rw-r--r--lexlua/yaml.lua74
-rw-r--r--test/test_lexlua.lua41
123 files changed, 1398 insertions, 1459 deletions
diff --git a/doc/LPegLexer.html b/doc/LPegLexer.html
index e31a091b1..3f553e9f9 100644
--- a/doc/LPegLexer.html
+++ b/doc/LPegLexer.html
@@ -226,6 +226,13 @@
as fold points. For example, the C line <code>} else {</code> would be
marked as a fold point. The default is <code>0</code>.</td>
</tr>
+
+ <tr>
+ <td><code>fold.compact</code></td>
+
+ <td>If <code>fold.compact</code> is set to <code>1</code>, blank lines
+ after an ending fold point are included in that fold.
+ </tr>
</tbody>
</table>
@@ -672,7 +679,7 @@ operator 30
<a href="#lexer.punct"><code>lexer.punct</code></a>, <a href="#lexer.space"><code>lexer.space</code></a>, <a href="#lexer.newline"><code>lexer.newline</code></a>,
<a href="#lexer.nonnewline"><code>lexer.nonnewline</code></a>, <a href="#lexer.nonnewline_esc"><code>lexer.nonnewline_esc</code></a>, <a href="#lexer.dec_num"><code>lexer.dec_num</code></a>,
<a href="#lexer.hex_num"><code>lexer.hex_num</code></a>, <a href="#lexer.oct_num"><code>lexer.oct_num</code></a>, <a href="#lexer.integer"><code>lexer.integer</code></a>,
- <a href="#lexer.float"><code>lexer.float</code></a>, and <a href="#lexer.word"><code>lexer.word</code></a>. You may use your own token names if
+ <a href="#lexer.float"><code>lexer.float</code></a>, <a href="#lexer.number"><code>lexer.number</code></a>, and <a href="#lexer.word"><code>lexer.word</code></a>. You may use your own token names if
none of the above fit your language, but an advantage to using predefined
token names is that your lexer's tokens will inherit the universal syntax
highlighting color theme used by your text editor.</p>
@@ -725,9 +732,8 @@ operator 30
<p>Line-style comments with a prefix character(s) are easy to express with LPeg:</p>
<pre><code>
- local shell_comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0)
- local c_line_comment = token(lexer.COMMENT,
- '//' * lexer.nonnewline_esc^0)
+ local shell_comment = token(lexer.COMMENT, lexer.to_eol('#'))
+ local c_line_comment = token(lexer.COMMENT, lexer.to_eol('//', true))
</code></pre>
<p>The comments above start with a '#' or "//" and go to the end of the line.
@@ -738,8 +744,7 @@ operator 30
express:</p>
<pre><code>
- local c_comment = token(lexer.COMMENT,
- '/*' * (lexer.any - '*/')^0 * P('*/')^-1)
+ local c_comment = token(lexer.COMMENT, lexer.range('/*', '*/'))
</code></pre>
<p>This comment starts with a "/*" sequence and contains anything up to and
@@ -748,24 +753,14 @@ operator 30
<p><strong>Strings</strong></p>
- <p>It is tempting to think that a string is not much different from the block
- comment shown above in that both have start and end delimiters:</p>
-
- <pre><code>
- local dq_str = '"' * (lexer.any - '"')^0 * P('"')^-1
- local sq_str = "'" * (lexer.any - "'")^0 * P("'")^-1
- local simple_string = token(lexer.STRING, dq_str + sq_str)
- </code></pre>
-
- <p>However, most programming languages allow escape sequences in strings such
- that a sequence like "\&quot;" in a double-quoted string indicates that the
- '&quot;' is not the end of the string. The above token incorrectly matches
- such a string. Instead, use the <a href="#lexer.delimited_range"><code>lexer.delimited_range()</code></a> convenience
- function.</p>
+ <p>Most programming languages allow escape sequences in strings such that a
+ sequence like &ldquo;\&quot;&rdquo; in a double-quoted string indicates that the
+ &lsquo;&quot;&rsquo; is not the end of the string. <a href="#lexer.range"><code>lexer.range()</code></a> handles escapes
+ inherently.</p>
<pre><code>
- local dq_str = lexer.delimited_range('"')
- local sq_str = lexer.delimited_range("'")
+ local dq_str = lexer.range('"')
+ local sq_str = lexer.range("'")
local string = token(lexer.STRING, dq_str + sq_str)
</code></pre>
@@ -775,10 +770,10 @@ operator 30
<p><strong>Numbers</strong></p>
<p>Most programming languages have the same format for integer and float tokens,
- so it might be as simple as using a couple of predefined LPeg patterns:</p>
+ so it might be as simple as using a predefined LPeg pattern:</p>
<pre><code>
- local number = token(lexer.NUMBER, lexer.float + lexer.integer)
+ local number = token(lexer.NUMBER, lexer.number)
</code></pre>
<p>However, some languages allow postfix characters on integers.</p>
@@ -1391,11 +1386,11 @@ operator 30
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[foo bar baz]]))
lex:add_rule('custom', token('custom', P('quux')))
- lex:add_style('custom', lexer.STYLE_KEYWORD..',bold')
+ lex:add_style('custom', lexer.STYLE_KEYWORD .. ',bold')
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
- lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"')))
- lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
- lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+ lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
+ lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+ lex:add_rule('number', token(lexer.NUMBER, lexer.number))
lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=&lt;&gt;,.()[]{}')))
lex:add_fold_point(lexer.OPERATOR, '{', '}')
@@ -1463,7 +1458,7 @@ operator 30
<h4>Acknowledgements</h4>
<p>Thanks to Peter Odding for his <a href="http://lua-users.org/lists/lua-l/2007-04/msg00116.html">lexer post</a> on the Lua mailing list
- that inspired me, and thanks to Roberto Ierusalimschy for LPeg.</p>
+ that provided inspiration, and thanks to Roberto Ierusalimschy for LPeg.</p>
<h2>Lua <code>lexer</code> module API fields</h2>
@@ -1869,6 +1864,13 @@ operator 30
<p>A pattern that matches any single, non-newline character or any set of end
of line characters escaped with '\'.</p>
+ <p><a id="lexer.number"></a></p>
+
+ <h3><code>lexer.number</code> (pattern)</h3>
+
+ <p>A pattern that matches a typical number, either a floating point, decimal,
+ hexadecimal, or octal number.</p>
+
<p><a id="lexer.oct_num"></a></p>
<h3><code>lexer.oct_num</code> (pattern)</h3>
@@ -2071,58 +2073,6 @@ operator 30
</ul>
- <p><a id="lexer.delimited_range"></a></p>
-
- <h3><code>lexer.delimited_range</code> (chars, single_line, no_escape, balanced)</h3>
-
- <p>Creates and returns a pattern that matches a range of text bounded by
- <em>chars</em> characters.
- This is a convenience function for matching more complicated delimited ranges
- like strings with escape characters and balanced parentheses. <em>single_line</em>
- indicates whether or not the range must be on a single line, <em>no_escape</em>
- indicates whether or not to ignore '\' as an escape character, and <em>balanced</em>
- indicates whether or not to handle balanced ranges like parentheses and
- requires <em>chars</em> to be composed of two characters.</p>
-
- <p>Fields:</p>
-
- <ul>
- <li><code>chars</code>: The character(s) that bound the matched range.</li>
- <li><code>single_line</code>: Optional flag indicating whether or not the range must be
- on a single line.</li>
- <li><code>no_escape</code>: Optional flag indicating whether or not the range end
- character may be escaped by a '\' character.</li>
- <li><code>balanced</code>: Optional flag indicating whether or not to match a balanced
- range, like the "%b" Lua pattern. This flag only applies if <em>chars</em>
- consists of two different characters (e.g. "()").</li>
- </ul>
-
-
- <p>Usage:</p>
-
- <ul>
- <li><code>local dq_str_escapes = lexer.delimited_range('"')</code></li>
- <li><code>local dq_str_noescapes = lexer.delimited_range('"', false, true)</code></li>
- <li><code>local unbalanced_parens = lexer.delimited_range('()')</code></li>
- <li><code>local balanced_parens = lexer.delimited_range('()', false, false,
- true)</code></li>
- </ul>
-
-
- <p>Return:</p>
-
- <ul>
- <li>pattern</li>
- </ul>
-
-
- <p>See also:</p>
-
- <ul>
- <li><a href="#lexer.nested_pair"><code>lexer.nested_pair</code></a></li>
- </ul>
-
-
<p><a id="lexer.embed"></a></p>
<h3><code>lexer.embed</code> (lexer, child, start_rule, end_rule)</h3>
@@ -2241,7 +2191,7 @@ operator 30
<ul>
<li><code>local regex = lexer.last_char_includes('+-*!%^&amp;|=,([{') *
- lexer.delimited_range('/')</code></li>
+ lexer.range('/')</code></li>
</ul>
@@ -2344,44 +2294,6 @@ operator 30
</ul>
- <p><a id="lexer.nested_pair"></a></p>
-
- <h3><code>lexer.nested_pair</code> (start_chars, end_chars)</h3>
-
- <p>Returns a pattern that matches a balanced range of text that starts with
- string <em>start_chars</em> and ends with string <em>end_chars</em>.
- With single-character delimiters, this function is identical to
- <code>delimited_range(start_chars..end_chars, false, true, true)</code>.</p>
-
- <p>Fields:</p>
-
- <ul>
- <li><code>start_chars</code>: The string starting a nested sequence.</li>
- <li><code>end_chars</code>: The string ending a nested sequence.</li>
- </ul>
-
-
- <p>Usage:</p>
-
- <ul>
- <li><code>local nested_comment = lexer.nested_pair('/*', '*/')</code></li>
- </ul>
-
-
- <p>Return:</p>
-
- <ul>
- <li>pattern</li>
- </ul>
-
-
- <p>See also:</p>
-
- <ul>
- <li><a href="#lexer.delimited_range"><code>lexer.delimited_range</code></a></li>
- </ul>
-
-
<p><a id="lexer.new"></a></p>
<h3><code>lexer.new</code> (name, opts)</h3>
@@ -2420,6 +2332,54 @@ operator 30
</ul>
+ <p><a id="lexer.range"></a></p>
+
+ <h3><code>lexer.range</code>(<em>s, e, single_line, escapes, balanced</em>)</h3>
+
+ <p>Creates and returns a pattern that matches a range of text bounded by strings
+ or patterns <em>s</em> and <em>e</em>.
+ This is a convenience function for matching more complicated ranges like
+ strings with escape characters, balanced parentheses, and block comments
+ (nested or not). <em>e</em> is optional and defaults to <em>s</em>. <em>single_line</em> indicates
+ whether or not the range must be on a single line; <em>escapes</em> indicates
+ whether or not to allow &lsquo;\&rsquo; as an escape character; and <em>balanced</em> indicates
+ whether or not to handle balanced ranges like parentheses, and requires <em>s</em>
+ and <em>e</em> to be different.</p>
+
+ <p>Parameters:</p>
+
+ <ul>
+ <li><em><code>s</code></em>: String or pattern start of a range.</li>
+ <li><em><code>e</code></em>: Optional string or pattern end of a range. The default value is <em>s</em>.</li>
+ <li><em><code>single_line</code></em>: Optional flag indicating whether or not the range must be
+ on a single line.</li>
+ <li><em><code>escapes</code></em>: Optional flag indicating whether or not the range end may
+ be escaped by a &lsquo;\&rsquo; character.
+ The default value is <code>false</code> unless <em>s</em> and <em>e</em> are identical, single-character strings.
+ In that case, the default value is <code>true</code>.</li>
+ <li><em><code>balanced</code></em>: Optional flag indicating whether or not to match a balanced
+ range, like the &ldquo;%b&rdquo; Lua pattern. This flag only applies if <em>s</em> and <em>e</em> are
+ different.</li>
+ </ul>
+
+
+ <p>Usage:</p>
+
+ <ul>
+ <li><code>local dq_str_escapes = lexer.range('"')</code></li>
+ <li><code>local dq_str_noescapes = lexer.range('"', false, false)</code></li>
+ <li><code>local unbalanced_parens = lexer.range('(', ')')</code></li>
+ <li><code>local balanced_parens = lexer.range('(', ')', false, false, true)</code></li>
+ </ul>
+
+
+ <p>Return:</p>
+
+ <ul>
+ <li>pattern</li>
+ </ul>
+
+
<p><a id="lexer.starts_line"></a></p>
<h3><code>lexer.starts_line</code> (patt)</h3>
@@ -2449,6 +2409,39 @@ operator 30
</ul>
+ <p><a id="lexer.to_eol"></a></p>
+
+ <h3><code>lexer.to_eol</code>(<em>prefix, escape</em>)</h3>
+
+ <p>Creates and returns a pattern that matches from string or pattern <em>prefix</em>
+ until the end of the line.
+ <em>escape</em> indicates whether the end of the line can be escaped with a &lsquo;\&rsquo;
+ character.</p>
+
+ <p>Parameters:</p>
+
+ <ul>
+ <li><em><code>prefix</code></em>: String or pattern prefix to start matching at.</li>
+ <li><em><code>escape</code></em>: Optional flag indicating whether or not newlines can be escaped
+ by a &lsquo;\&rsquo; character. The default value is <code>false</code>.</li>
+ </ul>
+
+
+ <p>Usage:</p>
+
+ <ul>
+ <li><code>local line_comment = lexer.to_eol('//')</code></li>
+ <li><code>local line_comment = lexer.to_eol(P('#') + ';')</code></li>
+ </ul>
+
+
+ <p>Return:</p>
+
+ <ul>
+ <li>pattern</li>
+ </ul>
+
+
<p><a id="lexer.token"></a></p>
<h3><code>lexer.token</code> (name, patt)</h3>
diff --git a/lexlua/actionscript.lua b/lexlua/actionscript.lua
index 69dcc80fc..401ae77c0 100644
--- a/lexlua/actionscript.lua
+++ b/lexlua/actionscript.lua
@@ -33,19 +33,18 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = lexer.delimited_range("'", true)
-local dq_str = lexer.delimited_range('"', true)
-local ml_str = '<![CDATA[' * (lexer.any - ']]>')^0 * ']]>'
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local ml_str = lexer.range('<![CDATA[', ']]>')
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('LlUuFf')^-2))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}')))
diff --git a/lexlua/ada.lua b/lexlua/ada.lua
index bdcbfe313..3a4c385f4 100644
--- a/lexlua/ada.lua
+++ b/lexlua/ada.lua
@@ -37,19 +37,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range('"', true, true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false)))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--')))
-- Numbers.
-local hex_num = 'O' * S('xX') * (lexer.xdigit + '_')^1
local integer = lexer.digit^1 * ('_' * lexer.digit^1)^0
local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer
-lex:add_rule('number', token(lexer.NUMBER, hex_num +
- S('+-')^-1 * (float + integer) *
- S('LlUuFf')^-3))
+lex:add_rule('number', token(lexer.NUMBER, S('+-')^-1 * (float + integer)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S(':;=<>&+-*/.()')))
diff --git a/lexlua/ansi_c.lua b/lexlua/ansi_c.lua
index ddb59e3a0..b0c646b5c 100644
--- a/lexlua/ansi_c.lua
+++ b/lexlua/ansi_c.lua
@@ -8,7 +8,8 @@ local P, R, S = lpeg.P, lpeg.R, lpeg.S
local lex = lexer.new('ansi_c')
-- Whitespace.
-lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
@@ -48,30 +49,26 @@ lex:add_rule('constants', token(lexer.CONSTANT, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = P('L')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('L')^-1 * lexer.delimited_range('"', true)
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +
- '#if' * S(' \t')^0 * '0' * lexer.space *
- (lexer.any - '#endif')^0 * P('#endif')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/') +
+ lexer.range('#if' * S(' \t')^0 * '0' * lexer.space, '#endif')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Preprocessor.
-local preproc_word = word_match[[
+local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
+ (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1
+local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * word_match[[
define elif else endif if ifdef ifndef line pragma undef
-]]
-lex:add_rule('preprocessor',
- (token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) +
- token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
- (token(lexer.WHITESPACE, S('\t ')^1) *
- token(lexer.STRING,
- lexer.delimited_range('<>', true, true)))^-1))
+]])
+lex:add_rule('preprocessor', include + preproc)
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}')))
diff --git a/lexlua/antlr.lua b/lexlua/antlr.lua
index 184ef10e1..53cd2d57f 100644
--- a/lexlua/antlr.lua
+++ b/lexlua/antlr.lua
@@ -31,18 +31,17 @@ lex:add_rule('func', token(lexer.FUNCTION, 'assert'))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Actions.
lex:add_rule('action', token(lexer.OPERATOR, P('{')) *
- token('action', (1 - P('}'))^0) *
- token(lexer.OPERATOR, P('}'))^-1)
+ token('action', (1 - P('}'))^0) * token(lexer.OPERATOR, P('}'))^-1)
lex:add_style('action', lexer.STYLE_NOTHING)
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range("'", true)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}')))
diff --git a/lexlua/apdl.lua b/lexlua/apdl.lua
index 22dc64b83..be70f2a5d 100644
--- a/lexlua/apdl.lua
+++ b/lexlua/apdl.lua
@@ -46,21 +46,19 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range("'", true, true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range("'", true, false)))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Functions.
-lex:add_rule('function', token(lexer.FUNCTION,
- lexer.delimited_range('%', true, true)))
+lex:add_rule('function', token(lexer.FUNCTION, lexer.range('%', true, false)))
-- Labels.
lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(':') * lexer.word))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '!' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('!')))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/$=,;()')))
diff --git a/lexlua/apl.lua b/lexlua/apl.lua
index 285ae0737..ad0ec2d99 100644
--- a/lexlua/apl.lua
+++ b/lexlua/apl.lua
@@ -12,11 +12,11 @@ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
lex:add_rule('comment', token(lexer.COMMENT, (P('⍝') + '#') *
- lexer.nonnewline^0))
+ lexer.nonnewline^0))
-- Strings.
-local sq_str = lexer.delimited_range("'", false, true)
-local dq_str = lexer.delimited_range('"')
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Numbers.
@@ -26,12 +26,12 @@ local exp = S('eE')
local img = S('jJ')
local sgn = P('¯')^-1
local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) *
- (exp * sgn *dig^1)^-1
+ (exp * sgn *dig^1)^-1
lex:add_rule('number', token(lexer.NUMBER, float * img * float + float))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, P('⍞') + 'χ' + '⍺' + '⍶' + '⍵' +
- '⍹' + '⎕' * R('AZ', 'az')^0))
+ '⍹' + '⎕' * R('AZ', 'az')^0))
-- Names.
local n1l = R('AZ', 'az')
diff --git a/lexlua/applescript.lua b/lexlua/applescript.lua
index 3f21f1512..cbdf95072 100644
--- a/lexlua/applescript.lua
+++ b/lexlua/applescript.lua
@@ -46,19 +46,19 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
]], true))
-- Identifiers.
-lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') *
- lexer.alnum^0))
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha *
+ (lexer.alnum + '_')^0))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
-- Comments.
-local line_comment = '--' * lexer.nonnewline^0
-local block_comment = '(*' * (lexer.any - '*)')^0 * P('*)')^-1
+local line_comment = lexer.to_eol('--')
+local block_comment = lexer.range('(*', '*)')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=:,(){}')))
diff --git a/lexlua/asm.lua b/lexlua/asm.lua
index 42558fa05..12ebdf3cf 100644
--- a/lexlua/asm.lua
+++ b/lexlua/asm.lua
@@ -314,7 +314,7 @@ local constants = word_match[[
__float80e__ __float80m__ __Infinity__ __NaN__ __QNaN__ __SNaN__
]]
lex:add_rule('constant', token(lexer.CONSTANT, constants +
- '$' * P('$')^-1 * -word))
+ '$' * P('$')^-1 * -word))
-- Labels.
lex:add_rule('label', token(lexer.LABEL, word * ':'))
@@ -323,18 +323,18 @@ lex:add_rule('label', token(lexer.LABEL, word * ':'))
lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, ';' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(';')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float +
- lexer.integer * S('hqb')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('hqb')^-1))
-- Preprocessor.
-local preproc_word = word_match[[
+local pp_word = word_match[[
arg assign clear define defstr deftok depend elif elifctx elifdef elifempty
elifenv elifid elifidn elifidni elifmacro elifn elifnctx elifndef elifnempty
elifnenv elifnid elifnidn elifnidni elifnmacro elifnnum elifnstr elifntoken
@@ -345,9 +345,8 @@ local preproc_word = word_match[[
ixdefine line local macro pathsearch pop push rep repl rmacro rotate stacksize
strcat strlen substr undef unmacro use warning while xdefine
]]
-local preproc_symbol = '??' + S('!$+?') + '%' * -lexer.space + R('09')^1
-lex:add_rule('preproc', token(lexer.PREPROCESSOR, '%' * (preproc_word +
- preproc_symbol)))
+local pp_symbol = '??' + S('!$+?') + '%' * -lexer.space + R('09')^1
+lex:add_rule('preproc', token(lexer.PREPROCESSOR, '%' * (pp_word + pp_symbol)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|~:,()[]')))
diff --git a/lexlua/asp.lua b/lexlua/asp.lua
index 05ba0a9b1..0f5fb21ea 100644
--- a/lexlua/asp.lua
+++ b/lexlua/asp.lua
@@ -25,7 +25,7 @@ local vbs_start_rule = #(P('<') * script_element * (P(function(input, index)
end
end) + '>')) * html.embed_start_tag -- <script language="vbscript">
local vbs_end_rule = #('</' * script_element * lexer.space^0 * '>') *
- html.embed_end_tag -- </script>
+ html.embed_end_tag -- </script>
lex:embed(vbs, vbs_start_rule, vbs_end_rule)
-- Fold points.
diff --git a/lexlua/autoit.lua b/lexlua/autoit.lua
index 5bb5752f9..3b6d3ae6a 100644
--- a/lexlua/autoit.lua
+++ b/lexlua/autoit.lua
@@ -97,12 +97,10 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match([[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = ';' * lexer.nonnewline_esc^0
-local block_comment1 = '#comments-start' * (lexer.any - '#comments-end')^0 *
- P('#comments-end')^-1
-local block_comment2 = '#cs' * (lexer.any - '#ce')^0 * P('#ce')^-1
-lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment1 +
- block_comment2))
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#comments-start', '#comments-end') +
+ lexer.range('#cs', '#ce')
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Preprocessor.
lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * word_match([[
@@ -111,9 +109,9 @@ lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * word_match([[
]], true)))
-- Strings.
-local dq_str = lexer.delimited_range('"', true, true)
-local sq_str = lexer.delimited_range("'", true, true)
-local inc = lexer.delimited_range('<>', true, true, true)
+local dq_str = lexer.range('"', true, false)
+local sq_str = lexer.range("'", true, false)
+local inc = lexer.range('<', '>', true, false, true)
lex:add_rule('string', token(lexer.STRING, dq_str + sq_str + inc))
-- Macros.
@@ -124,7 +122,7 @@ lex:add_style('macro', lexer.STYLE_PREPROCESSOR)
lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + '_')^1))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=?:()[]')))
diff --git a/lexlua/awk.lua b/lexlua/awk.lua
index 36329acc3..40ff501e7 100644
--- a/lexlua/awk.lua
+++ b/lexlua/awk.lua
@@ -160,7 +160,7 @@ local function scanString(input, index)
return i + 1
elseif input:sub(i, i) == BACKSLASH then
i = i + 1
- -- lexer.delimited_range() doesn't handle CRLF.
+ -- lexer.range() doesn't handle CRLF.
if input:sub(i, i + 1) == CRLF then i = i + 1 end
end
i = i + 1
@@ -229,18 +229,17 @@ lex:add_rule('comment', token(lexer.COMMENT, '#' * P(scanComment)))
lex:add_rule('string', token(lexer.STRING, DQUOTE * P(scanString)))
-- No leading sign because it might be binary.
-local float = ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) +
- ('.' * lexer.digit^1)) *
- (S('eE') * S('+-')^-1 * lexer.digit^1)^-1
+local float =
+ ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) + ('.' * lexer.digit^1)) *
+ (S('eE') * S('+-')^-1 * lexer.digit^1)^-1
-- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc.
-lex:add_rule('field',
- token('field', P('$') * S('$+-')^0 *
- (float +
- lexer.word^0 * '(' * P(scanFieldDelimiters) +
- lexer.word^1 * ('[' * P(scanFieldDelimiters))^-1 +
- '"' * P(scanString) +
- '/' * P(eatRegex) * '/')))
+lex:add_rule('field', token('field', P('$') * S('$+-')^0 * (
+ float +
+ lexer.word^0 * '(' * P(scanFieldDelimiters) +
+ lexer.word^1 * ('[' * P(scanFieldDelimiters))^-1 +
+ '"' * P(scanString) +
+ '/' * P(eatRegex) * '/')))
lex:add_style('field', lexer.STYLE_LABEL)
-- Regular expressions.
@@ -250,18 +249,18 @@ lex:add_style('field', lexer.STYLE_LABEL)
-- sequences like '\S', '\s' have special meanings with Gawk. Tokens that
-- contain them are displayed differently.
lex:add_rule('gawkRegex', token('gawkRegex', SLASH * P(scanGawkRegex)))
-lex:add_style('gawkRegex', lexer.STYLE_PREPROCESSOR..',underlined')
+lex:add_style('gawkRegex', lexer.STYLE_PREPROCESSOR .. ',underlined')
lex:add_rule('regex', token(lexer.REGEX, SLASH * P(scanRegex)))
-- Operators.
lex:add_rule('gawkOperator', token('gawkOperator', P("|&") + "@" + "**=" +
- "**"))
-lex:add_style('gawkOperator', lexer.STYLE_OPERATOR..',underlined')
+ "**"))
+lex:add_style('gawkOperator', lexer.STYLE_OPERATOR .. ',underlined')
lex:add_rule('operator', token(lexer.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~')))
-- Numbers.
lex:add_rule('gawkNumber', token('gawkNumber', lexer.hex_num + lexer.oct_num))
-lex:add_style('gawkNumber', lexer.STYLE_NUMBER..',underlined')
+lex:add_style('gawkNumber', lexer.STYLE_NUMBER .. ',underlined')
lex:add_rule('number', token(lexer.NUMBER, float))
-- Keywords.
@@ -282,7 +281,7 @@ lex:add_rule('gawkBuiltInVariable', token('gawkBuiltInVariable', word_match[[
ARGIND BINMODE ERRNO FIELDWIDTHS FPAT FUNCTAB IGNORECASE LINT PREC PROCINFO
ROUNDMODE RT SYMTAB TEXTDOMAIN
]]))
-lex:add_style('gawkBuiltInVariable', lexer.STYLE_CONSTANT..',underlined')
+lex:add_style('gawkBuiltInVariable', lexer.STYLE_CONSTANT .. ',underlined')
-- Functions.
lex:add_rule('function', token(lexer.FUNCTION, lexer.word * #P('(')))
diff --git a/lexlua/bash.lua b/lexlua/bash.lua
index d3a3953e8..fb214bca8 100644
--- a/lexlua/bash.lua
+++ b/lexlua/bash.lua
@@ -23,29 +23,28 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = lexer.delimited_range("'", false, true)
-local dq_str = lexer.delimited_range('"')
-local ex_str = lexer.delimited_range('`')
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
+local ex_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
- local s, e, _, delimiter =
- input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
- if s == index and delimiter then
- local _, e = input:find('[\n\r\f]+'..delimiter, e)
- return e and e + 1 or #input + 1
- end
+ local _, e, _, delimiter = input:find(
+ '^%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
+ if not delimiter then return end
+ _, e = input:find('[\n\r\f]+' .. delimiter, e)
+ return e and e + 1 or #input + 1
end)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Variables.
-lex:add_rule('variable', token(lexer.VARIABLE,
- '$' * (S('!#?*@$') + lexer.digit^1 + lexer.word +
- lexer.delimited_range('{}', true, true))))
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * (
+ S('!#?*@$') + lexer.digit^1 + lexer.word + lexer.range('{', '}', true)
+)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')))
diff --git a/lexlua/batch.lua b/lexlua/batch.lua
index a8dcadda9..314f4cd3a 100644
--- a/lexlua/batch.lua
+++ b/lexlua/batch.lua
@@ -26,19 +26,19 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match([[
]], true)))
-- Comments.
-local rem = (P('REM') + 'rem') * lexer.space
-lex:add_rule('comment', token(lexer.COMMENT, (rem + '::') * lexer.nonnewline^0))
+local rem = (P('REM') + 'rem') * #lexer.space
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(rem + '::')))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
-- Variables.
-lex:add_rule('variable', token(lexer.VARIABLE,
- '%' * (lexer.digit + '%' * lexer.alpha) +
- lexer.delimited_range('%', true, true)))
+local arg = '%' * lexer.digit + '%~' * lexer.alnum^1
+local variable = lexer.range('%', true, false)
+lex:add_rule('variable', token(lexer.VARIABLE, arg + variable))
-- Labels.
lex:add_rule('label', token(lexer.LABEL, ':' * lexer.word))
diff --git a/lexlua/bibtex.lua b/lexlua/bibtex.lua
index 162156103..0eee5801d 100644
--- a/lexlua/bibtex.lua
+++ b/lexlua/bibtex.lua
@@ -22,9 +22,9 @@ lex:add_style('field', lexer.STYLE_CONSTANT)
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range('"') +
- lexer.delimited_range('{}', false, true, true)))
+local dq_str = lexer.range('"')
+local br_str = lexer.range('{', '}', false, false, true)
+lex:add_rule('string', token(lexer.STRING, dq_str + br_str))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S(',=')))
diff --git a/lexlua/boo.lua b/lexlua/boo.lua
index 907f72c19..926351f04 100644
--- a/lexlua/boo.lua
+++ b/lexlua/boo.lua
@@ -41,22 +41,23 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = lexer.delimited_range("'", true)
-local dq_str = lexer.delimited_range('"', true)
-local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
+local string = token(lexer.STRING, tq_str + sq_str + dq_str)
local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') *
- lexer.delimited_range('/', true)
-lex:add_rule('string', token(lexer.STRING, triple_dq_str + sq_str + dq_str) +
- token(lexer.REGEX, regex_str))
+ lexer.range('/', true)
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-- Comments.
-local line_comment = '#' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- (S('msdhsfFlL') + 'ms')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number *
+ (S('msdhsfFlL') + 'ms')^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`')))
diff --git a/lexlua/caml.lua b/lexlua/caml.lua
index 5d668d133..a65d5552a 100644
--- a/lexlua/caml.lua
+++ b/lexlua/caml.lua
@@ -47,14 +47,16 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.nested_pair('(*', '*)')))
+lex:add_rule('comment', token(lexer.COMMENT,
+ lexer.range('(*', '*)', false, false, true)))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}')))
diff --git a/lexlua/chuck.lua b/lexlua/chuck.lua
index 1db6913d9..2f918ed73 100644
--- a/lexlua/chuck.lua
+++ b/lexlua/chuck.lua
@@ -48,23 +48,23 @@ lex:add_style('time', lexer.STYLE_NUMBER)
-- Special special value.
lex:add_rule('now', token('now', P('now')))
-lex:add_style('now', lexer.STYLE_CONSTANT..',bold')
+lex:add_style('now', lexer.STYLE_CONSTANT .. ',bold')
-- Strings.
-local sq_str = P('L')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('L')^-1 * lexer.delimited_range('"', true)
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@')))
diff --git a/lexlua/cmake.lua b/lexlua/cmake.lua
index ef3b598b0..2c78d5d25 100644
--- a/lexlua/cmake.lua
+++ b/lexlua/cmake.lua
@@ -111,7 +111,7 @@ lex:add_rule('variable', token(lexer.VARIABLE, word_match[[
MINGW MSVC MSVC60 MSVC70 MSVC71 MSVC80 MSVC_IDE POST_BUILD PRE_BUILD
PROJECT_BINARY_DIR PROJECT_NAME PROJECT_SOURCE_DIR RUN_CONFIGURE TARGET
UNIX WIN32
-]] + P('$') * lexer.delimited_range('{}', false, true)))
+]] + P('$') * lexer.range('{', '}')))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, word_match[[
@@ -123,10 +123,10 @@ lex:add_rule('operator', token(lexer.OPERATOR, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"')))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Fold points.
lex:add_fold_point(lexer.KEYWORD, 'IF', 'ENDIF')
diff --git a/lexlua/coffeescript.lua b/lexlua/coffeescript.lua
index 62023be60..b5edd1120 100644
--- a/lexlua/coffeescript.lua
+++ b/lexlua/coffeescript.lua
@@ -20,25 +20,27 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
-- Fields: object properties and methods.
lex:add_rule('field', token(lexer.FUNCTION, '.' * (S('_$') + lexer.alpha) *
- (S('_$') + lexer.alnum)^0))
+ (S('_$') + lexer.alnum)^0))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local string = token(lexer.STRING, sq_str + dq_str)
local regex_str = #P('/') * lexer.last_char_includes('+-*%<>!=^&|?~:;,([{') *
- lexer.delimited_range('/', true) * S('igm')^0
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')) +
- token(lexer.REGEX, regex_str))
+ lexer.range('/', true) * S('igm')^0
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-- Comments.
-local block_comment = '###' * (lexer.any - '###')^0 * P('###')^-1
-local line_comment = '#' * lexer.nonnewline_esc^0
+local block_comment = lexer.range('###')
+local line_comment = lexer.to_eol('#', true)
lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')))
diff --git a/lexlua/context.lua b/lexlua/context.lua
index 87811164b..904354c55 100644
--- a/lexlua/context.lua
+++ b/lexlua/context.lua
@@ -12,32 +12,31 @@ local beginend = (P('begin') + 'end')
local startstop = (P('start') + 'stop')
-- Whitespace.
-local ws = token(lexer.WHITESPACE, lexer.space^1)
-lex:add_rule('whitespace', ws)
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local comment = token(lexer.COMMENT, '%' * lexer.nonnewline^0)
-lex:add_rule('comment', comment)
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- Sections.
local wm_section = word_match[[
chapter part section subject subsection subsubject subsubsection subsubsubject
subsubsubsection subsubsubsubject title
]]
-local section = token(lexer.CLASS,
- '\\' * (wm_section + (startstop * wm_section)))
+local section = token(lexer.CLASS, '\\' *
+ (wm_section + (startstop * wm_section)))
lex:add_rule('section', section)
-- TeX and ConTeXt mkiv environments.
-local environment = token(lexer.STRING,
- '\\' * (beginend + startstop) * lexer.alpha^1)
+local environment = token(lexer.STRING, '\\' * (beginend + startstop) *
+ lexer.alpha^1)
lex:add_rule('environment', environment)
-- Commands.
-local command = token(lexer.KEYWORD,
- '\\' * (lexer.alpha^1 * P('\\') * lexer.space^1 +
- lexer.alpha^1 +
- S('!"#$%&\',./;=[\\]_{|}~`^-')))
+local command = token(lexer.KEYWORD, '\\' * (
+ lexer.alpha^1 * P('\\') * lexer.space^1 +
+ lexer.alpha^1 +
+ S('!"#$%&\',./;=[\\]_{|}~`^-')
+))
lex:add_rule('command', command)
-- Operators.
diff --git a/lexlua/cpp.lua b/lexlua/cpp.lua
index e501ce99c..61d18454f 100644
--- a/lexlua/cpp.lua
+++ b/lexlua/cpp.lua
@@ -8,7 +8,8 @@ local P, R, S = lpeg.P, lpeg.R, lpeg.S
local lex = lexer.new('cpp')
-- Whitespace.
-lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
@@ -33,16 +34,16 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
]]))
-- Strings.
-local sq_str = P('L')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('L')^-1 * lexer.delimited_range('"', true)
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
@@ -53,17 +54,13 @@ local integer = S('+-')^-1 * (hex + bin + dec)
lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Preprocessor.
-local preproc_word = word_match[[
+local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
+ (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1
+local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * word_match[[
define elif else endif error if ifdef ifndef import line pragma undef using
warning
-]]
-lex:add_rule('preprocessor',
- #lexer.starts_line('#') *
- (token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) +
- token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') *
- (token(lexer.WHITESPACE, S('\t ')^1) *
- token(lexer.STRING,
- lexer.delimited_range('<>', true, true)))^-1))
+]])
+lex:add_rule('preprocessor', include + preproc)
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}')))
diff --git a/lexlua/crystal.lua b/lexlua/crystal.lua
index 6f81dcc7e..85e111e90 100644
--- a/lexlua/crystal.lua
+++ b/lexlua/crystal.lua
@@ -40,9 +40,9 @@ local literal_delimitted = P(function(input, index)
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = lexer.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = lexer.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
match_pos = lpeg.match(patt, input, index)
return match_pos or #input + 1
@@ -50,27 +50,27 @@ local literal_delimitted = P(function(input, index)
end)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-- Strings.
-local cmd_str = lexer.delimited_range('`')
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
+local cmd_str = lexer.range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
local heredoc = '<<' * P(function(input, index)
- local s, e, indented, _, delimiter =
- input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
- if s == index and delimiter then
- local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
- local _, e = input:find(end_heredoc..delimiter, e)
- return e and e + 1 or #input + 1
- end
+ local _, e, indented, _, delimiter = input:find(
+ '^(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
+ if not delimiter then return end
+ local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
+ _, e = input:find(end_heredoc .. delimiter, e)
+ return e and e + 1 or #input + 1
end)
+local string = token(lexer.STRING, (sq_str + dq_str + heredoc + cmd_str) *
+ S('f')^-1)
-- TODO: regex_str fails with `obj.method /patt/` syntax.
local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') *
- lexer.delimited_range('/', true, false) * S('iomx')^0
-lex:add_rule('string', token(lexer.STRING, (sq_str + dq_str + heredoc +
- cmd_str) * S('f')^-1) +
- token(lexer.REGEX, regex_str))
+ lexer.range('/', true) * S('iomx')^0
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-- Numbers.
local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1
@@ -79,15 +79,18 @@ local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec)
-- TODO: meta, control, etc. for numeric_literal.
local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char
lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer +
- numeric_literal))
+ numeric_literal))
-- Variables.
-local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit +
- '-' * S('0FadiIKlpvw'))
+local global_var = '$' * (
+ word + S('!@L+`\'=~/\\,.;<>_*"$?:') +
+ lexer.digit +
+ '-' * S('0FadiIKlpvw')
+)
local class_var = '@@' * word
local inst_var = '@' * word
lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var +
- inst_var))
+ inst_var))
-- Symbols.
lex:add_rule('symbol', token('symbol', ':' * P(function(input, index)
@@ -101,7 +104,7 @@ lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~')))
-- Fold points.
local function disambiguate(text, pos, line, s)
return line:sub(1, s - 1):match('^%s*$') and
- not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0
+ not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0
end
lex:add_fold_point(lexer.KEYWORD, 'begin', 'end')
lex:add_fold_point(lexer.KEYWORD, 'case', 'end')
diff --git a/lexlua/csharp.lua b/lexlua/csharp.lua
index 83f17e411..3a63a20cf 100644
--- a/lexlua/csharp.lua
+++ b/lexlua/csharp.lua
@@ -31,27 +31,24 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
-local sq_str = lexer.delimited_range("'", true)
-local dq_str = lexer.delimited_range('"', true)
-local ml_str = P('@')^-1 * lexer.delimited_range('"', false, true)
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local ml_str = P('@')^-1 * lexer.range('"', false, false)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('lLdDfFMm')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('lLdDfFmM')^-1))
-- Preprocessor.
-local preproc_word = word_match[[
- define elif else endif error if line undef warning region endregion
-]]
-lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') *
- S('\t ')^0 *
- preproc_word))
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * S('\t ')^0 *
+ word_match[[
+ define elif else endif error if line undef warning region endregion
+ ]]))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}')))
diff --git a/lexlua/css.lua b/lexlua/css.lua
index 521540d2c..c11833750 100644
--- a/lexlua/css.lua
+++ b/lexlua/css.lua
@@ -124,8 +124,8 @@ lex:add_rule('color', token('color', word_match[[
lex:add_style('color', lexer.STYLE_NUMBER)
-- Identifiers.
-lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha *
- (lexer.alnum + S('_-'))^0))
+local word = lexer.alpha * (lexer.alnum + S('_-'))^0
+lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Pseudo classes and pseudo elements.
lex:add_rule('pseudoclass', ':' * token('pseudoclass', word_match[[
@@ -141,12 +141,12 @@ lex:add_rule('pseudoelement', '::' * token('pseudoelement', word_match[[
lex:add_style('pseudoelement', lexer.STYLE_CONSTANT)
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '/*' * (lexer.any - '*/')^0 *
- P('*/')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('/*', '*/')))
-- Numbers.
local unit = token('unit', word_match[[
diff --git a/lexlua/dart.lua b/lexlua/dart.lua
index c9aa80200..fca648485 100644
--- a/lexlua/dart.lua
+++ b/lexlua/dart.lua
@@ -25,22 +25,21 @@ lex:add_rule('builtin', token(lexer.CONSTANT, word_match[[
]]))
-- Strings.
-local sq_str = S('r')^-1 * lexer.delimited_range("'", true)
-local dq_str = S('r')^-1 * lexer.delimited_range('"', true)
-local sq_str_multiline = S('r')^-1 * "'''" * (lexer.any - "'''")^0 * P("'''")^-1
-local dq_str_multiline = S('r')^-1 * '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-lex:add_rule('string', token(lexer.STRING, sq_str_multiline + dq_str_multiline +
- sq_str + dq_str))
+local sq_str = S('r')^-1 * lexer.range("'", true)
+local dq_str = S('r')^-1 * lexer.range('"', true)
+local tq_str = S('r')^-1 * (lexer.range("'''") + lexer.range('"""'))
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0 +
- lexer.nested_pair('/*', '*/')))
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.hex_num))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}')))
diff --git a/lexlua/desktop.lua b/lexlua/desktop.lua
index f8dca5c00..8229d2c75 100644
--- a/lexlua/desktop.lua
+++ b/lexlua/desktop.lua
@@ -23,28 +23,27 @@ lex:add_rule('value', token('value', word_match[[true false]]))
lex:add_style('value', lexer.STYLE_CONSTANT)
-- Identifiers.
-lex:add_rule('identifier', lexer.token(lexer.IDENTIFIER,
- lexer.alpha * (lexer.alnum + S('_-'))^0))
+local word = lexer.alpha * (lexer.alnum + S('_-'))^0
+lex:add_rule('identifier', lexer.token(lexer.IDENTIFIER, word))
+
+local bracketed = lexer.range('[', ']')
-- Group headers.
-lex:add_rule('header',
- lexer.starts_line(token('header',
- lexer.delimited_range('[]', false, true))))
+lex:add_rule('header', lexer.starts_line(token('header', bracketed)))
lex:add_style('header', lexer.STYLE_LABEL)
-- Locales.
-lex:add_rule('locale', token('locale',
- lexer.delimited_range('[]', false, true)))
+lex:add_rule('locale', token('locale', bracketed))
lex:add_style('locale', lexer.STYLE_CLASS)
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"')))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer)))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Field codes.
lex:add_rule('code', lexer.token('code', P('%') * S('fFuUdDnNickvm')))
diff --git a/lexlua/diff.lua b/lexlua/diff.lua
index 19c110938..2b3b171f1 100644
--- a/lexlua/diff.lua
+++ b/lexlua/diff.lua
@@ -10,14 +10,14 @@ local lex = lexer.new('diff', {lex_by_line = true})
-- Text, separators, and file headers.
lex:add_rule('index', token(lexer.COMMENT, 'Index: ' * lexer.any^0 * -1))
lex:add_rule('separator', token(lexer.COMMENT, ('---' + P('*')^4 + P('=')^1) *
- lexer.space^0 * -1))
+ lexer.space^0 * -1))
lex:add_rule('header', token('header', (P('*** ') + '--- ' + '+++ ') *
- lexer.any^1))
+ lexer.any^1))
lex:add_style('header', lexer.STYLE_COMMENT)
-- Location.
lex:add_rule('location', token(lexer.NUMBER, ('@@' + lexer.digit^1 + '****') *
- lexer.any^1))
+ lexer.any^1))
-- Additions, deletions, and changes.
lex:add_rule('addition', token('addition', S('>+') * lexer.any^0))
diff --git a/lexlua/django.lua b/lexlua/django.lua
index 74a6a7da2..96a9b454b 100644
--- a/lexlua/django.lua
+++ b/lexlua/django.lua
@@ -32,16 +32,15 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range('"', false, true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', false, false)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S(':,.|')))
-- Embed Django in HTML.
local html = lexer.load('html')
-local html_comment = '<!--' * (lexer.any - '-->')^0 * P('-->')^-1
-local django_comment = '{#' * (lexer.any - lexer.newline - '#}')^0 * P('#}')^-1
+local html_comment = lexer.range('<!--', '-->')
+local django_comment = lexer.range('{#', '#}', true)
html:modify_rule('comment', token(lexer.COMMENT, html_comment + django_comment))
local django_start_rule = token('django_tag', '{' * S('{%'))
local django_end_rule = token('django_tag', S('%}') * '}')
diff --git a/lexlua/dmd.lua b/lexlua/dmd.lua
index 08757a594..968b01e6c 100644
--- a/lexlua/dmd.lua
+++ b/lexlua/dmd.lua
@@ -12,25 +12,25 @@ local M = {_NAME = 'dmd'}
local ws = token(lexer.WHITESPACE, lexer.space^1)
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
-local nested_comment = lexer.nested_pair('/+', '+/')
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
+local nested_comment = lexer.range('/+', '+/', false, false, true)
local comment = token(lexer.COMMENT, line_comment + block_comment +
- nested_comment)
+ nested_comment)
-- Strings.
-local sq_str = lexer.delimited_range("'", true) * S('cwd')^-1
-local dq_str = lexer.delimited_range('"') * S('cwd')^-1
-local lit_str = 'r' * lexer.delimited_range('"', false, true) * S('cwd')^-1
-local bt_str = lexer.delimited_range('`', false, true) * S('cwd')^-1
-local hex_str = 'x' * lexer.delimited_range('"') * S('cwd')^-1
+local sq_str = lexer.range("'", true) * S('cwd')^-1
+local dq_str = lexer.range('"') * S('cwd')^-1
+local lit_str = 'r' * lexer.range('"', false, false) * S('cwd')^-1
+local bt_str = lexer.range('`', false, false) * S('cwd')^-1
+local hex_str = 'x' * lexer.range('"') * S('cwd')^-1
local other_hex_str = '\\x' * (lexer.xdigit * lexer.xdigit)^1
-local del_str = lexer.nested_pair('q"[', ']"') * S('cwd')^-1 +
- lexer.nested_pair('q"(', ')"') * S('cwd')^-1 +
- lexer.nested_pair('q"{', '}"') * S('cwd')^-1 +
- lexer.nested_pair('q"<', '>"') * S('cwd')^-1
-local string = token(lexer.STRING, del_str + sq_str + dq_str + lit_str +
- bt_str + hex_str + other_hex_str)
+local str = sq_str + dq_str + lit_str + bt_str + hex_str + other_hex_str
+for left, right in pairs{['['] = ']', ['('] = ')', ['{'] = '}', ['<'] = '>'} do
+ str = str + lexer.range('q"' .. left, right .. '"', false, false, true) *
+ S('cwd')^-1
+end
+local string = token(lexer.STRING, str)
-- Numbers.
local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
@@ -72,7 +72,7 @@ local constant = token(lexer.CONSTANT, word_match{
})
local class_sequence = token(lexer.TYPE, P('class') + P('struct')) * ws^1 *
- token(lexer.CLASS, lexer.word)
+ token(lexer.CLASS, lexer.word)
-- Identifiers.
local identifier = token(lexer.IDENTIFIER, lexer.word)
@@ -126,17 +126,17 @@ local versions_list = token('versions', word_match{
})
local versions = token(lexer.KEYWORD, 'version') * lexer.space^0 *
- token(lexer.OPERATOR, '(') * lexer.space^0 * versions_list
+ token(lexer.OPERATOR, '(') * lexer.space^0 * versions_list
local scopes = token(lexer.KEYWORD, 'scope') * lexer.space^0 *
- token(lexer.OPERATOR, '(') * lexer.space^0 * scopes_list
+ token(lexer.OPERATOR, '(') * lexer.space^0 * scopes_list
local traits = token(lexer.KEYWORD, '__traits') * lexer.space^0 *
- token(lexer.OPERATOR, '(') * lexer.space^0 * traits_list
+ token(lexer.OPERATOR, '(') * lexer.space^0 * traits_list
-local func = token(lexer.FUNCTION, lexer.word) *
- #(lexer.space^0 * (P('!') * lexer.word^-1 * lexer.space^-1)^-1 *
- P('('))
+local func = token(lexer.FUNCTION, lexer.word) * #(
+ lexer.space^0 * (P('!') * lexer.word^-1 * lexer.space^-1)^-1 * P('(')
+)
M._rules = {
{'whitespace', ws},
diff --git a/lexlua/dockerfile.lua b/lexlua/dockerfile.lua
index ddf9e53f8..6aeef3a06 100644
--- a/lexlua/dockerfile.lua
+++ b/lexlua/dockerfile.lua
@@ -20,20 +20,19 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Variable.
-lex:add_rule('variable', token(lexer.VARIABLE,
- S('$')^1 * (S('{')^1 * lexer.word * S('}')^1 +
- lexer.word)))
+lex:add_rule('variable', token(lexer.VARIABLE, S('$')^1 *
+ (S('{')^1 * lexer.word * S('}')^1 + lexer.word)))
-- Strings.
-local sq_str = lexer.delimited_range("'", false, true)
-local dq_str = lexer.delimited_range('"')
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('\\[],=:{}')))
diff --git a/lexlua/dot.lua b/lexlua/dot.lua
index e71079cbf..906ce6b48 100644
--- a/lexlua/dot.lua
+++ b/lexlua/dot.lua
@@ -32,12 +32,13 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
diff --git a/lexlua/eiffel.lua b/lexlua/eiffel.lua
index 1bc134a89..7dda50bad 100644
--- a/lexlua/eiffel.lua
+++ b/lexlua/eiffel.lua
@@ -29,14 +29,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}')))
diff --git a/lexlua/elixir.lua b/lexlua/elixir.lua
index cc9675d78..18c62aaa9 100644
--- a/lexlua/elixir.lua
+++ b/lexlua/elixir.lua
@@ -12,58 +12,56 @@ local lex = lexer.new('elixir', {fold_by_indentation = true})
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Sigils.
-local sigil11 = P("~") * S("CRSW") * lexer.delimited_range('<>', false, true)
-local sigil12 = P("~") * S("CRSW") * lexer.delimited_range('{}', false, true)
-local sigil13 = P("~") * S("CRSW") * lexer.delimited_range('[]', false, true)
-local sigil14 = P("~") * S("CRSW") * lexer.delimited_range('()', false, true)
-local sigil15 = P("~") * S("CRSW") * lexer.delimited_range('|', false, true)
-local sigil16 = P("~") * S("CRSW") * lexer.delimited_range('/', false, true)
-local sigil17 = P("~") * S("CRSW") * lexer.delimited_range('"', false, true)
-local sigil18 = P("~") * S("CRSW") * lexer.delimited_range("'", false, true)
-local sigil19 = P("~") * S("CRSW") * '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-local sigil10 = P("~") * S("CRSW") * "'''" * (lexer.any - "'''")^0 * P("'''")^-1
-local sigil21 = P("~") * S("crsw") * lexer.delimited_range('<>', false, false)
-local sigil22 = P("~") * S("crsw") * lexer.delimited_range('{}', false, false)
-local sigil23 = P("~") * S("crsw") * lexer.delimited_range('[]', false, false)
-local sigil24 = P("~") * S("crsw") * lexer.delimited_range('()', false, false)
-local sigil25 = P("~") * S("crsw") * lexer.delimited_range('|', false, false)
-local sigil26 = P("~") * S("crsw") * lexer.delimited_range('/', false, false)
-local sigil27 = P("~") * S("crsw") * lexer.delimited_range('"', false, false)
-local sigil28 = P("~") * S("crsw") * lexer.delimited_range("'", false, false)
-local sigil29 = P("~") * S("csrw") * '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-local sigil20 = P("~") * S("csrw") * "'''" * (lexer.any - "'''")^0 * P("'''")^-1
+local sigil11 = P("~") * S("CRSW") * lexer.range('<', '>', false)
+local sigil12 = P("~") * S("CRSW") * lexer.range('{', '}', false)
+local sigil13 = P("~") * S("CRSW") * lexer.range('[', ']', false)
+local sigil14 = P("~") * S("CRSW") * lexer.range('(', ')', false)
+local sigil15 = P("~") * S("CRSW") * lexer.range('|', false, false)
+local sigil16 = P("~") * S("CRSW") * lexer.range('/', false, false)
+local sigil17 = P("~") * S("CRSW") * lexer.range('"', false, false)
+local sigil18 = P("~") * S("CRSW") * lexer.range("'", false, false)
+local sigil19 = P("~") * S("CRSW") * lexer.range('"""')
+local sigil10 = P("~") * S("CRSW") * lexer.range("'''")
+local sigil21 = P("~") * S("crsw") * lexer.range('<', '>', false, true)
+local sigil22 = P("~") * S("crsw") * lexer.range('{', '}', false, true)
+local sigil23 = P("~") * S("crsw") * lexer.range('[', ']', false, true)
+local sigil24 = P("~") * S("crsw") * lexer.range('(', ')', false, true)
+local sigil25 = P("~") * S("crsw") * lexer.range('|', false)
+local sigil26 = P("~") * S("crsw") * lexer.range('/', false)
+local sigil27 = P("~") * S("crsw") * lexer.range('"', false)
+local sigil28 = P("~") * S("crsw") * lexer.range("'", false)
+local sigil29 = P("~") * S("crsw") * lexer.range('"""')
+local sigil20 = P("~") * S("crsw") * lexer.range("'''")
local sigil_token = token(lexer.REGEX, sigil10 + sigil19 + sigil11 + sigil12 +
- sigil13 + sigil14 + sigil15 + sigil16 +
- sigil17 + sigil18 + sigil20 + sigil29 +
- sigil21 + sigil22 + sigil23 + sigil24 +
- sigil25 + sigil26 + sigil27 + sigil28)
+ sigil13 + sigil14 + sigil15 + sigil16 + sigil17 + sigil18 + sigil20 +
+ sigil29 + sigil21 + sigil22 + sigil23 + sigil24 + sigil25 + sigil26 +
+ sigil27 + sigil28)
local sigiladdon_token = token(lexer.EMBEDDED, R('az', 'AZ')^0)
lex:add_rule('sigil', sigil_token * sigiladdon_token)
-- Atoms.
-local atom1 = B(1 - P(':')) * P(':') * lexer.delimited_range('"', false)
+local atom1 = B(1 - P(':')) * P(':') * lexer.range('"', false)
local atom2 = B(1 - P(':')) * P(':') * R('az', 'AZ') *
- R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
+ R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
local atom3 = B(1 - R('az', 'AZ', '__', '09', '::')) *
- R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
+ R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1
lex:add_rule('atom', token(lexer.CONSTANT, atom1 + atom2 + atom3))
-- Strings.
-local dq_str = lexer.delimited_range('"', false)
-local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
+local dq_str = lexer.range('"')
+local triple_dq_str = lexer.range('"""')
lex:add_rule('string', token(lexer.STRING, triple_dq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-- Attributes.
lex:add_rule('attribute', token(lexer.LABEL, B(1 - R('az', 'AZ', '__')) *
- P('@') * R('az','AZ') *
- R('az','AZ','09','__')^0))
+ P('@') * R('az','AZ') * R('az','AZ','09','__')^0))
-- Booleans.
lex:add_rule('boolean', token(lexer.NUMBER, P(':')^-1 *
- word_match[[true false nil]]))
+ word_match[[true false nil]]))
-- Functions.
lex:add_rule('function', token(lexer.FUNCTION, word_match[[
@@ -83,16 +81,14 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
-- Operators
local operator1 = word_match[[and or not when xor in]]
local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' +
- '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' +
- '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' +
- '|>' + '|' + '..' + '.' + '^^^' + '^' + '\\\\' + '::' + '*' +
- '/' + '~~~' + '@'
+ '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' + '--' + '-' +
+ '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' + '|>' + '|' + '..' + '.' +
+ '^^^' + '^' + '\\\\' + '::' + '*' + '/' + '~~~' + '@'
lex:add_rule('operator', token(lexer.OPERATOR, operator1 + operator2))
-- Identifiers
lex:add_rule('identifier', token(lexer.IDENTIFIER, R('az', '__') *
- R('az', 'AZ', '__', '09')^0 *
- S('?!')^-1))
+ R('az', 'AZ', '__', '09')^0 * S('?!')^-1))
-- Numbers
local dec = lexer.digit * (lexer.digit + P("_"))^0
@@ -100,8 +96,8 @@ local bin = '0b' * S('01')^1
local oct = '0o' * R('07')^1
local integer = bin + lexer.hex_num + oct + dec
local float = lexer.digit^1 * P(".") * lexer.digit^1 * S("eE") *
- (S('+-')^-1 * lexer.digit^1)^-1
+ (S('+-')^-1 * lexer.digit^1)^-1
lex:add_rule('number', B(1 - R('az', 'AZ', '__')) * S('+-')^-1 *
- token(lexer.NUMBER, float + integer))
+ token(lexer.NUMBER, float + integer))
return lex
diff --git a/lexlua/erlang.lua b/lexlua/erlang.lua
index 943e31b36..0d2a3b19b 100644
--- a/lexlua/erlang.lua
+++ b/lexlua/erlang.lua
@@ -42,11 +42,11 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower *
- ('_' + lexer.alnum)^0))
+ ('_' + lexer.alnum)^0))
-- Variables.
lex:add_rule('variable', token(lexer.VARIABLE, P('_')^0 * lexer.upper *
- ('_' + lexer.alnum)^0))
+ ('_' + lexer.alnum)^0))
-- Directives.
lex:add_rule('directive', token('directive', '-' * word_match[[
@@ -56,15 +56,16 @@ lex:add_rule('directive', token('directive', '-' * word_match[[
lex:add_style('directive', lexer.STYLE_PREPROCESSOR)
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"') +
- '$' * lexer.any * lexer.alnum^0))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str +
+ '$' * lexer.any * lexer.alnum^0))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('-<>.;=/|+*:,!()[]{}')))
diff --git a/lexlua/faust.lua b/lexlua/faust.lua
index c51956cc4..2e579dfd8 100644
--- a/lexlua/faust.lua
+++ b/lexlua/faust.lua
@@ -21,11 +21,11 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
-- Comments.
-local line_comment = '//' * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
@@ -37,11 +37,10 @@ lex:add_rule('number', token(lexer.NUMBER, flt + int))
-- Pragmas.
lex:add_rule('pragma', token(lexer.PREPROCESSOR, P('<mdoc>') *
- (lexer.any - P('</mdoc>'))^0 *
- P('</mdoc>')^-1))
+ (lexer.any - P('</mdoc>'))^0 * P('</mdoc>')^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR,
- S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'')))
+ S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'')))
return lex
diff --git a/lexlua/fish.lua b/lexlua/fish.lua
index 6585e240b..5ec829391 100644
--- a/lexlua/fish.lua
+++ b/lexlua/fish.lua
@@ -25,24 +25,23 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Variables.
-lex:add_rule('variable', token(lexer.VARIABLE,
- '$' * (lexer.word +
- lexer.delimited_range('{}', true, true))))
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.word +
+ lexer.range('{', '}', true))))
-- Strings.
-local sq_str = lexer.delimited_range("'", false, true)
-local dq_str = lexer.delimited_range('"')
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Shebang.
-lex:add_rule('shebang', token('shebang', '#!/' * lexer.nonnewline^0))
+lex:add_rule('shebang', token('shebang', lexer.to_eol('#!/')))
lex:add_style('shebang', lexer.STYLE_LABEL)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}')))
diff --git a/lexlua/forth.lua b/lexlua/forth.lua
index df1dcdcb5..7f1d92ace 100644
--- a/lexlua/forth.lua
+++ b/lexlua/forth.lua
@@ -12,14 +12,14 @@ local lex = lexer.new('forth')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Strings.
-local c_str = 'c' * lexer.delimited_range('"', true, true)
-local s_str = 's' * lexer.delimited_range('"', true, true)
-local s_bs_str = 's\\' * lexer.delimited_range('"', true, false)
-local dot_str = '.' * lexer.delimited_range('"', true, true)
-local dot_paren_str = '.' * lexer.delimited_range('()', true, true, false)
-local abort_str = 'abort' * lexer.delimited_range('"', true, true)
+local c_str = 'c' * lexer.range('"', true, false)
+local s_str = 's' * lexer.range('"', true, false)
+local s_bs_str = 's\\' * lexer.range('"', true)
+local dot_str = '.' * lexer.range('"', true, false)
+local dot_paren_str = '.' * lexer.range('(', ')', true)
+local abort_str = 'abort' * lexer.range('"', true, false)
lex:add_rule('string', token(lexer.STRING, c_str + s_str + s_bs_str + dot_str +
- dot_paren_str + abort_str))
+ dot_paren_str + abort_str))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
@@ -38,17 +38,17 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
]], true))
-- Identifiers.
-lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alnum +
- S('+-*=<>.?/\'%,_$#'))^1))
+lex:add_rule('identifier', token(lexer.IDENTIFIER,
+ (lexer.alnum + S('+-*=<>.?/\'%,_$#'))^1))
-- Comments.
-local line_comment = S('|\\') * lexer.nonnewline^0
-local block_comment = '(' * (lexer.any - ')')^0 * P(')')^-1
+local line_comment = lexer.to_eol(S('|\\'))
+local block_comment = lexer.range('(', ')')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 *
- (S('./') * lexer.digit^1)^-1))
+ (S('./') * lexer.digit^1)^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S(':;<>+*-/[]#')))
diff --git a/lexlua/fortran.lua b/lexlua/fortran.lua
index a508a9625..e07068833 100644
--- a/lexlua/fortran.lua
+++ b/lexlua/fortran.lua
@@ -11,14 +11,13 @@ local lex = lexer.new('fortran')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local c_comment = lexer.starts_line(S('Cc')) * lexer.nonnewline^0
-local d_comment = lexer.starts_line(S('Dd')) * lexer.nonnewline^0
-local ex_comment = lexer.starts_line('!') * lexer.nonnewline^0
-local ast_comment = lexer.starts_line('*') * lexer.nonnewline^0
-local line_comment = '!' * lexer.nonnewline^0
+local c_comment = lexer.to_eol(lexer.starts_line(S('Cc')))
+local d_comment = lexer.to_eol(lexer.starts_line(S('Dd')))
+local ex_comment = lexer.to_eol(lexer.starts_line('!'))
+local ast_comment = lexer.to_eol(lexer.starts_line('*'))
+local line_comment = lexer.to_eol('!')
lex:add_rule('comment', token(lexer.COMMENT, c_comment + d_comment +
- ex_comment + ast_comment +
- line_comment))
+ ex_comment + ast_comment + line_comment))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[
@@ -55,15 +54,14 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[
]], true)))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- -lexer.alpha))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * -lexer.alpha))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alnum^1))
-- Strings.
-local sq_str = lexer.delimited_range("'", true, true)
-local dq_str = lexer.delimited_range('"', true, true)
+local sq_str = lexer.range("'", true, false)
+local dq_str = lexer.range('"', true, false)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Operators.
diff --git a/lexlua/fsharp.lua b/lexlua/fsharp.lua
index d8ecdc628..b80bf37e4 100644
--- a/lexlua/fsharp.lua
+++ b/lexlua/fsharp.lua
@@ -34,26 +34,28 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0 +
- lexer.nested_pair('(*', '*)')))
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('(*', '*)', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float +
- lexer.integer * S('uUlL')^-1)))
+lex:add_rule('number', token(lexer.NUMBER,
+ (lexer.float + lexer.integer * S('uUlL')^-1)))
-- Preprocessor.
local preproc_word = word_match[[
else endif endregion if ifdef ifndef light region
]]
lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') *
- S('\t ')^0 * preproc_word))
+ S('\t ')^0 * preproc_word))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR,
- S('=<>+-*/^.,:;~!@#%^&|?[](){}')))
+ S('=<>+-*/^.,:;~!@#%^&|?[](){}')))
return lex
diff --git a/lexlua/gap.lua b/lexlua/gap.lua
index 49e69fb24..15cbc75dd 100644
--- a/lexlua/gap.lua
+++ b/lexlua/gap.lua
@@ -20,11 +20,12 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * -lexer.alpha))
diff --git a/lexlua/gettext.lua b/lexlua/gettext.lua
index a51133c5e..3c06487b3 100644
--- a/lexlua/gettext.lua
+++ b/lexlua/gettext.lua
@@ -22,10 +22,9 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
lex:add_rule('variable', token(lexer.VARIABLE, S('%$@') * lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * S(': .~') *
- lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#' * S(': .~'))))
return lex
diff --git a/lexlua/gherkin.lua b/lexlua/gherkin.lua
index 2fe38c57d..eedfe3436 100644
--- a/lexlua/gherkin.lua
+++ b/lexlua/gherkin.lua
@@ -16,26 +16,27 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
]]))
-- Strings.
-local doc_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-local dq_str = lexer.delimited_range('"')
+local doc_str = lexer.range('"""')
+local dq_str = lexer.range('"')
lex:add_rule('string', token(lexer.STRING, doc_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-local number = token(lexer.NUMBER, lexer.float + lexer.integer)
+local number = token(lexer.NUMBER, lexer.number)
-- Tags.
lex:add_rule('tag', token('tag', '@' * lexer.word^0))
lex:add_style('tag', lexer.STYLE_LABEL)
-- Placeholders.
-lex:add_rule('placeholder', token('placeholder', lexer.nested_pair('<', '>')))
+lex:add_rule('placeholder', token('placeholder',
+ lexer.range('<', '>', false, false, true)))
lex:add_style('placeholder', lexer.STYLE_VARIABLE)
-- Examples.
-lex:add_rule('example', token('example', '|' * lexer.nonnewline^0))
+lex:add_rule('example', token('example', lexer.to_eol('|')))
lex:add_style('example', lexer.STYLE_NUMBER)
return lex
diff --git a/lexlua/glsl.lua b/lexlua/glsl.lua
index 31440f0a6..d6bef2b44 100644
--- a/lexlua/glsl.lua
+++ b/lexlua/glsl.lua
@@ -19,21 +19,15 @@ lex:modify_rule('keyword', token(lexer.KEYWORD, word_match[[
]]) + lex:get_rule('keyword'))
-- Types.
-lex:modify_rule('type',
- token(lexer.TYPE,
- S('bdiu')^-1 * 'vec' * R('24') +
- P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) +
- S('iu')^-1 * 'sampler' * R('13') * 'D' +
- 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' +
- S('iu')^-1 * 'sampler' * (R('12') * 'DArray' +
- word_match[[
- Cube 2DRect Buffer 2DMS 2DMSArray 2DMSCubeArray
- ]]) +
- word_match[[
- samplerCubeShadow sampler2DRectShadow
- samplerCubeArrayShadow
- ]]) +
- lex:get_rule('type') +
+lex:modify_rule('type', token(lexer.TYPE, S('bdiu')^-1 * 'vec' * R('24') +
+ P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) +
+ S('iu')^-1 * 'sampler' * R('13') * 'D' +
+ 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' +
+ S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + word_match[[
+ Cube 2DRect Buffer 2DMS 2DMSArray 2DMSCubeArray
+ ]]) +
+ word_match[[samplerCubeShadow sampler2DRectShadow samplerCubeArrayShadow]]) +
+ lex:get_rule('type') +
-- Functions.
token(lexer.FUNCTION, word_match[[
diff --git a/lexlua/gnuplot.lua b/lexlua/gnuplot.lua
index 5c14f49ae..ca9ca5bad 100644
--- a/lexlua/gnuplot.lua
+++ b/lexlua/gnuplot.lua
@@ -45,13 +45,13 @@ lex:add_rule('variable', token(lexer.VARIABLE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"') +
- lexer.delimited_range('[]', true) +
- lexer.delimited_range('{}', true)))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local br_str = lexer.range('[', ']', true) + lexer.range('{', '}', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + br_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('-+~!$*%=<>&|^?:()')))
diff --git a/lexlua/go.lua b/lexlua/go.lua
index e4a04a3a6..397908a0a 100644
--- a/lexlua/go.lua
+++ b/lexlua/go.lua
@@ -37,19 +37,18 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = lexer.delimited_range("'", true)
-local dq_str = lexer.delimited_range('"', true)
-local raw_str = lexer.delimited_range('`', false, true)
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local raw_str = lexer.range('`', false, false)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- P('i')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * P('i')^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}')))
diff --git a/lexlua/groovy.lua b/lexlua/groovy.lua
index 3d1398a38..07e0586a0 100644
--- a/lexlua/groovy.lua
+++ b/lexlua/groovy.lua
@@ -40,23 +40,22 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1
-local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local tq_str = lexer.range("'''") + lexer.range('"""')
+local string = token(lexer.STRING, tq_str + sq_str + dq_str)
local regex_str = #P('/') * lexer.last_char_includes('=~|!<>+-*?&,:;([{') *
- lexer.delimited_range('/', true)
-lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str +
- sq_str + dq_str) +
- token(lexer.REGEX, regex_str))
+ lexer.range('/', true)
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}')))
diff --git a/lexlua/gtkrc.lua b/lexlua/gtkrc.lua
index c500c033c..fbeaa86ba 100644
--- a/lexlua/gtkrc.lua
+++ b/lexlua/gtkrc.lua
@@ -35,18 +35,19 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha *
- (lexer.alnum + S('_-'))^0))
+ (lexer.alnum + S('_-'))^0))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 *
- ('.' * lexer.digit^1)^-1))
+ ('.' * lexer.digit^1)^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S(':=,*()[]{}')))
diff --git a/lexlua/haskell.lua b/lexlua/haskell.lua
index 524773301..2b36473a3 100644
--- a/lexlua/haskell.lua
+++ b/lexlua/haskell.lua
@@ -22,22 +22,23 @@ local op = lexer.punct - S('()[]{}')
-- Types & type constructors.
lex:add_rule('type', token(lexer.TYPE, (lexer.upper * word) +
- (":" * (op^1 - ":"))))
+ (":" * (op^1 - ":"))))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-local line_comment = '--' * lexer.nonnewline_esc^0
-local block_comment = '{-' * (lexer.any - '-}')^0 * P('-}')^-1
+local line_comment = lexer.to_eol('--', true)
+local block_comment = lexer.range('{-', '-}')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, op))
diff --git a/lexlua/html.lua b/lexlua/html.lua
index b77d8453c..465a828b6 100644
--- a/lexlua/html.lua
+++ b/lexlua/html.lua
@@ -12,12 +12,11 @@ local ws = token(lexer.WHITESPACE, lexer.space^1)
lex:add_rule('whitespace', ws)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 *
- P('-->')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
-- Doctype.
-lex:add_rule('doctype', token('doctype', '<!' * word_match([[doctype]], true) *
- (lexer.any - '>')^1 * '>'))
+lex:add_rule('doctype', token('doctype',
+ lexer.range('<!' * word_match([[doctype]], true), '>')))
lex:add_style('doctype', lexer.STYLE_COMMENT)
-- Elements.
@@ -37,12 +36,12 @@ local paired_element = token('element', '<' * P('/')^-1 * word_match([[
]], true))
local known_element = single_element + paired_element
local unknown_element = token('unknown_element', '<' * P('/')^-1 *
- (lexer.alnum + '-')^1)
+ (lexer.alnum + '-')^1)
local element = known_element + unknown_element
lex:add_rule('element', element)
lex:add_style('single_element', lexer.STYLE_KEYWORD)
lex:add_style('element', lexer.STYLE_KEYWORD)
-lex:add_style('unknown_element', lexer.STYLE_KEYWORD..',italics')
+lex:add_style('unknown_element', lexer.STYLE_KEYWORD .. ',italics')
-- Closing tags.
local tag_close = token('element', P('/')^-1 * '>')
@@ -66,7 +65,7 @@ local unknown_attribute = token('unknown_attribute', (lexer.alnum + '-')^1)
local attribute = (known_attribute + unknown_attribute) * #(lexer.space^0 * '=')
lex:add_rule('attribute', attribute)
lex:add_style('attribute', lexer.STYLE_TYPE)
-lex:add_style('unknown_attribute', lexer.STYLE_TYPE..',italics')
+lex:add_style('unknown_attribute', lexer.STYLE_TYPE .. ',italics')
-- TODO: performance is terrible on large files.
local in_tag = P(function(input, index)
@@ -83,17 +82,16 @@ local equals = token(lexer.OPERATOR, '=') --* in_tag
-- Strings.
local string = #S('\'"') * lexer.last_char_includes('=') *
- token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"'))
+ token(lexer.STRING, lexer.range("'") + lexer.range('"'))
lex:add_rule('string', string)
-- Numbers.
lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') *
- token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
+ token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
-- Entities.
lex:add_rule('entity', token('entity', '&' * (lexer.any - lexer.space - ';')^1 *
- ';'))
+ ';'))
lex:add_style('entity', lexer.STYLE_COMMENT)
-- Fold points.
@@ -113,15 +111,13 @@ lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
-- Tags that start embedded languages.
-- Export these patterns for proxy lexers (e.g. ASP) that need them.
lex.embed_start_tag = element *
- (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 *
- ws^-1 * tag_close
+ (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * ws^-1 * tag_close
lex.embed_end_tag = element * tag_close
-- Embedded CSS (<style type="text/css"> ... </style>).
local css = lexer.load('css')
local style_element = word_match([[style]], true)
-local css_start_rule = #(P('<') * style_element *
- ('>' + P(function(input, index)
+local css_start_rule = #('<' * style_element * ('>' + P(function(input, index)
if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then
return index
end
@@ -132,8 +128,7 @@ lex:embed(css, css_start_rule, css_end_rule)
-- Embedded JavaScript (<script type="text/javascript"> ... </script>).
local js = lexer.load('javascript')
local script_element = word_match([[script]], true)
-local js_start_rule = #(P('<') * script_element *
- ('>' + P(function(input, index)
+local js_start_rule = #('<' * script_element * ('>' + P(function(input, index)
if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then
return index
end
@@ -142,13 +137,13 @@ local js_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag
local js_line_comment = '//' * (lexer.nonnewline_esc - js_end_rule)^0
local js_block_comment = '/*' * (lexer.any - '*/' - js_end_rule)^0 * P('*/')^-1
js:modify_rule('comment', token(lexer.COMMENT, js_line_comment +
- js_block_comment))
+ js_block_comment))
lex:embed(js, js_start_rule, js_end_rule)
-- Embedded CoffeeScript (<script type="text/coffeescript"> ... </script>).
local cs = lexer.load('coffeescript')
local script_element = word_match([[script]], true)
-local cs_start_rule = #(P('<') * script_element * P(function(input, index)
+local cs_start_rule = #('<' * script_element * P(function(input, index)
if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then
return index
end
diff --git a/lexlua/icon.lua b/lexlua/icon.lua
index 08a4e3f62..a850f03a1 100644
--- a/lexlua/icon.lua
+++ b/lexlua/icon.lua
@@ -32,16 +32,16 @@ lex:add_style('special_keyword', lexer.STYLE_TYPE)
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-- Numbers.
local radix_literal = P('-')^-1 * lexer.dec_num * S('rR') * lexer.alnum^1
-lex:add_rule('number', token(lexer.NUMBER, radix_literal + lexer.float +
- lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, radix_literal + lexer.number))
-- Preprocessor.
local preproc_word = word_match[[
diff --git a/lexlua/idl.lua b/lexlua/idl.lua
index f28652a37..83fb65a8c 100644
--- a/lexlua/idl.lua
+++ b/lexlua/idl.lua
@@ -27,23 +27,24 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Preprocessor.
local preproc_word = word_match[[
define undef ifdef ifndef if elif else endif include warning pragma
]]
lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') *
- preproc_word))
+ preproc_word))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}')))
diff --git a/lexlua/inform.lua b/lexlua/inform.lua
index 1ac6dcd8b..2b6193a15 100644
--- a/lexlua/inform.lua
+++ b/lexlua/inform.lua
@@ -54,17 +54,18 @@ lex:add_style('action', lexer.STYLE_VARIABLE)
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '!' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('!')))
-- Numbers.
local inform_hex = '$' * lexer.xdigit^1
local inform_bin = '$$' * S('01')^1
lex:add_rule('number', token(lexer.NUMBER, lexer.integer + inform_hex +
- inform_bin))
+ inform_bin))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('@~=+-*/%^#=<>;:,.{}[]()&|?')))
diff --git a/lexlua/ini.lua b/lexlua/ini.lua
index d5445ba12..d56539d42 100644
--- a/lexlua/ini.lua
+++ b/lexlua/ini.lua
@@ -17,19 +17,19 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') *
- (lexer.alnum + S('_.'))^0))
+ (lexer.alnum + S('_.'))^0))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Labels.
-lex:add_rule('label', token(lexer.LABEL,
- lexer.delimited_range('[]', true, true)))
+lex:add_rule('label', token(lexer.LABEL, lexer.range('[', ']', true)))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(S(';#')) *
- lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT,
+ lexer.to_eol(lexer.starts_line(S(';#')))))
-- Numbers.
local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
diff --git a/lexlua/io_lang.lua b/lexlua/io_lang.lua
index 44143dc6f..573bd7ca8 100644
--- a/lexlua/io_lang.lua
+++ b/lexlua/io_lang.lua
@@ -26,22 +26,22 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local tq_str = lexer.range('"""')
lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str))
-- Comments.
-local line_comment = (P('#') + '//') * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol(P('#') + '//')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR,
- S('`~@$%^&*-+/=\\<>?.,:;()[]{}')))
+ S('`~@$%^&*-+/=\\<>?.,:;()[]{}')))
-- Fold points.
lex:add_fold_point(lexer.OPERATOR, '(', ')')
diff --git a/lexlua/java.lua b/lexlua/java.lua
index cefac563f..ad83c5745 100644
--- a/lexlua/java.lua
+++ b/lexlua/java.lua
@@ -14,7 +14,7 @@ lex:add_rule('whitespace', ws)
-- Classes.
lex:add_rule('classdef', token(lexer.KEYWORD, P('class')) * ws *
- token(lexer.CLASS, lexer.word))
+ token(lexer.CLASS, lexer.word))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
@@ -39,17 +39,17 @@ lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('LlFfDd')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1))
-- Annotations.
lex:add_rule('annotation', token('annotation', '@' * lexer.word))
diff --git a/lexlua/javascript.lua b/lexlua/javascript.lua
index b8bbfd242..a2733a1cb 100644
--- a/lexlua/javascript.lua
+++ b/lexlua/javascript.lua
@@ -24,20 +24,22 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+local string = token(lexer.STRING, sq_str + dq_str + bq_str)
local regex_str = #P('/') * lexer.last_char_includes('+-*%^!=&|?:;,([{<>') *
- lexer.delimited_range('/', true) * S('igm')^0
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"') +
- lexer.delimited_range('`')) +
- token(lexer.REGEX, regex_str))
+ lexer.range('/', true) * S('igm')^0
+local regex = token(lexer.REGEX, regex_str)
+lex:add_rule('string', string + regex)
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>')))
diff --git a/lexlua/json.lua b/lexlua/json.lua
index 787fc99a5..c6feb3a60 100644
--- a/lexlua/json.lua
+++ b/lexlua/json.lua
@@ -12,15 +12,16 @@ local lex = lexer.new('json')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[true false null]]))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
diff --git a/lexlua/latex.lua b/lexlua/latex.lua
index c4df8e641..bb2f70823 100644
--- a/lexlua/latex.lua
+++ b/lexlua/latex.lua
@@ -13,10 +13,9 @@ local lex = lexer.new('latex')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local line_comment = '%' * lexer.nonnewline^0
-local block_comment = '\\begin' * P(' ')^0 * '{comment}' *
- (lexer.any - '\\end' * P(' ')^0 * '{comment}')^0 *
- P('\\end' * P(' ')^0 * '{comment}')^-1
+local line_comment = lexer.to_eol('%')
+local block_comment = lexer.range('\\begin' * P(' ')^0 * '{comment}',
+ '\\end' * P(' ')^0 * '{comment}')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Math environments.
@@ -24,15 +23,13 @@ local math_word = word_match[[
align displaymath eqnarray equation gather math multline
]]
local math_begin_end = (P('begin') + P('end')) * P(' ')^0 *
- '{' * math_word * P('*')^-1 * '}'
+ '{' * math_word * P('*')^-1 * '}'
lex:add_rule('math', token('math', '$' + '\\' * (S('[]()') + math_begin_end)))
lex:add_style('math', lexer.STYLE_FUNCTION)
-- LaTeX environments.
lex:add_rule('environment', token('environment', '\\' *
- (P('begin') + P('end')) *
- P(' ')^0 * '{' * lexer.word *
- P('*')^-1 * '}'))
+ (P('begin') + P('end')) * P(' ')^0 * '{' * lexer.word * P('*')^-1 * '}'))
lex:add_style('environment', lexer.STYLE_KEYWORD)
-- Sections.
@@ -43,7 +40,7 @@ lex:add_style('section', lexer.STYLE_CLASS)
-- Commands.
lex:add_rule('command', token('command', '\\' *
- (lexer.alpha^1 + S('#$&~_^%{}'))))
+ (lexer.alpha^1 + S('#$&~_^%{}'))))
lex:add_style('command', lexer.STYLE_KEYWORD)
-- Operators.
diff --git a/lexlua/ledger.lua b/lexlua/ledger.lua
index 01e270d1d..2daaab46a 100644
--- a/lexlua/ledger.lua
+++ b/lexlua/ledger.lua
@@ -10,32 +10,31 @@ local lex = lexer.new('ledger', {lex_by_line = true})
local delim = P('\t') + P(' ')
-- Account.
-lex:add_rule('account', token(lexer.VARIABLE,
- lexer.starts_line(S(' \t')^1 *
- (lexer.print - delim)^1)))
+lex:add_rule('account', token(lexer.VARIABLE, lexer.starts_line(S(' \t')^1 *
+ (lexer.print - delim)^1)))
-- Amount.
lex:add_rule('amount', token(lexer.NUMBER, delim * (1 - S(';\r\n'))^1))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, S(';#') * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(S(';#'))))
-- Whitespace.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Strings.
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local label = lexer.delimited_range('[]', true, true)
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local label = lexer.range('[', ']', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + label))
-- Date.
lex:add_rule('date', token(lexer.CONSTANT,
- lexer.starts_line((lexer.digit + S('/-'))^1)))
+ lexer.starts_line((lexer.digit + S('/-'))^1)))
-- Automated transactions.
lex:add_rule('auto_tx', token(lexer.PREPROCESSOR,
- lexer.starts_line(S('=~') * lexer.nonnewline^0)))
+ lexer.to_eol(lexer.starts_line(S('=~')))))
-- Directives.
local directive_word = word_match[[
@@ -43,6 +42,6 @@ local directive_word = word_match[[
endfixed include payee apply tag test year
]] + S('AYNDCIiOobh')
lex:add_rule('directive', token(lexer.KEYWORD,
- lexer.starts_line(S('!@')^-1 * directive_word)))
+ lexer.starts_line(S('!@')^-1 * directive_word)))
return lex
diff --git a/lexlua/less.lua b/lexlua/less.lua
index 5534369c6..ee8fc8896 100644
--- a/lexlua/less.lua
+++ b/lexlua/less.lua
@@ -9,11 +9,11 @@ local S = lpeg.S
local lex = lexer.new('less', {inherit = lexer.load('css')})
-- Line comments.
-lex:add_rule('line_comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0))
+lex:add_rule('line_comment', token(lexer.COMMENT, lexer.to_eol('//')))
-- Variables.
lex:add_rule('variable', token(lexer.VARIABLE, '@' *
- (lexer.alnum + S('_-{}'))^1))
+ (lexer.alnum + S('_-{}'))^1))
-- Fold points.
lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//'))
diff --git a/lexlua/lexer.lua b/lexlua/lexer.lua
index 68183aa29..d133eb11d 100644
--- a/lexlua/lexer.lua
+++ b/lexlua/lexer.lua
@@ -139,10 +139,10 @@ local M = {}
-- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](),
-- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](),
-- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](),
--- [`lexer.float`](), and [`lexer.word`](). You may use your own token names if
--- none of the above fit your language, but an advantage to using predefined
--- token names is that your lexer's tokens will inherit the universal syntax
--- highlighting color theme used by your text editor.
+-- [`lexer.float`](), [`lexer.number`](), and [`lexer.word`](). You may use your
+-- own token names if none of the above fit your language, but an advantage to
+-- using predefined token names is that your lexer's tokens will inherit the
+-- universal syntax highlighting color theme used by your text editor.
--
-- ##### Example Tokens
--
@@ -185,9 +185,8 @@ local M = {}
--
-- Line-style comments with a prefix character(s) are easy to express with LPeg:
--
--- local shell_comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0)
--- local c_line_comment = token(lexer.COMMENT,
--- '//' * lexer.nonnewline_esc^0)
+-- local shell_comment = token(lexer.COMMENT, lexer.to_eol('#'))
+-- local c_line_comment = token(lexer.COMMENT, lexer.to_eol('//', true))
--
-- The comments above start with a '#' or "//" and go to the end of the line.
-- The second comment recognizes the next line also as a comment if the current
@@ -196,8 +195,7 @@ local M = {}
-- C-style "block" comments with a start and end delimiter are also easy to
-- express:
--
--- local c_comment = token(lexer.COMMENT, '/*' * (lexer.any - '*/')^0 *
--- P('*/')^-1)
+-- local c_comment = token(lexer.COMMENT, lexer.range('/*', '*/'))
--
-- This comment starts with a "/\*" sequence and contains anything up to and
-- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer
@@ -205,21 +203,13 @@ local M = {}
--
-- **Strings**
--
--- It is tempting to think that a string is not much different from the block
--- comment shown above in that both have start and end delimiters:
+-- Most programming languages allow escape sequences in strings such that a
+-- sequence like "\\&quot;" in a double-quoted string indicates that the
+-- '&quot;' is not the end of the string. [`lexer.range()`]() handles escapes
+-- inherently.
--
--- local dq_str = '"' * (lexer.any - '"')^0 * P('"')^-1
--- local sq_str = "'" * (lexer.any - "'")^0 * P("'")^-1
--- local simple_string = token(lexer.STRING, dq_str + sq_str)
---
--- However, most programming languages allow escape sequences in strings such
--- that a sequence like "\\&quot;" in a double-quoted string indicates that the
--- '&quot;' is not the end of the string. The above token incorrectly matches
--- such a string. Instead, use the [`lexer.delimited_range()`]() convenience
--- function.
---
--- local dq_str = lexer.delimited_range('"')
--- local sq_str = lexer.delimited_range("'")
+-- local dq_str = lexer.range('"')
+-- local sq_str = lexer.range("'")
-- local string = token(lexer.STRING, dq_str + sq_str)
--
-- In this case, the lexer treats '\' as an escape character in a string
@@ -228,9 +218,9 @@ local M = {}
-- **Numbers**
--
-- Most programming languages have the same format for integer and float tokens,
--- so it might be as simple as using a couple of predefined LPeg patterns:
+-- so it might be as simple as using a predefined LPeg pattern:
--
--- local number = token(lexer.NUMBER, lexer.float + lexer.integer)
+-- local number = token(lexer.NUMBER, lexer.number)
--
-- However, some languages allow postfix characters on integers.
--
@@ -714,9 +704,9 @@ local M = {}
-- lex:add_rule('custom', token('custom', P('quux')))
-- lex:add_style('custom', lexer.STYLE_KEYWORD .. ',bold')
-- lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
--- lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"')))
--- lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
--- lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+-- lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
+-- lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
+-- lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}')))
--
-- lex:add_fold_point(lexer.OPERATOR, '{', '}')
@@ -769,7 +759,7 @@ local M = {}
-- #### Acknowledgements
--
-- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list
--- that inspired me, and thanks to Roberto Ierusalimschy for LPeg.
+-- that provided inspiration, and thanks to Roberto Ierusalimschy for LPeg.
--
-- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html
-- @field DEFAULT (string)
@@ -906,6 +896,9 @@ local M = {}
-- A pattern that matches either a decimal, hexadecimal, or octal number.
-- @field float (pattern)
-- A pattern that matches a floating point number.
+-- @field number (pattern)
+-- A pattern that matches a typical number, either a floating point, decimal,
+-- hexadecimal, or octal number.
-- @field word (pattern)
-- A pattern that matches a typical word. Words begin with a letter or
-- underscore and consist of alphanumeric and underscore characters.
@@ -965,7 +958,8 @@ local function searchpath(name, path)
local tried = {}
for part in path:gmatch('[^;]+') do
local filename = part:gsub('%?', name)
- if loadfile(filename) then return filename end
+ local ok, errmsg = loadfile(filename)
+ if ok or not errmsg:find('cannot open') then return filename end
tried[#tried + 1] = string.format("no file '%s'", filename)
end
return nil, table.concat(tried, '\n')
@@ -1605,6 +1599,7 @@ M.float = lpeg_S('+-')^-1 * (
(M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0 * -lpeg_P('.')) *
(lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 +
(M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1))
+M.number = M.float + M.integer
M.word = (M.alpha + '_') * (M.alnum + '_')^0
@@ -1625,6 +1620,69 @@ function M.token(name, patt)
end
---
+-- Creates and returns a pattern that matches from string or pattern *prefix*
+-- until the end of the line.
+-- *escape* indicates whether the end of the line can be escaped with a '\'
+-- character.
+-- @param prefix String or pattern prefix to start matching at.
+-- @param escape Optional flag indicating whether or not newlines can be escaped
+-- by a '\' character. The default value is `false`.
+-- @return pattern
+-- @usage local line_comment = lexer.to_eol('//')
+-- @usage local line_comment = lexer.to_eol(P('#') + ';')
+-- @name to_eol
+function M.to_eol(prefix, escape)
+ return prefix * (not escape and M.nonnewline or M.nonnewline_esc)^0
+end
+
+---
+-- Creates and returns a pattern that matches a range of text bounded by strings
+-- or patterns *s* and *e*.
+-- This is a convenience function for matching more complicated ranges like
+-- strings with escape characters, balanced parentheses, and block comments
+-- (nested or not). *e* is optional and defaults to *s*. *single_line* indicates
+-- whether or not the range must be on a single line; *escapes* indicates
+-- whether or not to allow '\' as an escape character; and *balanced* indicates
+-- whether or not to handle balanced ranges like parentheses, and requires *s*
+-- and *e* to be different.
+-- @param s String or pattern start of a range.
+-- @param e Optional string or pattern end of a range. The default value is *s*.
+-- @param single_line Optional flag indicating whether or not the range must be
+-- on a single line.
+-- @param escapes Optional flag indicating whether or not the range end may
+-- be escaped by a '\' character.
+-- The default value is `false` unless *s* and *e* are identical,
+-- single-character strings. In that case, the default value is `true`.
+-- @param balanced Optional flag indicating whether or not to match a balanced
+-- range, like the "%b" Lua pattern. This flag only applies if *s* and *e* are
+-- different.
+-- @return pattern
+-- @usage local dq_str_escapes = lexer.range('"')
+-- @usage local dq_str_noescapes = lexer.range('"', false, false)
+-- @usage local unbalanced_parens = lexer.range('(', ')')
+-- @usage local balanced_parens = lexer.range('(', ')', false, false, true)
+-- @name range
+function M.range(s, e, single_line, escapes, balanced)
+ if type(e) ~= 'string' and type(e) ~= 'userdata' then
+ e, single_line, escapes, balanced = s, e, single_line, escapes
+ end
+ local any = M.any - e
+ if single_line then any = any - '\n' end
+ if balanced then any = any - s end
+ if escapes == nil then
+ -- Only allow escapes by default for ranges with identical, single-character
+ -- string delimiters.
+ escapes = type(s) == 'string' and #s == 1 and s == e
+ end
+ if escapes then any = any - '\\' + '\\' * M.any end
+ if balanced and s ~= e then
+ return lpeg_P{s * (any + lpeg_V(1))^0 * lpeg_P(e)^-1}
+ else
+ return s * any^0 * lpeg_P(e)^-1
+ end
+end
+
+-- Deprecated function. Use `lexer.range()` instead.
-- Creates and returns a pattern that matches a range of text bounded by
-- *chars* characters.
-- This is a convenience function for matching more complicated delimited ranges
@@ -1647,9 +1705,10 @@ end
-- @usage local unbalanced_parens = lexer.delimited_range('()')
-- @usage local balanced_parens = lexer.delimited_range('()', false, false,
-- true)
--- @see nested_pair
+-- @see range
-- @name delimited_range
function M.delimited_range(chars, single_line, no_escape, balanced)
+ print("lexer.delimited_range() is deprecated, use lexer.range()")
local s = chars:sub(1, 1)
local e = #chars == 2 and chars:sub(2, 2) or s
local range
@@ -1692,7 +1751,7 @@ end
-- @param s String character set like one passed to `lpeg.S()`.
-- @return pattern
-- @usage local regex = lexer.last_char_includes('+-*!%^&|=,([{') *
--- lexer.delimited_range('/')
+-- lexer.range('/')
-- @name last_char_includes
function M.last_char_includes(s)
s = string.format('[%s]', s:gsub('[-%%%[]', '%%%1'))
@@ -1704,7 +1763,7 @@ function M.last_char_includes(s)
end)
end
----
+-- Deprecated function. Use `lexer.range()` instead.
-- Returns a pattern that matches a balanced range of text that starts with
-- string *start_chars* and ends with string *end_chars*.
-- With single-character delimiters, this function is identical to
@@ -1713,9 +1772,10 @@ end
-- @param end_chars The string ending a nested sequence.
-- @return pattern
-- @usage local nested_comment = lexer.nested_pair('/*', '*/')
--- @see delimited_range
+-- @see range
-- @name nested_pair
function M.nested_pair(start_chars, end_chars)
+ print("lexer.nested_pair() is deprecated, use lexer.range()")
local s, e = start_chars, lpeg_P(end_chars)^-1
return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e}
end
diff --git a/lexlua/lilypond.lua b/lexlua/lilypond.lua
index a7775016a..645a64fbc 100644
--- a/lexlua/lilypond.lua
+++ b/lexlua/lilypond.lua
@@ -18,12 +18,11 @@ lex:add_rule('keyword', token(lexer.KEYWORD, '\\' * lexer.word))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range('"', false, true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', false, false)))
-- Comments.
-- TODO: block comment.
-lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S("{}'~<>|")))
diff --git a/lexlua/lisp.lua b/lexlua/lisp.lua
index 374956b1a..88d6488f0 100644
--- a/lexlua/lisp.lua
+++ b/lexlua/lisp.lua
@@ -36,17 +36,16 @@ local word = lexer.alpha * (lexer.alnum + '_' + '-')^0
lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, "'" * word +
- lexer.delimited_range('"')))
+lex:add_rule('string', token(lexer.STRING, "'" * word + lexer.range('"')))
-- Comments.
-local line_comment = ';' * lexer.nonnewline^0
-local block_comment = '#|' * (lexer.any - '|#')^0 * P('|#')^-1
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#|', '|#')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 *
- (S('./') * lexer.digit^1)^-1))
+ (S('./') * lexer.digit^1)^-1))
-- Entities.
lex:add_rule('entity', token('entity', '&' * word))
diff --git a/lexlua/litcoffee.lua b/lexlua/litcoffee.lua
index 916b4e78f..499a788c7 100644
--- a/lexlua/litcoffee.lua
+++ b/lexlua/litcoffee.lua
@@ -17,6 +17,6 @@ lex:embed(coffeescript, coffee_start_rule, coffee_end_rule)
-- Use 'markdown_whitespace' instead of lexer.WHITESPACE since the latter would
-- expand to 'litcoffee_whitespace'.
lex:modify_rule('whitespace', token('markdown_whitespace', S(' \t')^1 +
- S('\r\n')^1))
+ S('\r\n')^1))
return lex
diff --git a/lexlua/logtalk.lua b/lexlua/logtalk.lua
index bc9a1d810..245af99fd 100644
--- a/lexlua/logtalk.lua
+++ b/lexlua/logtalk.lua
@@ -65,6 +65,6 @@ local operators = [[
as
]]
lex:modify_rule('operator', token(lexer.OPERATOR, word_match(operators)) +
- lex:get_rule('operator'))
+ lex:get_rule('operator'))
return lex
diff --git a/lexlua/lua.lua b/lexlua/lua.lua
index c648475ef..655f237f5 100644
--- a/lexlua/lua.lua
+++ b/lexlua/lua.lua
@@ -32,7 +32,7 @@ local deprecated_func = token('deprecated_function', word_match[[
getfenv loadstring module setfenv unpack
]])
lex:add_rule('function', func + deprecated_func)
-lex:add_style('deprecated_function', lexer.STYLE_FUNCTION..',italics')
+lex:add_style('deprecated_function', lexer.STYLE_FUNCTION .. ',italics')
-- Constants.
lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
@@ -103,26 +103,28 @@ local deprecated_library = token('deprecated_library', word_match[[
]])
lex:add_rule('library', library + deprecated_library)
lex:add_style('library', lexer.STYLE_TYPE)
-lex:add_style('deprecated_library', lexer.STYLE_TYPE..',italics')
+lex:add_style('deprecated_library', lexer.STYLE_TYPE .. ',italics')
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[',
- function(input, index, eq)
- local _, e = input:find(']'..eq..']', index, true)
- return (e or #input) + 1
- end)
+ function(input, index, eq)
+ local _, e = input:find(']' .. eq .. ']', index, true)
+ return (e or #input) + 1
+ end)
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')) +
- token('longstring', longstring))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str) +
+ token('longstring', longstring))
lex:add_style('longstring', lexer.STYLE_STRING)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '--' * (longstring +
- lexer.nonnewline^0)))
+local line_comment = lexer.to_eol('--')
+local block_comment = '--' * longstring
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Numbers.
local lua_integer = P('-')^-1 * (lexer.hex_num + lexer.dec_num)
@@ -133,7 +135,7 @@ lex:add_rule('label', token(lexer.LABEL, '::' * lexer.word * '::'))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, '..' +
- S('+-*/%^#=<>&|~;:,.{}[]()')))
+ S('+-*/%^#=<>&|~;:,.{}[]()')))
-- Fold points.
local function fold_longcomment(text, pos, line, s, symbol)
diff --git a/lexlua/makefile.lua b/lexlua/makefile.lua
index e1f9fef99..ee8f0ac59 100644
--- a/lexlua/makefile.lua
+++ b/lexlua/makefile.lua
@@ -29,7 +29,7 @@ local special_target = token(lexer.CONSTANT, word_match[[
]])
local normal_target = token('target', (lexer.any - lexer.space - S(':#='))^1)
lex:add_rule('target', lexer.starts_line((special_target + normal_target) *
- ws^0 * #(':' * -P('='))))
+ ws^0 * #(':' * -P('='))))
lex:add_style('target', lexer.STYLE_LABEL)
-- Variables.
@@ -52,21 +52,22 @@ local implicit_var = word_match[[
DESTDIR MAKE MAKEFLAGS MAKEOVERRIDES MFLAGS
]] * #(ws^0 * assign)
local computed_var = token(lexer.OPERATOR, '$' * S('({')) *
- token(lexer.FUNCTION, word_match[[
- -- Functions for String Substitution and Analysis.
- subst patsubst strip findstring filter filter-out sort word wordlist words
- firstword lastword
- -- Functions for File Names.
- dir notdir suffix basename addsuffix addprefix join wildcard realpath abspath
- -- Functions for Conditionals.
- if or and
- -- Miscellaneous Functions.
- foreach call value eval origin flavor shell
- -- Functions That Control Make.
- error warning info
-]])
+ token(lexer.FUNCTION, word_match[[
+ -- Functions for String Substitution and Analysis.
+ subst patsubst strip findstring filter filter-out sort word wordlist words
+ firstword lastword
+ -- Functions for File Names.
+ dir notdir suffix basename addsuffix addprefix join wildcard realpath
+ abspath
+ -- Functions for Conditionals.
+ if or and
+ -- Miscellaneous Functions.
+ foreach call value eval origin flavor shell
+ -- Functions That Control Make.
+ error warning info
+ ]])
local variable = token(lexer.VARIABLE, expanded_var + auto_var + special_var +
- implicit_var) + computed_var
+ implicit_var) + computed_var
lex:add_rule('variable', variable)
-- Operators.
@@ -76,14 +77,14 @@ lex:add_rule('operator', token(lexer.OPERATOR, assign + S(':$(){}')))
lex:add_rule('identifier', token(lexer.IDENTIFIER, word_char^1))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Embedded Bash.
local bash = lexer.load('bash')
bash:modify_rule('variable', token(lexer.VARIABLE, '$$' * word_char^1) +
- bash:get_rule('variable') + variable)
+ bash:get_rule('variable') + variable)
local bash_start_rule = token(lexer.WHITESPACE, P('\t')) +
- token(lexer.OPERATOR, P(';'))
+ token(lexer.OPERATOR, P(';'))
local bash_end_rule = token(lexer.WHITESPACE, P('\n'))
lex:embed(bash, bash_start_rule, bash_end_rule)
diff --git a/lexlua/man.lua b/lexlua/man.lua
index efe765480..3a875f6b9 100644
--- a/lexlua/man.lua
+++ b/lexlua/man.lua
@@ -11,19 +11,16 @@ local lex = lexer.new('man')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Markup.
-lex:add_rule('rule1', token(lexer.STRING, '.' * ('B' * P('R')^-1 +
- 'I' * P('PR')^-1) *
- lexer.nonnewline^0))
-lex:add_rule('rule2', token(lexer.NUMBER, '.' * S('ST') * 'H' *
- lexer.nonnewline^0))
+lex:add_rule('rule1', token(lexer.STRING, '.' *
+ lexer.to_eol('B' * P('R')^-1 + 'I' * P('PR')^-1)))
+lex:add_rule('rule2', token(lexer.NUMBER, lexer.to_eol('.' * S('ST') * 'H')))
lex:add_rule('rule3', token(lexer.KEYWORD, P('.br') + '.DS' + '.RS' + '.RE' +
- '.PD'))
+ '.PD'))
lex:add_rule('rule4', token(lexer.LABEL, '.' * (S('ST') * 'H' + '.TP')))
lex:add_rule('rule5', token(lexer.VARIABLE, '.B' * P('R')^-1 +
- '.I' * S('PR')^-1 +
- '.PP'))
+ '.I' * S('PR')^-1 + '.PP'))
lex:add_rule('rule6', token(lexer.TYPE, '\\f' * S('BIPR')))
lex:add_rule('rule7', token(lexer.PREPROCESSOR, lexer.starts_line('.') *
- lexer.alpha^1))
+ lexer.alpha^1))
return lex
diff --git a/lexlua/markdown.lua b/lexlua/markdown.lua
index e4bba29a0..cac5c8322 100644
--- a/lexlua/markdown.lua
+++ b/lexlua/markdown.lua
@@ -8,58 +8,40 @@ local P, R, S = lpeg.P, lpeg.R, lpeg.S
local lex = lexer.new('markdown')
-- Block elements.
-lex:add_rule('header',
- token('h6', lexer.starts_line('######') * lexer.nonnewline^0) +
- token('h5', lexer.starts_line('#####') * lexer.nonnewline^0) +
- token('h4', lexer.starts_line('####') * lexer.nonnewline^0) +
- token('h3', lexer.starts_line('###') * lexer.nonnewline^0) +
- token('h2', lexer.starts_line('##') * lexer.nonnewline^0) +
- token('h1', lexer.starts_line('#') * lexer.nonnewline^0))
-local font_size = lexer.property_int['fontsize'] > 0 and
- lexer.property_int['fontsize'] or 10
-local hstyle = 'fore:$(color.red)'
-lex:add_style('h6', hstyle)
-lex:add_style('h5', hstyle..',size:'..(font_size + 1))
-lex:add_style('h4', hstyle..',size:'..(font_size + 2))
-lex:add_style('h3', hstyle..',size:'..(font_size + 3))
-lex:add_style('h2', hstyle..',size:'..(font_size + 4))
-lex:add_style('h1', hstyle..',size:'..(font_size + 5))
-
-lex:add_rule('blockquote',
- token(lexer.STRING,
- lpeg.Cmt(lexer.starts_line(S(' \t')^0 * '>'),
- function(input, index)
- local _, e = input:find('\n[ \t]*\r?\n', index)
- return (e or #input) + 1
- end)))
-
-lex:add_rule('list', token('list', lexer.starts_line(S(' \t')^0 * (S('*+-') +
- R('09')^1 * '.')) *
- S(' \t')))
+local function h(n)
+ return token('h' .. n, lexer.to_eol(lexer.starts_line(string.rep('#', n))))
+end
+lex:add_rule('header', h(6) + h(5) + h(4) + h(3) + h(2) + h(1))
+local function add_header_style(n)
+ local font_size = lexer.property_int['fontsize'] > 0 and
+ lexer.property_int['fontsize'] or 10
+ lex:add_style('h' .. n, 'fore:$(color.red),size:' .. (font_size + (6 - n)))
+end
+for i = 1, 6 do add_header_style(i) end
+
+lex:add_rule('blockquote', token(lexer.STRING,
+ lpeg.Cmt(lexer.starts_line(S(' \t')^0 * '>'), function(input, index)
+ local _, e = input:find('\n[ \t]*\r?\n', index)
+ return (e or #input) + 1
+ end)))
+
+lex:add_rule('list', token('list',
+ lexer.starts_line(S(' \t')^0 * (S('*+-') + R('09')^1 * '.')) * S(' \t')))
lex:add_style('list', lexer.STYLE_CONSTANT)
-lex:add_rule('block_code',
- token('code', lexer.starts_line(P(' ')^4 + P('\t')) * -P('<') *
- lexer.nonnewline^0 * lexer.newline^-1) +
- token('code', lexer.starts_line(P('```')) * (lexer.any - '```')^0 *
- P('```')^-1))
-lex:add_rule('inline_code',
- token('code', P('``') * (lexer.any - '``')^0 * P('``')^-1 +
- lexer.delimited_range('`', false, true)))
-lex:add_style('code', lexer.STYLE_EMBEDDED..',eolfilled')
-
-lex:add_rule('hr',
- token('hr',
- lpeg.Cmt(lexer.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))),
- function(input, index, c)
- local line = input:match('[^\r\n]*', index)
- line = line:gsub('[ \t]', '')
- if line:find('[^'..c..']') or #line < 2 then
- return nil
- end
- return (select(2, input:find('\r?\n', index)) or
- #input) + 1
- end)))
+local code_line = lexer.to_eol(lexer.starts_line(P(' ')^4 + '\t') * -P('<')) *
+ lexer.newline^-1
+local code_block = lexer.range(lexer.starts_line('```'), '```')
+local code_inline = lexer.range('``') + lexer.range('`', false, false)
+lex:add_rule('block_code', token('code', code_line + code_block + code_inline))
+lex:add_style('code', lexer.STYLE_EMBEDDED .. ',eolfilled')
+
+lex:add_rule('hr', token('hr', lpeg.Cmt(
+ lexer.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))), function(input, index, c)
+ local line = input:match('[^\r\n]*', index):gsub('[ \t]', '')
+ if line:find('[^' .. c .. ']') or #line < 2 then return nil end
+ return (select(2, input:find('\r?\n', index)) or #input) + 1
+ end)))
lex:add_style('hr', 'back:$(color.black),eolfilled')
-- Whitespace.
@@ -69,23 +51,22 @@ lex:add_rule('whitespace', ws)
-- Span elements.
lex:add_rule('escape', token(lexer.DEFAULT, P('\\') * 1))
-lex:add_rule('link_label',
- token('link_label', lexer.delimited_range('[]') * ':') * ws *
- token('link_url', (lexer.any - lexer.space)^1) *
- (ws * token(lexer.STRING, lexer.delimited_range('"', false, true) +
- lexer.delimited_range("'", false, true) +
- lexer.delimited_range('()')))^-1)
+local ref_link_label = token('link_label', lexer.range('[', ']', true) * ':')
+local ref_link_url = token('link_url', (lexer.any - lexer.space)^1)
+local ref_link_title = token(lexer.STRING, lexer.range('"', true, false) +
+ lexer.range("'", true, false) + lexer.range('(', ')', true))
+lex:add_rule('link_label', ref_link_label * ws * ref_link_url *
+ (ws * ref_link_title)^-1)
lex:add_style('link_label', lexer.STYLE_LABEL)
lex:add_style('link_url', 'underlined')
-lex:add_rule('link',
- token('link', P('!')^-1 * lexer.delimited_range('[]') *
- (P('(') * (lexer.any - S(') \t'))^0 *
- (S(' \t')^1 *
- lexer.delimited_range('"', false, true))^-1 * ')' +
- S(' \t')^0 * lexer.delimited_range('[]')) +
- 'http' * P('s')^-1 * '://' *
- (lexer.any - lexer.space)^1))
+local link_label = P('!')^-1 * lexer.range('[', ']', true)
+local link_target = P('(') * (lexer.any - S(') \t'))^0 *
+ (S(' \t')^1 * lexer.range('"', false, false))^-1 * ')'
+local link_ref = S(' \t')^0 * lexer.range('[', ']', true)
+local link_url = 'http' * P('s')^-1 * '://' * (lexer.any - lexer.space)^1
+lex:add_rule('link', token('link', link_label * (link_target + link_ref) +
+ link_url))
lex:add_style('link', 'underlined')
local punct_space = lexer.punct + lexer.space
@@ -96,29 +77,27 @@ local punct_space = lexer.punct + lexer.space
local function flanked_range(s, not_inword)
local fl_char = lexer.any - s - lexer.space
local left_fl = lpeg.B(punct_space - s) * s * #fl_char +
- s * #(fl_char - lexer.punct)
+ s * #(fl_char - lexer.punct)
local right_fl = lpeg.B(lexer.punct) * s * #(punct_space - s) +
- lpeg.B(fl_char) * s
+ lpeg.B(fl_char) * s
return left_fl * (lexer.any - (not_inword and s * #punct_space or s))^0 *
- right_fl
+ right_fl
end
-lex:add_rule('strong',
- token('strong', flanked_range('**') +
- (lpeg.B(punct_space) + #lexer.starts_line('_')) *
- flanked_range('__', true) * #(punct_space + -1)))
+lex:add_rule('strong', token('strong', flanked_range('**') +
+ (lpeg.B(punct_space) + #lexer.starts_line('_')) * flanked_range('__', true) *
+ #(punct_space + -1)))
lex:add_style('strong', 'bold')
-lex:add_rule('em',
- token('em', flanked_range('*') +
- (lpeg.B(punct_space) + #lexer.starts_line('_')) *
- flanked_range('_', true) * #(punct_space + -1)))
+lex:add_rule('em', token('em', flanked_range('*') +
+ (lpeg.B(punct_space) + #lexer.starts_line('_')) * flanked_range('_', true) *
+ #(punct_space + -1)))
lex:add_style('em', 'italics')
-- Embedded HTML.
local html = lexer.load('html')
local start_rule = lexer.starts_line(S(' \t')^0) * #P('<') *
- html:get_rule('element')
+ html:get_rule('element')
local end_rule = token(lexer.DEFAULT, P('\n')) -- TODO: lexer.WHITESPACE errors
lex:embed(html, start_rule, end_rule)
diff --git a/lexlua/matlab.lua b/lexlua/matlab.lua
index d371ebc96..7800a421c 100644
--- a/lexlua/matlab.lua
+++ b/lexlua/matlab.lua
@@ -54,23 +54,22 @@ lex:add_rule('variable', token(lexer.VARIABLE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"') +
- lexer.delimited_range('`')))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str))
-- Comments.
-local line_comment = (P('%') + '#') * lexer.nonnewline^0
-local block_comment = '%{' * (lexer.any - '%}')^0 * P('%}')^-1
+local line_comment = lexer.to_eol(P('%') + '#')
+local block_comment = lexer.range('%{', '%}')
lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer +
- lexer.dec_num + lexer.hex_num +
- lexer.oct_num))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR,
- S('!%^&*()[]{}-=+/\\|:;.,?<>~`´')))
+ S('!%^&*()[]{}-=+/\\|:;.,?<>~`´')))
-- Fold points.
lex:add_fold_point(lexer.KEYWORD, 'if', 'end')
diff --git a/lexlua/mediawiki.lua b/lexlua/mediawiki.lua
index 6a8a3a704..27a7409d8 100644
--- a/lexlua/mediawiki.lua
+++ b/lexlua/mediawiki.lua
@@ -9,18 +9,15 @@ local P, R, S, B = lpeg.P, lpeg.R, lpeg.S, lpeg.B
local lex = lexer.new('mediawiki')
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 *
- P('-->')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
-- HTML-like tags
local tag_start = token('tag_start', '<' * P('/')^-1 * lexer.alnum^1 *
- lexer.space^0)
+ lexer.space^0)
local tag_attr = token('tag_attr', lexer.alpha^1 * lexer.space^0 *
- ('=' * lexer.space^0 *
- ('"' * ((lexer.any - S('>"\\')) +
- ('\\' * lexer.any))^0 * '"' +
- (lexer.any - lexer.space - '>')^0)^-1)^0 *
- lexer.space^0)
+ ('=' * lexer.space^0 *
+ ('"' * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * '"' +
+ (lexer.any - lexer.space - '>')^0)^-1)^0 * lexer.space^0)
local tag_end = token('tag_end', P('/')^-1 * '>')
lex:add_rule('tag', tag_start * tag_attr^0 * tag_end)
lex:add_style('tag_start', lexer.STYLE_KEYWORD)
@@ -30,18 +27,17 @@ lex:add_style('tag_end', lexer.STYLE_KEYWORD)
-- Link
lex:add_rule('link', token(lexer.STRING, S('[]')))
lex:add_rule('internal_link', B('[[') *
- token('link_article', (lexer.any - '|' - ']]')^1))
-lex:add_style('link_article', lexer.STYLE_STRING..',underlined')
+ token('link_article', (lexer.any - '|' - ']]')^1))
+lex:add_style('link_article', lexer.STYLE_STRING .. ',underlined')
-- Templates and parser functions.
lex:add_rule('template', token(lexer.OPERATOR, S('{}')))
lex:add_rule('parser_func', B('{{') *
- token('parser_func', P('#') * lexer.alpha^1 +
- lexer.upper^1 * ':'))
+ token('parser_func', P('#') * lexer.alpha^1 + lexer.upper^1 * ':'))
lex:add_rule('template_name', B('{{') *
- token('template_name', (lexer.any - S('{}|'))^1))
+ token('template_name', (lexer.any - S('{}|'))^1))
lex:add_style('parser_func', lexer.STYLE_FUNCTION)
-lex:add_style('template_name', lexer.STYLE_OPERATOR..',underlined')
+lex:add_style('template_name', lexer.STYLE_OPERATOR .. ',underlined')
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('-=|#~!')))
@@ -49,10 +45,9 @@ lex:add_rule('operator', token(lexer.OPERATOR, S('-=|#~!')))
-- Behavior switches
local start_pat = P(function(_, pos) return pos == 1 end)
lex:add_rule('behavior_switch', (B(lexer.space) + start_pat) *
- token('behavior_switch',
- '__' * (P('TOC') + 'FORCETOC' + 'NOTOC' +
- 'NOEDITSECTION' + 'NOCC' +
- 'NOINDEX') * '__') * #lexer.space)
+ token('behavior_switch', '__' *
+ (P('TOC') + 'FORCETOC' + 'NOTOC' + 'NOEDITSECTION' + 'NOCC' + 'NOINDEX') *
+ '__') * #lexer.space)
lex:add_style('behavior_switch', lexer.STYLE_KEYWORD)
return lex
diff --git a/lexlua/moonscript.lua b/lexlua/moonscript.lua
index 933130b74..d57cd55a9 100644
--- a/lexlua/moonscript.lua
+++ b/lexlua/moonscript.lua
@@ -110,24 +110,25 @@ lex:add_style('proper_ident', lexer.STYLE_CLASS)
lex:add_style('tbl_key', lexer.STYLE_REGEX)
local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[',
- function(input, index, eq)
- local _, e = input:find(']'..eq..']', index, true)
- return (e or #input) + 1
- end)
+ function(input, index, eq)
+ local _, e = input:find(']' .. eq .. ']', index, true)
+ return (e or #input) + 1
+ end)
-- Strings.
-local sq_str = lexer.delimited_range("'", false, true)
-local dq_str = lexer.delimited_range('"', false, true)
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str) +
- token('longstring', longstring))
+ token('longstring', longstring))
lex:add_style('longstring', lexer.STYLE_STRING)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '--' * (longstring +
- lexer.nonnewline^0)))
+local line_comment = lexer.to_eol('--')
+local block_comment = '--' * longstring
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Function definition.
lex:add_rule('fndef', token('fndef', P('->') + '=>'))
diff --git a/lexlua/myrddin.lua b/lexlua/myrddin.lua
index e836a0b24..1277468c4 100644
--- a/lexlua/myrddin.lua
+++ b/lexlua/myrddin.lua
@@ -26,17 +26,14 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = P{
- V'part' * P'*/'^-1,
- part = '/*' * (V'full' + (lexer.any - '/*' - '*/'))^0,
- full = V'part' * '*/',
-}
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Numbers.
local digit = lexer.digit + '_'
@@ -44,8 +41,9 @@ local bdigit = R'01' + '_'
local xdigit = lexer.xdigit + '_'
local odigit = R'07' + '_'
local integer = '0x' * xdigit^1 + '0o' * odigit^1 + '0b' * bdigit^1 + digit^1
-local float = digit^1 * (('.' * digit^1) * (S'eE' * S'+-'^-1 * digit^1)^-1 +
- ('.' * digit^1)^-1 * S'eE' * S'+-'^-1 * digit^1)
+local float = digit^1 * (
+ ('.' * digit^1) * (S'eE' * S'+-'^-1 * digit^1)^-1 +
+ ('.' * digit^1)^-1 * S'eE' * S'+-'^-1 * digit^1)
lex:add_rule('number', token(lexer.NUMBER, float + integer))
-- Operators.
diff --git a/lexlua/nemerle.lua b/lexlua/nemerle.lua
index f12cae300..196d0f028 100644
--- a/lexlua/nemerle.lua
+++ b/lexlua/nemerle.lua
@@ -28,20 +28,20 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
]]))
-- Strings.
-local sq_str = P('L')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('L')^-1 * lexer.delimited_range('"', true)
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Preprocessor.
local preproc_word = word_match[[
@@ -49,7 +49,7 @@ local preproc_word = word_match[[
undef using warning
]]
lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') *
- S('\t ')^0 * preproc_word))
+ S('\t ')^0 * preproc_word))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
diff --git a/lexlua/nim.lua b/lexlua/nim.lua
index 203ed3628..8840aa8fc 100644
--- a/lexlua/nim.lua
+++ b/lexlua/nim.lua
@@ -69,18 +69,17 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
]]))
-- Strings.
-local sq_str = lexer.delimited_range("'", true)
-local dq_str = lexer.delimited_range('"', true)
-local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-local raw_dq_str = 'r' * lexer.delimited_range('"', false, true)
-lex:add_rule('string', token(lexer.STRING, triple_dq_str + sq_str + dq_str +
- raw_dq_str))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
+local raw_str = 'r' * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + raw_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-- Numbers.
local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0
@@ -88,14 +87,14 @@ local hex = '0' * S('xX') * lexer.xdigit^1 * ('_' * lexer.xdigit^1)^0
local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0
local oct = '0o' * R('07')^1
local integer = S('+-')^-1 * (bin + hex + oct + dec) *
- ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1
+ ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1
local float = lexer.digit^1 * ('_' * lexer.digit^1)^0 *
- ('.' * ('_' * lexer.digit)^0)^-1 * S('eE') * S('+-')^-1 *
- lexer.digit^1 * ('_' * lexer.digit^1)^0
+ ('.' * ('_' * lexer.digit)^0)^-1 * S('eE') * S('+-')^-1 * lexer.digit^1 *
+ ('_' * lexer.digit^1)^0
lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR,
- S('=+-*/<>@$~&%|!?^.:\\`()[]{},;')))
+ S('=+-*/<>@$~&%|!?^.:\\`()[]{},;')))
return lex
diff --git a/lexlua/nsis.lua b/lexlua/nsis.lua
index 466550bee..39be082d3 100644
--- a/lexlua/nsis.lua
+++ b/lexlua/nsis.lua
@@ -12,14 +12,15 @@ local lex = lexer.new('nsis')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments (4.1).
-local line_comment = (P(';') + '#') * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * '*/'
+local line_comment = lexer.to_eol(P(';') + '#')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"') +
- lexer.delimited_range('`')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str))
-- Constants (4.2.3).
lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
diff --git a/lexlua/objective_c.lua b/lexlua/objective_c.lua
index 48aaaa1d3..348b2a9b8 100644
--- a/lexlua/objective_c.lua
+++ b/lexlua/objective_c.lua
@@ -32,20 +32,20 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
]]))
-- Strings.
-local sq_str = P('L')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('L')^-1 * lexer.delimited_range('"', true)
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Preprocessor.
local preproc_word = word_match[[
@@ -53,8 +53,7 @@ local preproc_word = word_match[[
warning
]]
lex:add_rule('preprocessor', #lexer.starts_line('#') *
- token(lexer.PREPROCESSOR, '#' * S('\t ')^0 *
- preproc_word))
+ token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
diff --git a/lexlua/pascal.lua b/lexlua/pascal.lua
index e5db67982..05cb3b5c0 100644
--- a/lexlua/pascal.lua
+++ b/lexlua/pascal.lua
@@ -38,23 +38,21 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[
]], true)))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- S('uUrR')^-1 *
- lexer.delimited_range("'", true, true)))
+lex:add_rule('string', token(lexer.STRING, S('uUrR')^-1 *
+ lexer.range("'", true, false)))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local bblock_comment = '{' * (lexer.any - '}')^0 * P('}')^-1
-local pblock_comment = '(*' * (lexer.any - '*)')^0 * P('*)')^-1
+local line_comment = lexer.to_eol('//', true)
+local bblock_comment = lexer.range('{', '}')
+local pblock_comment = lexer.range('(*', '*)')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + bblock_comment +
- pblock_comment))
+ pblock_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('LlDdFf')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlDdFf')^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('.,;^@:=<>+-/*()[]')))
diff --git a/lexlua/perl.lua b/lexlua/perl.lua
index 6686dcaf0..819b2a1a3 100644
--- a/lexlua/perl.lua
+++ b/lexlua/perl.lua
@@ -21,7 +21,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
-- Markers.
lex:add_rule('marker', token(lexer.COMMENT, word_match[[__DATA__ __END__]] *
- lexer.any^0))
+ lexer.any^0))
-- Functions.
lex:add_rule('function', token(lexer.FUNCTION, word_match[[
@@ -46,22 +46,22 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
]]))
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
-local literal_delimitted = P(function(input, index) -- for single delimiter sets
+local literal_delimited = P(function(input, index) -- for single delimiter sets
local delimiter = input:sub(index, index)
if not delimiter:find('%w') then -- only non alpha-numerics
local match_pos, patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = lexer.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = lexer.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
match_pos = lpeg.match(patt, input, index)
return match_pos or #input + 1
end
end)
-local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
+local literal_delimited2 = P(function(input, index) -- for 2 delimiter sets
local delimiter = input:sub(index, index)
-- Only consider non-alpha-numerics and non-spaces as delimiters. The
-- non-spaces are used to ignore operators like "-s".
@@ -70,9 +70,9 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = lexer.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = lexer.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
first_match_pos = lpeg.match(patt, input, index)
final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
@@ -84,50 +84,47 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
end)
-- Strings.
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local cmd_str = lexer.delimited_range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local cmd_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
if s == index and delimiter then
local end_heredoc = '[\n\r\f]+'
- local _, e = input:find(end_heredoc..delimiter, e)
+ local _, e = input:find(end_heredoc .. delimiter, e)
return e and e + 1 or #input + 1
end
end)
-local lit_str = 'q' * P('q')^-1 * literal_delimitted
-local lit_array = 'qw' * literal_delimitted
-local lit_cmd = 'qx' * literal_delimitted
-local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
+local lit_str = 'q' * P('q')^-1 * literal_delimited
+local lit_array = 'qw' * literal_delimited
+local lit_cmd = 'qx' * literal_delimited
+local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
+local string = token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc +
+ lit_str + lit_array + lit_cmd + lit_tr)
local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
- lexer.delimited_range('/', true) * S('imosx')^0
-local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
-local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
-local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
-lex:add_rule('string',
- token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
- lit_array + lit_cmd + lit_tr) +
- token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub))
+ lexer.range('/', true) * S('imosx')^0
+local lit_regex = 'qr' * literal_delimited * S('imosx')^0
+local lit_match = 'm' * literal_delimited * S('cgimosx')^0
+local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0
+local regex = token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub)
+lex:add_rule('string', string + regex)
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '#' * lexer.nonnewline_esc^0
-local block_comment = lexer.starts_line('=') * lexer.alpha *
- (lexer.any - lexer.newline * '=cut')^0 *
- (lexer.newline * '=cut')^-1
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range(lexer.starts_line('=' * lexer.alpha),
+ lexer.starts_line('=cut'))
lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Variables.
-local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
- S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
- ':' * (lexer.any - ':') +
- P('$') * -lexer.word +
- lexer.digit^1)
+local special_var = '$' * (
+ '^' * S('ADEFHILMOPSTWX')^-1 + S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
+ ':' * (lexer.any - ':') + P('$') * -lexer.word + lexer.digit^1)
local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var))
diff --git a/lexlua/php.lua b/lexlua/php.lua
index adf7ef432..257bc955d 100644
--- a/lexlua/php.lua
+++ b/lexlua/php.lua
@@ -22,7 +22,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
]]))
local word = (lexer.alpha + '_' + R('\127\255')) *
- (lexer.alnum + '_' + R('\127\255'))^0
+ (lexer.alnum + '_' + R('\127\255'))^0
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
@@ -31,26 +31,26 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
lex:add_rule('variable', token(lexer.VARIABLE, '$' * word))
-- Strings.
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local bt_str = lexer.delimited_range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
local heredoc = '<<<' * P(function(input, index)
local _, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f]+', index)
if delimiter then
- local _, e = input:find('[\n\r\f]+'..delimiter, e)
+ local _, e = input:find('[\n\r\f]+' .. delimiter, e)
return e and e + 1
end
end)
-lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bt_str + heredoc))
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str + heredoc))
-- TODO: interpolated code.
-- Comments.
-local line_comment = (P('//') + '#') * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol(P('//') + '#')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('!@%^*&()-+=|/?.,;:<>[]{}')))
diff --git a/lexlua/pico8.lua b/lexlua/pico8.lua
index 3e82aa9e1..c0061277c 100644
--- a/lexlua/pico8.lua
+++ b/lexlua/pico8.lua
@@ -20,7 +20,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments
-lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//', true)))
-- Numbers
lex:add_rule('number', token(lexer.NUMBER, lexer.integer))
diff --git a/lexlua/pike.lua b/lexlua/pike.lua
index c17d1b4b4..3dff044ac 100644
--- a/lexlua/pike.lua
+++ b/lexlua/pike.lua
@@ -29,21 +29,21 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true) +
- '#' * lexer.delimited_range('"')))
+local sq_str = lexer.range("'", true)
+local dq_str = P('#')^-1 * lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0 +
- lexer.nested_pair('/*', '*/')))
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('lLdDfF')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('lLdDfF')^-1))
-- Preprocessors.
-lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') *
- lexer.nonnewline^0))
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR,
+ lexer.to_eol(lexer.starts_line('#'))))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('<>=!+-/*%&|^~@`.,:;()[]{}')))
diff --git a/lexlua/pkgbuild.lua b/lexlua/pkgbuild.lua
index 759cf55ec..7916487c3 100644
--- a/lexlua/pkgbuild.lua
+++ b/lexlua/pkgbuild.lua
@@ -11,24 +11,24 @@ local lex = lexer.new('pkgbuild')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Strings.
-local sq_str = lexer.delimited_range("'", false, true)
-local dq_str = lexer.delimited_range('"')
-local ex_str = lexer.delimited_range('`')
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"')
+local ex_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
- local s, e, _, delimiter =
- input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index)
+ local s, e, _, delimiter = input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+',
+ index)
if s == index and delimiter then
- local _, e = input:find('[\n\r\f]+'..delimiter, e)
+ local _, e = input:find('[\n\r\f]+' .. delimiter, e)
return e and e + 1 or #input + 1
end
end)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
@@ -59,14 +59,14 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Variables.
-lex:add_rule('variable', token(lexer.VARIABLE,
- '$' * (S('!#?*@$') +
- lexer.delimited_range('()', true, true) +
- lexer.delimited_range('[]', true, true) +
- lexer.delimited_range('{}', true, true) +
- lexer.delimited_range('`', true, true) +
- lexer.digit^1 +
- lexer.word)))
+local symbol = S('!#?*@$')
+local parens = lexer.range('(', ')', true)
+local brackets = lexer.range('[', ']', true)
+local braces = lexer.range('{', '}', true)
+local backticks = lexer.range('`', true, false)
+local number = lexer.digit^1
+lex:add_rule('variable', token(lexer.VARIABLE, '$' *
+ (symbol + parens + brackets + braces + backticks + number + lexer.word)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^~.,:;?()[]{}')))
diff --git a/lexlua/powershell.lua b/lexlua/powershell.lua
index b0ef56224..d69f8f328 100644
--- a/lexlua/powershell.lua
+++ b/lexlua/powershell.lua
@@ -12,7 +12,7 @@ local lex = lexer.new('powershell')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[
@@ -44,15 +44,14 @@ lex:add_rule('type', token(lexer.KEYWORD, '[' * word_match([[
]], true) * ']'))
-- Variables.
-lex:add_rule('variable', token(lexer.VARIABLE,
- '$' * (lexer.digit^1 + lexer.word +
- lexer.delimited_range('{}', true, true))))
+lex:add_rule('variable', token(lexer.VARIABLE, '$' *
+ (lexer.digit^1 + lexer.word + lexer.range('{', '}', true))))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}%`')))
diff --git a/lexlua/prolog.lua b/lexlua/prolog.lua
index 79a3fbe10..c65748311 100644
--- a/lexlua/prolog.lua
+++ b/lexlua/prolog.lua
@@ -31,7 +31,7 @@ local P, R, S, B, V, C = lpeg.P, lpeg.R, lpeg.S, lpeg.B, lpeg.V, lpeg.C
local lex = lexer.new('prolog')
local dialects = setmetatable({gprolog = 'gprolog', swipl = 'swipl'},
- {__index = function(_, _) return 'iso' end})
+ {__index = function(_, _) return 'iso' end})
local dialect = dialects[lexer.property['prolog.dialect']]
-- Directives.
@@ -71,10 +71,10 @@ directives.swipl = directives.iso .. [[
module multifile op reexport thread_local use_module volatile
]]
lex:add_rule('directive',
- token(lexer.WHITESPACE, lexer.starts_line(S(' \t'))^0) *
- token(lexer.OPERATOR, P':-') *
- token(lexer.WHITESPACE, S(' \t')^0) *
- token(lexer.PREPROCESSOR, P(word_match(directives[dialect]))))
+ token(lexer.WHITESPACE, lexer.starts_line(S(' \t'))^0) *
+ token(lexer.OPERATOR, P':-') *
+ token(lexer.WHITESPACE, S(' \t')^0) *
+ token(lexer.PREPROCESSOR, P(word_match(directives[dialect]))))
-- Whitespace.
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
@@ -278,10 +278,9 @@ one_plus_arity_keywords.swipl = [[
set_random prolog_stack_property put_char unload_file nb_setval put_byte
current_signal put_code write_length string read_string text_to_string
]]
-lex:add_rule('keyword',
- token(lexer.KEYWORD, word_match(zero_arity_keywords[dialect]) +
- (word_match(one_plus_arity_keywords[dialect]) *
- #(P'('))))
+lex:add_rule('keyword', token(lexer.KEYWORD,
+ word_match(zero_arity_keywords[dialect]) +
+ (word_match(one_plus_arity_keywords[dialect]) * #(P'('))))
-- BIFs.
local bifs = {}
@@ -311,16 +310,15 @@ local decimal_group = S('+-')^-1 * (lexer.digit + '_')^1
local binary_number = '0b' * (S('01') + '_')^1
local character_code = '0\'' * S('\\')^-1 * (lexer.print - lexer.space)
local decimal_number = decimal_group * ('.' * decimal_group)^-1 *
- ('e' * decimal_group)^-1
+ ('e' * decimal_group)^-1
local hexadecimal_number = '0x' * (lexer.xdigit + '_')^1
local octal_number = '0o' * (S('01234567') + '_')^1
lex:add_rule('number', token(lexer.NUMBER, character_code + binary_number +
- hexadecimal_number + octal_number +
- decimal_number))
+ hexadecimal_number + octal_number + decimal_number))
-- Comments.
-local line_comment = '%' * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('%')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Operators.
@@ -338,18 +336,18 @@ operators.swipl = [[
initialization rem
]]
lex:add_rule('operator', token(lexer.OPERATOR, word_match(operators[dialect]) +
- S('-!+\\|=:;&<>()[]{}/*^@?.')))
+ S('-!+\\|=:;&<>()[]{}/*^@?.')))
-- Variables.
-lex:add_rule('variable',
- token(lexer.VARIABLE, (lexer.upper + '_') *
- (lexer.word^1 + lexer.digit^1 + P('_')^1)^0))
+lex:add_rule('variable', token(lexer.VARIABLE, (lexer.upper + '_') *
+ (lexer.word^1 + lexer.digit^1 + P('_')^1)^0))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
return lex
diff --git a/lexlua/props.lua b/lexlua/props.lua
index 74281a8dd..ab0bde27b 100644
--- a/lexlua/props.lua
+++ b/lexlua/props.lua
@@ -12,22 +12,22 @@ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Colors.
lex:add_rule('color', token('color', '#' * lexer.xdigit * lexer.xdigit *
- lexer.xdigit * lexer.xdigit *
- lexer.xdigit * lexer.xdigit))
+ lexer.xdigit * lexer.xdigit * lexer.xdigit * lexer.xdigit))
lex:add_style('color', lexer.STYLE_NUMBER)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Equals.
lex:add_rule('equals', token(lexer.OPERATOR, '='))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Variables.
-lex:add_rule('variable', token(lexer.VARIABLE, '$(' * (lexer.any - ')')^1 *
- ')'))
+lex:add_rule('variable', token(lexer.VARIABLE, '$' *
+ lexer.range('(', ')', true)))
return lex
diff --git a/lexlua/protobuf.lua b/lexlua/protobuf.lua
index c90778407..71284dbdf 100644
--- a/lexlua/protobuf.lua
+++ b/lexlua/protobuf.lua
@@ -24,20 +24,20 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
]]))
-- Strings.
-local sq_str = P('L')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('L')^-1 * lexer.delimited_range('"', true)
+local sq_str = P('L')^-1 * lexer.range("'", true)
+local dq_str = P('L')^-1 * lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('<>=|;,.()[]{}')))
diff --git a/lexlua/ps.lua b/lexlua/ps.lua
index 255a92339..8ebde45fd 100644
--- a/lexlua/ps.lua
+++ b/lexlua/ps.lua
@@ -28,15 +28,15 @@ local word = (lexer.alpha + '-') * (lexer.alnum + '-')^0
lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Strings.
-local arrow_string = lexer.delimited_range('<>')
-local nested_string = lexer.delimited_range('()', false, false, true)
+local arrow_string = lexer.range('<', '>')
+local nested_string = lexer.range('(', ')', false, false, true)
lex:add_rule('string', token(lexer.STRING, arrow_string + nested_string))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Labels.
lex:add_rule('label', token(lexer.LABEL, '/' * word))
diff --git a/lexlua/pure.lua b/lexlua/pure.lua
index db75233b7..5e8f04aa9 100644
--- a/lexlua/pure.lua
+++ b/lexlua/pure.lua
@@ -11,8 +11,8 @@ local lex = lexer.new('pure')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local line_comment = '//' * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Pragmas.
@@ -45,6 +45,6 @@ lex:add_rule('operator', token(lexer.OPERATOR, dots + punct))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true)))
return lex
diff --git a/lexlua/python.lua b/lexlua/python.lua
index 58c6ba308..72e70d70e 100644
--- a/lexlua/python.lua
+++ b/lexlua/python.lua
@@ -73,19 +73,17 @@ lex:add_style('self', lexer.STYLE_TYPE)
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true)))
-- Strings.
-local sq_str = P('u')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('U')^-1 * lexer.delimited_range('"', true)
-local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1
-local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
+local sq_str = P('u')^-1 * lexer.range("'", true)
+local dq_str = P('U')^-1 * lexer.range('"', true)
+local tq_str = lexer.range("'''") + lexer.range('"""')
-- TODO: raw_strs cannot end in single \.
-local raw_sq_str = P('u')^-1 * 'r' * lexer.delimited_range("'", false, true)
-local raw_dq_str = P('U')^-1 * 'R' * lexer.delimited_range('"', false, true)
-lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str +
- sq_str + dq_str + raw_sq_str +
- raw_dq_str))
+local raw_sq_str = P('u')^-1 * 'r' * lexer.range("'", false, false)
+local raw_dq_str = P('U')^-1 * 'R' * lexer.range('"', false, false)
+lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str +
+ raw_sq_str + raw_dq_str))
-- Numbers.
local dec = lexer.digit^1 * S('Ll')^-1
@@ -95,7 +93,7 @@ local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec)
lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer))
-- Decorators.
-lex:add_rule('decorator', token('decorator', '@' * lexer.nonnewline^0))
+lex:add_rule('decorator', token('decorator', lexer.to_eol('@')))
lex:add_style('decorator', lexer.STYLE_PREPROCESSOR)
-- Operators.
diff --git a/lexlua/rc.lua b/lexlua/rc.lua
index 8c257c6fb..3639cc556 100644
--- a/lexlua/rc.lua
+++ b/lexlua/rc.lua
@@ -20,32 +20,31 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local str = lexer.delimited_range("'", false, true)
+local str = lexer.range("'", false, false)
local heredoc = '<<' * P(function(input, index)
local s, e, _, delimiter = input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1',
- index)
+ index)
if s == index and delimiter then
delimiter = delimiter:gsub('[%%+-.?]', '%%%1')
- local _, e = input:find('[\n\r]'..delimiter..'[\n\r]', e)
+ local _, e = input:find('[\n\r]' .. delimiter .. '[\n\r]', e)
return e and e + 1 or #input + 1
end
end)
lex:add_rule('string', token(lexer.STRING, str + heredoc))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.integer + lexer.float))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Variables.
lex:add_rule('variable', token(lexer.VARIABLE, '$' * S('"#')^-1 *
- ('*' + lexer.digit^1 +
- lexer.word)))
+ ('*' + lexer.digit^1 + lexer.word)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('@`=!<>*&^|;?()[]{}') +
- '\\\n'))
+ '\\\n'))
-- Fold points.
lex:add_fold_point(lexer.OPERATOR, '{', '}')
diff --git a/lexlua/rebol.lua b/lexlua/rebol.lua
index 7cc8a2186..5994a4cd5 100644
--- a/lexlua/rebol.lua
+++ b/lexlua/rebol.lua
@@ -11,9 +11,8 @@ local lex = lexer.new('rebol')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-local line_comment = ';' * lexer.nonnewline^0;
-local block_comment = 'comment' * P(' ')^-1 *
- lexer.delimited_range('{}', false, true)
+local line_comment = lexer.to_eol(';')
+local block_comment = 'comment' * P(' ')^-1 * lexer.range('{', '}')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Keywords.
@@ -80,12 +79,13 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '-') *
- (lexer.alnum + '-')^0))
+ (lexer.alnum + '-')^0))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true) +
- lexer.delimited_range('{}') +
- "'" * lexer.word))
+local dq_str = lexer.range('"', true)
+local br_str = lexer.range('{', '}', false, false, true)
+local word_str = "'" * lexer.word
+lex:add_rule('string', token(lexer.STRING, dq_str + br_str + word_str))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+/*:()[]')))
diff --git a/lexlua/rest.lua b/lexlua/rest.lua
index 3d7177311..a4060a8bb 100644
--- a/lexlua/rest.lua
+++ b/lexlua/rest.lua
@@ -15,11 +15,11 @@ local any_indent = S(' \t')^0
local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'))
local adornment = lpeg.C(adornment_chars^2 * any_indent) * (l.newline + -1)
local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
- if not adm:find('^%'..c..'+%s*$') then return nil end
+ if not adm:find('^%' .. c .. '+%s*$') then return nil end
local rest = input:sub(index)
local lines = 1
for line, e in rest:gmatch('([^\r\n]+)()') do
- if lines > 1 and line:match('^(%'..c..'+)%s*$') == adm then
+ if lines > 1 and line:match('^(%' .. c .. '+)%s*$') == adm then
return index + e - 1
end
if lines > 3 or #line > #adm then return nil end
@@ -28,7 +28,7 @@ local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c)
return #input + 1
end)
local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c)
- local pos = adm:match('^%'..c..'+()%s*$')
+ local pos = adm:match('^%' .. c .. '+()%s*$')
return pos and index - #adm + pos - 1 or nil
end)
-- Token needs to be a predefined one in order for folder to work.
@@ -37,16 +37,15 @@ local title = token(l.CONSTANT, overline + underline)
-- Lists.
local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8
local enum_list = P('(')^-1 *
- (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)')
+ (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)')
local field_list = ':' * (l.any - ':')^1 * P(':')^-1
local option_word = l.alnum * (l.alnum + '-')^0
local option = S('-/') * option_word * (' ' * option_word)^-1 +
- '--' * option_word * ('=' * option_word)^-1
+ '--' * option_word * ('=' * option_word)^-1
local option_list = option * (',' * l.space^1 * option)^-1
local list = #(l.space^0 * (S('*+-:/') + enum_list)) *
- starts_line(token('list', l.space^0 * (option_list + bullet_list +
- enum_list + field_list) *
- l.space))
+ starts_line(token('list', l.space^0 *
+ (option_list + bullet_list + enum_list + field_list) * l.space))
-- Literal block.
local block = P('::') * (l.newline + -1) * function(input, index)
@@ -55,7 +54,7 @@ local block = P('::') * (l.newline + -1) * function(input, index)
for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do
local no_indent = (indent - pos < level and line ~= ' ' or level == 0)
local quoted = no_indent and line:find(quote or '^%s*%W')
- if quoted and not quote then quote = '^%s*%'..line:match('^%s*(%W)') end
+ if quoted and not quote then quote = '^%s*%' .. line:match('^%s*(%W)') end
if no_indent and not quoted and pos > 1 then return index + pos - 1 end
end
return #input + 1
@@ -74,8 +73,7 @@ local footnote = token('footnote_block', prefix * footnote_label * l.space)
local citation_label = '[' * word * ']'
local citation = token('citation_block', prefix * citation_label * l.space)
local link = token('link_block', prefix * '_' *
- (l.delimited_range('`') + (P('\\') * 1 +
- l.nonnewline - ':')^1) * ':' * l.space)
+ (l.range('`') + (P('\\') * 1 + l.nonnewline - ':')^1) * ':' * l.space)
local markup_block = #prefix * starts_line(footnote + citation + link)
-- Directives.
@@ -102,8 +100,8 @@ local directive_type = word_match({
'include', 'raw', 'class', 'role', 'default-role', 'title',
'restructuredtext-test-directive',
}, '-')
-local known_directive = token('directive',
- prefix * directive_type * '::' * l.space)
+local known_directive = token('directive', prefix * directive_type * '::' *
+ l.space)
local sphinx_directive_type = word_match({
-- The TOC tree.
'toctree',
@@ -115,12 +113,12 @@ local sphinx_directive_type = word_match({
-- Miscellaneous
'sectionauthor', 'index', 'only', 'tabularcolumns'
}, '-')
-local sphinx_directive = token('sphinx_directive',
- prefix * sphinx_directive_type * '::' * l.space)
-local unknown_directive = token('unknown_directive',
- prefix * word * '::' * l.space)
+local sphinx_directive = token('sphinx_directive', prefix *
+ sphinx_directive_type * '::' * l.space)
+local unknown_directive = token('unknown_directive', prefix * word * '::' *
+ l.space)
local directive = #prefix * starts_line(known_directive + sphinx_directive +
- unknown_directive)
+ unknown_directive)
-- Sphinx code block.
local indented_block = function(input, index)
@@ -134,42 +132,37 @@ local indented_block = function(input, index)
return #input + 1
end
local code_block = prefix * 'code-block::' * S(' \t')^1 * l.nonnewline^0 *
- (l.newline + -1) * indented_block
+ (l.newline + -1) * indented_block
local sphinx_block = #prefix * token('code_block', starts_line(code_block))
-- Substitution definitions.
-local substitution = #prefix *
- token('substitution',
- starts_line(prefix * l.delimited_range('|') *
- l.space^1 * word * '::' * l.space))
+local substitution = #prefix * token('substitution',
+ starts_line(prefix * l.range('|') * l.space^1 * word * '::' * l.space))
-- Comments.
-local line_comment = prefix * l.nonnewline^0
+local line_comment = l.to_eol(prefix)
local bprefix = any_indent * '..'
local block_comment = bprefix * l.newline * indented_block
-local comment = #bprefix *
- token(l.COMMENT, starts_line(line_comment + block_comment))
+local comment = #bprefix * token(l.COMMENT,
+ starts_line(line_comment + block_comment))
-- Inline markup.
-local em = token('em', l.delimited_range('*'))
-local strong = token('strong', ('**' * (l.any - '**')^0 * P('**')^-1))
+local em = token('em', l.range('*'))
+local strong = token('strong', l.range('**', '**'))
local role = token('role', ':' * word * ':' * (word * ':')^-1)
-local interpreted = role^-1 * token('interpreted', l.delimited_range('`')) *
- role^-1
-local inline_literal = token('inline_literal',
- '``' * (l.any - '``')^0 * P('``')^-1)
-local link_ref = token('link',
- (word + l.delimited_range('`')) * '_' * P('_')^-1 +
- '_' * l.delimited_range('`'))
+local interpreted = role^-1 * token('interpreted', l.range('`')) * role^-1
+local inline_literal = token('inline_literal', l.range('``', '``'))
+local postfix_link = (word + l.range('`')) * '_' * P('_')^-1
+local prefix_link = '_' * l.range('`')
+local link_ref = token('link', postfix_link + prefix_link)
local footnote_ref = token('footnote', footnote_label * '_')
local citation_ref = token('citation', citation_label * '_')
-local substitution_ref = token('substitution', l.delimited_range('|', true) *
- ('_' * P('_')^-1)^-1)
+local substitution_ref = token('substitution', l.range('|', true) *
+ ('_' * P('_')^-1)^-1)
local link = token('link', l.alpha * (l.alnum + S('-.'))^1 * ':' *
- (l.alnum + S('/.+-%@'))^1)
+ (l.alnum + S('/.+-%@'))^1)
local inline_markup = (strong + em + inline_literal + link_ref + interpreted +
- footnote_ref + citation_ref + substitution_ref + link) *
- -l.alnum
+ footnote_ref + citation_ref + substitution_ref + link) * -l.alnum
-- Other.
local non_space = token(l.DEFAULT, l.alnum * (l.any - l.space)^0)
@@ -193,14 +186,14 @@ M._rules = {
M._tokenstyles = {
list = l.STYLE_TYPE,
- literal_block = l.STYLE_EMBEDDED..',eolfilled',
+ literal_block = l.STYLE_EMBEDDED .. ',eolfilled',
footnote_block = l.STYLE_LABEL,
citation_block = l.STYLE_LABEL,
link_block = l.STYLE_LABEL,
directive = l.STYLE_KEYWORD,
- sphinx_directive = l.STYLE_KEYWORD..',bold',
- unknown_directive = l.STYLE_KEYWORD..',italics',
- code_block = l.STYLE_EMBEDDED..',eolfilled',
+ sphinx_directive = l.STYLE_KEYWORD .. ',bold',
+ unknown_directive = l.STYLE_KEYWORD .. ',italics',
+ code_block = l.STYLE_EMBEDDED .. ',eolfilled',
substitution = l.STYLE_VARIABLE,
strong = 'bold',
em = 'italics',
@@ -219,7 +212,7 @@ local sphinx_levels = {
-- Section-based folding.
M._fold = function(text, start_pos, start_line, start_level)
local folds, line_starts = {}, {}
- for pos in (text..'\n'):gmatch('().-\r?\n') do
+ for pos in (text .. '\n'):gmatch('().-\r?\n') do
line_starts[#line_starts + 1] = pos
end
local style_at, CONSTANT, level = l.style_at, l.CONSTANT, start_level
@@ -231,7 +224,7 @@ M._fold = function(text, start_pos, start_line, start_level)
local c = text:sub(pos, pos)
local line_num = start_line + i - 1
folds[line_num] = level
- if style_at[start_pos + pos] == CONSTANT and c:find('^[^%w%s]') then
+ if style_at[start_pos + pos - 1] == CONSTANT and c:find('^[^%w%s]') then
local sphinx_level = FOLD_BASE + (sphinx_levels[c] or #sphinx_levels)
level = not sphinx and level - 1 or sphinx_level
if level < FOLD_BASE then level = FOLD_BASE end
@@ -249,11 +242,11 @@ l.property['fold.by.sphinx.convention'] = '0'
--[[ Embedded languages.
local bash = l.load('bash')
local bash_indent_level
-local start_rule = #(prefix * 'code-block' * '::' * l.space^1 * 'bash' *
- (l.newline + -1)) * sphinx_directive *
- token('bash_begin', P(function(input, index)
- bash_indent_level = #input:match('^([ \t]*)', index)
- return index
- end))]]
+local start_rule =
+ #(prefix * 'code-block' * '::' * l.space^1 * 'bash' * (l.newline + -1)) *
+ sphinx_directive * token('bash_begin', P(function(input, index)
+ bash_indent_level = #input:match('^([ \t]*)', index)
+ return index
+ end))]]
return M
diff --git a/lexlua/rexx.lua b/lexlua/rexx.lua
index 576df8b18..e33a613fc 100644
--- a/lexlua/rexx.lua
+++ b/lexlua/rexx.lua
@@ -48,20 +48,21 @@ local word = lexer.alpha * (lexer.alnum + S('@#$\\.!?_'))^0
lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Strings.
-local sq_str = lexer.delimited_range("'", true, true)
-local dq_str = lexer.delimited_range('"', true, true)
+local sq_str = lexer.range("'", true, false)
+local dq_str = lexer.range('"', true, false)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline_esc^0 +
- lexer.nested_pair('/*', '*/')))
+local line_comment = lexer.to_eol('--', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Preprocessor.
-lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') *
- lexer.nonnewline^0))
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR,
+ lexer.to_eol(lexer.starts_line('#'))))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/\\*%&|^~.,:;(){}')))
diff --git a/lexlua/rstats.lua b/lexlua/rstats.lua
index 978a73c25..d499dc500 100644
--- a/lexlua/rstats.lua
+++ b/lexlua/rstats.lua
@@ -26,15 +26,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- P('i')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * P('i')^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('<->+*/^=.,:;|$()[]{}')))
diff --git a/lexlua/ruby.lua b/lexlua/ruby.lua
index efa2a1779..e25ca6000 100644
--- a/lexlua/ruby.lua
+++ b/lexlua/ruby.lua
@@ -33,23 +33,22 @@ local word = (lexer.alpha + '_') * word_char^0
lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Comments.
-local line_comment = '#' * lexer.nonnewline_esc^0
-local block_comment = lexer.starts_line('=begin') *
- (lexer.any - lexer.newline * '=end')^0 *
- (lexer.newline * '=end')^-1
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range(lexer.starts_line('=begin'),
+ lexer.starts_line('=end'))
lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'}
-local literal_delimitted = P(function(input, index)
+local literal_delimited = P(function(input, index)
local delimiter = input:sub(index, index)
if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics
local match_pos, patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = lexer.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = lexer.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
match_pos = lpeg.match(patt, input, index)
return match_pos or #input + 1
@@ -57,29 +56,29 @@ local literal_delimitted = P(function(input, index)
end)
-- Strings.
-local cmd_str = lexer.delimited_range('`')
-local lit_cmd = '%x' * literal_delimitted
-local lit_array = '%w' * literal_delimitted
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local lit_str = '%' * S('qQ')^-1 * literal_delimitted
+local cmd_str = lexer.range('`')
+local lit_cmd = '%x' * literal_delimited
+local lit_array = '%w' * literal_delimited
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local lit_str = '%' * S('qQ')^-1 * literal_delimited
local heredoc = '<<' * P(function(input, index)
- local s, e, indented, _, delimiter =
- input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
+ local s, e, indented, _, delimiter = input:find(
+ '(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index)
if s == index and delimiter then
local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+')
- local _, e = input:find(end_heredoc..delimiter, e)
+ local _, e = input:find(end_heredoc .. delimiter, e)
return e and e + 1 or #input + 1
end
end)
+local string = token(lexer.STRING, (sq_str + dq_str + lit_str + heredoc +
+ cmd_str + lit_cmd + lit_array) * S('f')^-1)
-- TODO: regex_str fails with `obj.method /patt/` syntax.
local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') *
- lexer.delimited_range('/', true, false) * S('iomx')^0
-local lit_regex = '%r' * literal_delimitted * S('iomx')^0
-lex:add_rule('string', token(lexer.STRING, (sq_str + dq_str + lit_str +
- heredoc + cmd_str + lit_cmd +
- lit_array) * S('f')^-1) +
- token(lexer.REGEX, regex_str + lit_regex))
+ lexer.range('/', true) * S('iomx')^0
+local lit_regex = '%r' * literal_delimited * S('iomx')^0
+local regex = token(lexer.REGEX, regex_str + lit_regex)
+lex:add_rule('string', string + regex)
-- Numbers.
local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1
@@ -88,15 +87,15 @@ local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec)
-- TODO: meta, control, etc. for numeric_literal.
local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char
lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer +
- numeric_literal))
+ numeric_literal))
-- Variables.
local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit +
- '-' * S('0FadiIKlpvw'))
+ '-' * S('0FadiIKlpvw'))
local class_var = '@@' * word
local inst_var = '@' * word
lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var +
- inst_var))
+ inst_var))
-- Symbols.
lex:add_rule('symbol', token('symbol', ':' * P(function(input, index)
@@ -110,7 +109,7 @@ lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~')))
-- Fold points.
local function disambiguate(text, pos, line, s)
return line:sub(1, s - 1):match('^%s*$') and
- not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0
+ not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0
end
lex:add_fold_point(lexer.KEYWORD, 'begin', 'end')
lex:add_fold_point(lexer.KEYWORD, 'class', 'end')
diff --git a/lexlua/rust.lua b/lexlua/rust.lua
index 7546e8c6d..427b12d04 100644
--- a/lexlua/rust.lua
+++ b/lexlua/rust.lua
@@ -25,7 +25,7 @@ lex:add_rule('macro', token(lexer.FUNCTION, lexer.word * S("!")))
-- Library types
lex:add_rule('library', token(lexer.LABEL, lexer.upper *
- (lexer.lower + lexer.dec_num)^1))
+ (lexer.lower + lexer.dec_num)^1))
-- Numbers.
local identifier = P('r#')^-1 * lexer.word
@@ -37,22 +37,21 @@ end
local function opt_cap(patt)
return C(patt^-1)
end
-local float = decimal_literal *
- (Cmt(opt_cap('.' * decimal_literal) *
- opt_cap(S('eE') * S('+-')^-1 * integer_suffix(digit)) *
- opt_cap(P('f32') + 'f64'),
- function (input, index, decimals, exponent, type)
- return decimals ~= "" or exponent ~= "" or type ~= ""
- end) +
- '.' * -(S('._') + identifier))
+local float = decimal_literal * (Cmt(
+ opt_cap('.' * decimal_literal) * opt_cap(S('eE') * S('+-')^-1 *
+ integer_suffix(digit)) * opt_cap(P('f32') + 'f64'),
+ function (input, index, decimals, exponent, type)
+ return decimals ~= "" or exponent ~= "" or type ~= ""
+ end) + '.' * -(S('._') + identifier))
local function prefixed_integer(prefix, digit)
return P(prefix) * integer_suffix(digit)
end
-local integer = (prefixed_integer('0b', S('01')) +
- prefixed_integer('0o', R('07')) +
- prefixed_integer('0x', lexer.xdigit) +
- decimal_literal) *
- (S('iu') * (P('8') + '16' + '32' + '64' + '128' + 'size'))^-1
+local integer = (
+ prefixed_integer('0b', S('01')) +
+ prefixed_integer('0o', R('07')) +
+ prefixed_integer('0x', lexer.xdigit) +
+ decimal_literal
+) * (S('iu') * (P('8') + '16' + '32' + '64' + '128' + 'size'))^-1
lex:add_rule('number', token(lexer.NUMBER, float + integer))
-- Types.
@@ -61,31 +60,30 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
]]))
-- Strings.
-local sq_str = P('b')^-1 * lexer.delimited_range("'", true)
-local dq_str = P('b')^-1 * lexer.delimited_range('"')
+local sq_str = P('b')^-1 * lexer.range("'", true)
+local dq_str = P('b')^-1 * lexer.range('"')
local raw_str = Cmt(P('b')^-1 * P('r') * C(P('#')^0) * '"',
- function(input, index, hashes)
- local _, e = input:find('"'..hashes, index, true)
- return (e or #input) + 1
- end)
+ function(input, index, hashes)
+ local _, e = input:find('"' .. hashes, index, true)
+ return (e or #input) + 1
+ end)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, identifier))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = lexer.nested_pair('/*', '*/')
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/', false, false, true)
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+-- Attributes.
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' *
+ lexer.range('[', ']', true)))
+
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR,
- S('+-/*%<>!=`^~@&|?#~:;,.()[]{}')))
-
--- Attributes.
-lex:add_rule('preprocessor', token(lexer.PREPROCESSOR,
- "#[" * (lexer.nonnewline - ']')^0 *
- P("]")^-1))
+ S('+-/*%<>!=`^~@&|?#~:;,.()[]{}')))
-- Fold points.
lex:add_fold_point(lexer.COMMENT, '/*', '*/')
diff --git a/lexlua/sass.lua b/lexlua/sass.lua
index 1c6d8640f..02dcf75c3 100644
--- a/lexlua/sass.lua
+++ b/lexlua/sass.lua
@@ -9,7 +9,7 @@ local P, S = lpeg.P, lpeg.S
local lex = lexer.new('sass', {inherit = lexer.load('css')})
-- Line comments.
-lex:add_rule('line_comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0))
+lex:add_rule('line_comment', token(lexer.COMMENT, lexer.to_eol('//')))
-- Variables.
lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + S('_-'))^1))
diff --git a/lexlua/scala.lua b/lexlua/scala.lua
index 38d328b54..f2959396f 100644
--- a/lexlua/scala.lua
+++ b/lexlua/scala.lua
@@ -13,7 +13,7 @@ lex:add_rule('whitespace', ws)
-- Classes.
lex:add_rule('class', token(lexer.KEYWORD, P('class')) * ws^1 *
- token(lexer.CLASS, lexer.word))
+ token(lexer.CLASS, lexer.word))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
@@ -37,18 +37,17 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
local symbol = "'" * lexer.word
-local dq_str = lexer.delimited_range('"', true)
-local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
lex:add_rule('string', token(lexer.STRING, tq_str + symbol + dq_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('LlFfDd')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
diff --git a/lexlua/scheme.lua b/lexlua/scheme.lua
index 1d37c65c6..326f52cf5 100644
--- a/lexlua/scheme.lua
+++ b/lexlua/scheme.lua
@@ -53,17 +53,17 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, word))
-- Strings.
local literal = (P("'") + '#' * S('\\bdox')) * lexer.word
-local dq_str = lexer.delimited_range('"')
+local dq_str = lexer.range('"')
lex:add_rule('string', token(lexer.STRING, literal + dq_str))
-- Comments.
-local line_comment = ';' * lexer.nonnewline^0
-local block_comment = '#|' * (lexer.any - '|#')^0 * P('|#')^-1
+local line_comment = lexer.to_eol(';')
+local block_comment = lexer.range('#|', '|#')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 *
- (S('./') * lexer.digit^1)^-1))
+ (S('./') * lexer.digit^1)^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('<>=*/+-`@%:()')))
diff --git a/lexlua/smalltalk.lua b/lexlua/smalltalk.lua
index 086ce79e6..0acad115f 100644
--- a/lexlua/smalltalk.lua
+++ b/lexlua/smalltalk.lua
@@ -24,15 +24,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- '$' * lexer.word))
+local sq_str = lexer.range("'")
+local word_str = '$' * lexer.word
+lex:add_rule('string', token(lexer.STRING, sq_str + word_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT,
- lexer.delimited_range('"', false, true)))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('"', false, false)))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S(':=_<>+-/*!()[]')))
diff --git a/lexlua/sml.lua b/lexlua/sml.lua
index e1d00cfe6..9aa4a6922 100644
--- a/lexlua/sml.lua
+++ b/lexlua/sml.lua
@@ -11,11 +11,11 @@ end
local ws = token(lexer.WHITESPACE, lexer.space^1)
-- single line comments are valid in successor ml
-local cl = '(*)' * lexer.nonnewline^0
-local comment = token(lexer.COMMENT, cl + lexer.nested_pair('(*', '*)'))
+local line_comment = lexer.to_eol('(*)')
+local block_comment = lexer.range('(*', '*)', false, false, true)
+local comment = token(lexer.COMMENT, line_comment + block_comment)
-local string = token(lexer.STRING, lpeg.P('#')^-1 *
- lexer.delimited_range('"', true))
+local string = token(lexer.STRING, lpeg.P('#')^-1 * lexer.range('"', true))
local function num(digit)
return digit * (digit^0 * lpeg.P('_'))^0 * digit^1 + digit
@@ -29,15 +29,10 @@ local real = int * frac^-1 * exp + int * frac * exp^-1
local hex = num(lexer.xdigit)
local bin = num(lpeg.S('01'))
-local number = token(lexer.NUMBER,
- lpeg.P('0w') * int
- + (lpeg.P('0wx') + lpeg.P('0xw')) * hex
- + (lpeg.P('0wb') + lpeg.P('0bw')) * bin
- + minus * lpeg.P('0x') * hex
- + minus * lpeg.P('0b') * bin
- + minus * real
- + minus * int
-)
+local number = token(lexer.NUMBER, lpeg.P('0w') * int +
+ (lpeg.P('0wx') + lpeg.P('0xw')) * hex +
+ (lpeg.P('0wb') + lpeg.P('0bw')) * bin + minus * lpeg.P('0x') * hex +
+ minus * lpeg.P('0b') * bin + minus * real + minus * int)
local keyword = token(lexer.KEYWORD, mlword{
'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end',
@@ -51,7 +46,7 @@ local keyword = token(lexer.KEYWORD, mlword{
-- includes valid symbols for identifiers
local operator = token(lexer.OPERATOR,
- lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\'))
+ lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\'))
local type = token(lexer.TYPE, mlword{
'int', 'real', 'word', 'bool', 'char', 'string', 'unit',
@@ -78,14 +73,11 @@ local c = mlword{'true', 'false', 'nil'}
local const = token(lexer.CONSTANT, lexer.upper * id + c)
local structure = token(lexer.CLASS, aid * lpeg.P('.'))
-local open
- = token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'})
- * ws * token(lexer.CLASS, longid)
+local open = token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'}) * ws *
+ token(lexer.CLASS, longid)
-local struct_dec
- = token(lexer.KEYWORD, lpeg.P('structure')) * ws
- * token(lexer.CLASS, aid) * ws
- * token(lexer.OPERATOR, lpeg.P('=')) * ws
+local struct_dec = token(lexer.KEYWORD, lpeg.P('structure')) * ws *
+ token(lexer.CLASS, aid) * ws * token(lexer.OPERATOR, lpeg.P('=')) * ws
local struct_new = struct_dec * token(lexer.KEYWORD, lpeg.P('struct'))
local struct_alias = struct_dec * token(lexer.CLASS, longid)
diff --git a/lexlua/snobol4.lua b/lexlua/snobol4.lua
index ad31aa782..1723a127f 100644
--- a/lexlua/snobol4.lua
+++ b/lexlua/snobol4.lua
@@ -11,8 +11,8 @@ local M = { _NAME = 'snobol4' }
-- Helper patterns.
local dotted_id = lexer.word * (P'.' * lexer.word)^0
-local dq_str = lexer.delimited_range('"', true, true)
-local sq_str = lexer.delimited_range("'", true, true)
+local dq_str = lexer.range('"', true, false)
+local sq_str = lexer.range("'", true, false)
local branch = B(lexer.space * P':(') * dotted_id * #P')'
local sbranch = B(lexer.space * P':' * S'SF' * '(') * dotted_id * #P')'
@@ -27,9 +27,9 @@ local bif = token(lexer.FUNCTION, word_match({
'REVERSE', 'RPAD', 'RSORT', 'SERV_LISTEN', 'SET', 'SETEXIT', 'SIZE', 'SORT',
'SQRT', 'SSET', 'SUBSTR', 'TABLE', 'THAW', 'TIME', 'TRACE', 'TRIM', 'UNLOAD',
'VALUE', 'VDIFFER',
-}, '', true) * #lexer.delimited_range('()', false, true, true))
-local comment = token(lexer.COMMENT,
- lexer.starts_line(S'*#|;!' * lexer.nonnewline^0))
+}, '', true) * #lexer.range('(', ')', false, false, true))
+local comment = token(lexer.COMMENT, lexer.starts_line(S'*#|;!' *
+ lexer.nonnewline^0))
local control = token(lexer.PREPROCESSOR, lexer.starts_line(P'-' * lexer.word))
local identifier = token(lexer.DEFAULT, dotted_id)
local keyword = token(lexer.KEYWORD, word_match({
@@ -42,7 +42,7 @@ local operator = token(lexer.OPERATOR, S'¬?$.!%*/#+-@⊥&^~\\=')
local pattern = lexer.token(lexer.CLASS, word_match({ -- keep distinct
'ABORT', 'ANY', 'ARB', 'ARBNO', 'BAL', 'BREAK', 'BREAKX', 'FAIL', 'FENCE',
'LEN', 'NOTANY', 'POS', 'REM', 'RPOS', 'RTAB', 'SPAN', 'SUCCEED', 'TAB',
-}, '', true) * #lexer.delimited_range('()', false, true, true))
+}, '', true) * #lexer.range('(', ')', false, false, true))
local str = token(lexer.STRING, sq_str + dq_str)
local target = token(lexer.LABEL, branch + sbranch + sbranchx)
local ws = token(lexer.WHITESPACE, lexer.space^1)
diff --git a/lexlua/sql.lua b/lexlua/sql.lua
index b38e48501..f0049f552 100644
--- a/lexlua/sql.lua
+++ b/lexlua/sql.lua
@@ -41,17 +41,18 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"') +
- lexer.delimited_range('`')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local bq_str = lexer.range('`')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str))
-- Comments.
-local line_comment = (P('--') + '#') * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol(P('--') + '#')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S(',()')))
diff --git a/lexlua/taskpaper.lua b/lexlua/taskpaper.lua
index de9270286..a7a390c68 100644
--- a/lexlua/taskpaper.lua
+++ b/lexlua/taskpaper.lua
@@ -19,14 +19,12 @@ local overdue_tag = token('overdue_tag', P('@overdue'))
local plain_tag = token('plain_tag', P('@') * lexer.word)
-local extended_tag = token('extended_tag',
- P('@') * lexer.word * P('(') *
- (lexer.word + R('09') + P('-'))^1 * P(')'))
+local extended_tag = token('extended_tag', P('@') * lexer.word * P('(') *
+ (lexer.word + R('09') + P('-'))^1 * P(')'))
-- Projects
-local project = token('project',
- lexer.nested_pair(lexer.starts_line(lexer.alnum), ':') *
- lexer.newline)
+local project = token('project', lexer.range(lexer.starts_line(lexer.alnum),
+ ':', false, false, true) * lexer.newline)
-- Notes
local note = token('note', delimiter^1 * lexer.alnum * lexer.nonnewline^0)
diff --git a/lexlua/tcl.lua b/lexlua/tcl.lua
index 45e3ccf9d..0f0a0d80a 100644
--- a/lexlua/tcl.lua
+++ b/lexlua/tcl.lua
@@ -13,11 +13,12 @@ local lex = lexer.new('tcl')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comment.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * P(function(input, index)
- local i = index - 2
- while i > 0 and input:find('^[ \t]', i) do i = i - 1 end
- if i < 1 or input:find('^[\r\n;]', i) then return index end
-end) * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#' *
+ P(function(input, index)
+ local i = index - 2
+ while i > 0 and input:find('^[ \t]', i) do i = i - 1 end
+ if i < 1 or input:find('^[\r\n;]', i) then return index end
+ end))))
-- Separator (semicolon).
lex:add_rule('separator', token(lexer.CLASS, P(';')))
@@ -32,15 +33,13 @@ lex:add_rule('brackets', token(lexer.VARIABLE, S('[]')))
-- Variable substitution.
lex:add_rule('variable', token(lexer.STRING, '$' *
- (lexer.alnum + '_' + P(':')^2)^0))
+ (lexer.alnum + '_' + P(':')^2)^0))
-- Backslash substitution.
-lex:add_rule('backslash', token(lexer.TYPE,
- '\\' * (lexer.digit * lexer.digit^-2 +
- 'x' * lexer.xdigit^1 +
- 'u' * lexer.xdigit * lexer.xdigit^-3 +
- 'U' * lexer.xdigit * lexer.xdigit^-7 +
- 1)))
+local oct = lexer.digit * lexer.digit^-2
+local hex = 'x' * lexer.xdigit^1
+local unicode = 'u' * lexer.xdigit * lexer.xdigit^-3
+lex:add_rule('backslash', token(lexer.TYPE, '\\' * (oct + hex + unicode + 1)))
-- Fold points.
lex:add_fold_point(lexer.KEYWORD, '{', '}')
diff --git a/lexlua/template.txt b/lexlua/template.txt
index 730479384..a4dda44c4 100644
--- a/lexlua/template.txt
+++ b/lexlua/template.txt
@@ -18,14 +18,15 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.{}[]()')))
diff --git a/lexlua/tex.lua b/lexlua/tex.lua
index 9e707f9be..8c9e76860 100644
--- a/lexlua/tex.lua
+++ b/lexlua/tex.lua
@@ -12,16 +12,16 @@ local lex = lexer.new('tex')
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%')))
-- TeX environments.
lex:add_rule('environment', token('environment', '\\' * (P('begin') + 'end') *
- lexer.word))
+ lexer.word))
lex:add_style('environment', lexer.STYLE_KEYWORD)
-- Commands.
-lex:add_rule('command', token(lexer.KEYWORD, '\\' *
- (lexer.alpha^1 + S('#$&~_^%{}'))))
+lex:add_rule('command', token(lexer.KEYWORD, '\\' * (lexer.alpha^1 +
+ S('#$&~_^%{}'))))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('$&#{}[]')))
diff --git a/lexlua/texinfo.lua b/lexlua/texinfo.lua
index 89628715e..bb5ba55c6 100644
--- a/lexlua/texinfo.lua
+++ b/lexlua/texinfo.lua
@@ -79,7 +79,7 @@ local directives_base = word_match([[
bye
]], true)
lex:add_rule('directive', token('directives', ('@end' * lexer.space^1 + '@') *
- directives_base))
+ directives_base))
lex:add_style('directives', lexer.STYLE_FUNCTION)
-- Chapters.
@@ -103,7 +103,7 @@ local chapters_base = word_match([[
chapheading majorheading heading subheading subsubheading
]], true)
lex:add_rule('chapter', token('chapters', ('@end' * lexer.space^1 + '@') *
- chapters_base))
+ chapters_base))
lex:add_style('chapters', lexer.STYLE_CLASS)
-- Common keywords.
@@ -175,35 +175,32 @@ local keyword_base = word_match([[
-- not implemented
]], true)
lex:add_rule('keyword', token(lexer.KEYWORD, ('@end' * lexer.space^1 + '@') *
- keyword_base))
+ keyword_base))
+
+local nested_braces = lexer.range('{', '}', false, false, true)
-- Italics
-lex:add_rule('emph', token('emph',
- '@emph' *
- lexer.delimited_range('{}', false, true, true)))
-lex:add_style('emph', lexer.STYLE_STRING..',italics')
+lex:add_rule('emph', token('emph', '@emph' * nested_braces))
+
+lex:add_style('emph', lexer.STYLE_STRING .. ',italics')
-- Bold
-lex:add_rule('strong', token('strong',
- '@strong' *
- lexer.delimited_range('{}', false, true, true)))
-lex:add_style('strong', lexer.STYLE_STRING..',bold')
+lex:add_rule('strong', token('strong', '@strong' * nested_braces))
+lex:add_style('strong', lexer.STYLE_STRING .. ',bold')
-- Identifiers
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range('{}', false, true, true)))
+lex:add_rule('string', token(lexer.STRING, nested_braces))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Comments.
-local line_comment = '@c' * lexer.nonnewline_esc^0
---local line_comment_long = '@comment' * lexer.nonnewline_esc^0
-local block_comment = '@ignore' * (lexer.any - '@end ignore')^0 *
- P('@end ignore')^-1
+local line_comment = lexer.to_eol('@c', true)
+--local line_comment_long = lexer.to_eol('@comment', true)
+local block_comment = lexer.range('@ignore', '@end ignore')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Fold points.
diff --git a/lexlua/toml.lua b/lexlua/toml.lua
index 7b32c0c1d..ae6835174 100644
--- a/lexlua/toml.lua
+++ b/lexlua/toml.lua
@@ -9,10 +9,9 @@ local lex = lexer.new('toml', {fold_by_indentation = true})
-- Whitespace
lex:add_rule('indent', #lexer.starts_line(S(' \t')) *
- (token(lexer.WHITESPACE, ' ') +
- token('indent_error', '\t'))^1)
+ (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1)
lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 +
- lexer.newline^1))
+ lexer.newline^1))
lex:add_style('indent_error', 'back:%(color.red)')
-- kewwords.
@@ -22,32 +21,32 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[true false]]))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"')))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#')))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('#=+-,.{}[]()')))
-- Datetime.
-lex:add_rule('datetime',
- token('timestamp',
- lexer.digit * lexer.digit * lexer.digit * lexer.digit * -- yr
- '-' * lexer.digit * lexer.digit^-1 * -- month
- '-' * lexer.digit * lexer.digit^-1 * -- day
- ((S(' \t')^1 + S('tT'))^-1 * -- separator
- lexer.digit * lexer.digit^-1 * -- hour
- ':' * lexer.digit * lexer.digit * -- minute
- ':' * lexer.digit * lexer.digit * -- second
- ('.' * lexer.digit^0)^-1 * -- fraction
- ('Z' + -- timezone
- S(' \t')^0 * S('-+') * lexer.digit * lexer.digit^-1 *
- (':' * lexer.digit * lexer.digit)^-1)^-1)^-1))
+local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit
+local month = lexer.digit * lexer.digit^-1
+local day = lexer.digit * lexer.digit^-1
+local date = year * '-' * month * '-' * day
+local hours = lexer.digit * lexer.digit^-1
+local minutes = lexer.digit * lexer.digit
+local seconds = lexer.digit * lexer.digit
+local fraction = '.' * lexer.digit^0
+local time = hours * ':' * minutes * ':' * seconds * fraction^-1
+local T = S(' \t')^1 + S('tT')
+local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1
+lex:add_rule('datetime', token('timestamp', date * (T * time * zone^-1)))
lex:add_style('timestamp', lexer.STYLE_NUMBER)
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
return lex
diff --git a/lexlua/txt2tags.lua b/lexlua/txt2tags.lua
index 828b8b8fe..1fca8a695 100644
--- a/lexlua/txt2tags.lua
+++ b/lexlua/txt2tags.lua
@@ -16,81 +16,51 @@ local ws = token(lexer.WHITESPACE, (lexer.space - lexer.newline)^1)
-- Titles
local alphanumeric = R('AZ') + R('az') + R('09') + P('_') + P('-')
local header_label = token('header_label_start', '[') *
- token('header_label', alphanumeric^1) *
- token('header_label_end', ']')
-local header = (token('h5', ('=====' * (lexer.nonnewline - '=')^1 * '=====') +
- ('+++++' * (lexer.nonnewline - '+')^1 * '+++++')) *
- header_label^-1) +
- (token('h4', ('====' * (lexer.nonnewline - '=')^1 * '====') +
- ('++++' * (lexer.nonnewline - '+')^1 * '++++')) *
- header_label^-1) +
- (token('h3', ('===' * (lexer.nonnewline - '=')^1 * '===') +
- ('+++' * (lexer.nonnewline - '+')^1 * '+++')) *
- header_label^-1) +
- (token('h2', ('==' * (lexer.nonnewline - '=')^1 * '==') +
- ('++' * (lexer.nonnewline - '+')^1 * '++')) *
- header_label^-1) +
- (token('h1', ('=' * (lexer.nonnewline - '=')^1 * '=') +
- ('+' * (lexer.nonnewline - '+')^1 * '+')) *
- header_label^-1)
+ token('header_label', alphanumeric^1) * token('header_label_end', ']')
+local function h(level)
+ local equal = string.rep('=', level) * (lexer.nonnewline - '=')^1 *
+ string.rep('=', level)
+ local plus = string.rep('+', level) * (lexer.nonnewline - '+')^1 *
+ string.rep('+', level)
+ return token('h' .. level, equal + plus) * header_label^-1
+end
+local header = h(5) + h(4) + h(3) + h(2) + h(1)
-- Comments.
-local line_comment = lexer.starts_line('%') * lexer.nonnewline^0
-local block_comment = lexer.starts_line('%%%') *
- (lexer.space - lexer.newline)^0 * lexer.newline *
- (lexer.any - '%%%')^0 * lexer.starts_line('%%%')^-1
+local line_comment = lexer.to_eol(lexer.starts_line('%'))
+local block_comment = lexer.range(lexer.starts_line('%%%'))
local comment = token(lexer.COMMENT, block_comment + line_comment)
-- Inline.
-local bold = token('bold', ('**' * nonspace * '**' * S('*')^0) +
- ('**' * nonspace *
- (lexer.nonnewline - (nonspace * '**'))^0 *
- nonspace * '**' * S('*')^0))
-local italic = token('italic', ('//' * nonspace * '//' * S('/')^0) +
- ('//' * nonspace *
- (lexer.nonnewline - (nonspace * '//'))^0 *
- nonspace * '//' * S('/')^0))
-local underline = token('underline', ('__' * nonspace * '__' * S('_')^0) +
- ('__' * nonspace *
- (lexer.nonnewline - (nonspace * '__'))^0 *
- nonspace * '__' * S('_')^0))
-local strike = token('strike', ('--' * nonspace * '--' * S('-')^0) +
- ('--' * nonspace *
- (lexer.nonnewline - (nonspace * '--'))^0 *
- nonspace * '--' * S('-')^0))
-local mono = token('mono', ('``' * nonspace * '``' * S('`')^0) +
- ('``' * nonspace *
- (lexer.nonnewline - (nonspace * '``'))^0 *
- nonspace * '``' * S('`')^0))
-local raw = token('raw', ('""' * nonspace * '""' * S('"')^0) +
- ('""' * nonspace *
- (lexer.nonnewline - (nonspace * '""'))^0 * nonspace *
- '""' * S('"')^0))
-local tagged = token('tagged', ('\'\'' * nonspace * '\'\'' * S('\'')^0) +
- ('\'\'' * nonspace *
- (lexer.nonnewline - (nonspace * '\'\''))^0 *
- nonspace * '\'\'' * S('\'')^0))
+local function span(name, delimiter)
+ return token(name, (delimiter * nonspace * delimiter * S(delimiter)^0) + (
+ delimiter * nonspace * (lexer.nonnewline - nonspace * delimiter)^0 *
+ nonspace * delimiter * S(delimiter)^0))
+end
+local bold = span('bold', '**')
+local italic = span('italic', '//')
+local underline = span('underline', '__')
+local strike = span('strike', '--')
+local mono = span('mono', '``')
+local raw = span('raw', '""')
+local tagged = span('tagged', "''")
local inline = bold + italic + underline + strike + mono + raw + tagged
-- Link.
local email = token('email', (nonspace - '@')^1 * '@' * (nonspace - '.')^1 *
- ('.' * (nonspace - '.' - '?')^1)^1 *
- ('?' * nonspace^1)^-1)
+ ('.' * (nonspace - '.' - '?')^1)^1 * ('?' * nonspace^1)^-1)
local host = token('host', (P('www') + P('WWW') + P('ftp') + P('FTP')) *
- (nonspace - '.')^0 * '.' * (nonspace - '.')^1 * '.' *
- (nonspace - ',' - '.')^1)
+ (nonspace - '.')^0 * '.' * (nonspace - '.')^1 * '.' *
+ (nonspace - ',' - '.')^1)
local url = token('url', (nonspace - '://')^1 * '://' *
- (nonspace - ',' - '.')^1 *
- ('.' * (nonspace - ',' - '.' - '/' - '?' - '#')^1)^1 *
- ('/' * (nonspace - '.' - '/' - '?' - '#')^0 *
- ('.' * (nonspace - ',' - '.' - '?' - '#')^1)^0)^0 *
- ('?' * (nonspace - '#')^1)^-1 * ('#' * nonspace^0)^-1)
-local label_with_address = token('label_start', '[') *
- lexer.space^0 *
- token('address_label', ((nonspace - ']')^1 *
- lexer.space^1)^1) *
- token('address', (nonspace - ']')^1) *
- token('label_end', ']')
+ (nonspace - ',' - '.')^1 *
+ ('.' * (nonspace - ',' - '.' - '/' - '?' - '#')^1)^1 *
+ ('/' * (nonspace - '.' - '/' - '?' - '#')^0 *
+ ('.' * (nonspace - ',' - '.' - '?' - '#')^1)^0)^0 *
+ ('?' * (nonspace - '#')^1)^-1 * ('#' * nonspace^0)^-1)
+local label_with_address = token('label_start', '[') * lexer.space^0 *
+ token('address_label', ((nonspace - ']')^1 * lexer.space^1)^1) *
+ token('address', (nonspace - ']')^1) * token('label_end', ']')
local link = label_with_address + url + host + email
-- Line.
@@ -98,54 +68,43 @@ local line = token('line', (P('-') + P('=') + P('_'))^20)
-- Image.
local image_only = token('image_start', '[') *
- token('image', (nonspace - ']')^1) * token('image_end', ']')
+ token('image', (nonspace - ']')^1) * token('image_end', ']')
local image_link = token('image_link_start', '[') * image_only *
- token('image_link_sep', lexer.space^1) *
- token('image_link', (nonspace - ']')^1) *
- token('image_link_end', ']')
+ token('image_link_sep', lexer.space^1) *
+ token('image_link', (nonspace - ']')^1) * token('image_link_end', ']')
local image = image_link + image_only
-- Macro.
local macro = token('macro', '%%' * (nonspace - '(')^1 *
- ('(' * (lexer.nonnewline - ')')^0 * ')')^-1)
+ lexer.range('(', ')', true)^-1)
-- Verbatim.
-local verbatim_line = lexer.starts_line('```') * (lexer.space - lexer.newline) *
- lexer.nonnewline^0
-local verbatim_block = lexer.starts_line('```') *
- (lexer.space - lexer.newline)^0 * lexer.newline *
- (lexer.any - '```')^0 * lexer.starts_line('```')^-1
+local verbatim_line = lexer.to_eol(lexer.starts_line('```') * S(' \t'))
+local verbatim_block = lexer.range(lexer.starts_line('```'))
local verbatim_area = token('verbatim_area', verbatim_block + verbatim_line)
-- Raw.
-local raw_line = lexer.starts_line('"""') * (lexer.space - lexer.newline) *
- lexer.nonnewline^0
-local raw_block = lexer.starts_line('"""') * (lexer.space - lexer.newline)^0 *
- lexer.newline * (lexer.any - '"""')^0 *
- lexer.starts_line('"""')^-1
+local raw_line = lexer.to_eol(lexer.starts_line('"""') * S(' \t'))
+local raw_block = lexer.range(lexer.starts_line('"""'))
local raw_area = token('raw_area', raw_block + raw_line)
-- Tagged.
-local tagged_line = lexer.starts_line('\'\'\'') *
- (lexer.space - lexer.newline) * lexer.nonnewline^0
-local tagged_block = lexer.starts_line('\'\'\'') *
- (lexer.space - lexer.newline)^0 * lexer.newline *
- (lexer.any - '\'\'\'')^0 * lexer.starts_line('\'\'\'')^-1
+local tagged_line = lexer.to_eol(lexer.starts_line('\'\'\'') * S(' \t'))
+local tagged_block = lexer.range(lexer.starts_line('\'\'\''))
local tagged_area = token('tagged_area', tagged_block + tagged_line)
-- Table.
local table_sep = token('table_sep', '|')
local cell_content = inline + link + image + macro +
- token('cell_content', lexer.nonnewline - ' |')
-local header_cell_content = token('header_cell_content',
- lexer.nonnewline - ' |')
+ token('cell_content', lexer.nonnewline - ' |')
+local header_cell_content = token('header_cell_content', lexer.nonnewline -
+ ' |')
local field_sep = ' ' * table_sep^1 * ' '
local table_row_end = P(' ')^0 * table_sep^0
local table_row = lexer.starts_line(P(' ')^0 * table_sep) * cell_content^0 *
- (field_sep * cell_content^0)^0 * table_row_end
+ (field_sep * cell_content^0)^0 * table_row_end
local table_row_header = lexer.starts_line(P(' ')^0 * table_sep * table_sep) *
- header_cell_content^0 *
- (field_sep * header_cell_content^0)^0 * table_row_end
+ header_cell_content^0 * (field_sep * header_cell_content^0)^0 * table_row_end
local table = table_row_header + table_row
lex:add_rule('table', table)
@@ -162,15 +121,15 @@ lex:add_rule('raw_area', raw_area)
lex:add_rule('tagged_area', tagged_area)
local font_size = lexer.property_int['fontsize'] > 0 and
- lexer.property_int['fontsize'] or 10
+ lexer.property_int['fontsize'] or 10
local hstyle = 'fore:$(color.red)'
lex:add_style('line', 'bold')
-lex:add_style('h5', hstyle..',size:'..(font_size + 1))
-lex:add_style('h4', hstyle..',size:'..(font_size + 2))
-lex:add_style('h3', hstyle..',size:'..(font_size + 3))
-lex:add_style('h2', hstyle..',size:'..(font_size + 4))
-lex:add_style('h1', hstyle..',size:'..(font_size + 5))
+lex:add_style('h5', hstyle .. ',size:' .. (font_size + 1))
+lex:add_style('h4', hstyle .. ',size:' .. (font_size + 2))
+lex:add_style('h3', hstyle .. ',size:' .. (font_size + 3))
+lex:add_style('h2', hstyle .. ',size:' .. (font_size + 4))
+lex:add_style('h1', hstyle .. ',size:' .. (font_size + 5))
lex:add_style('header_label', lexer.STYLE_LABEL)
lex:add_style('email', 'underlined')
lex:add_style('host', 'underlined')
diff --git a/lexlua/vala.lua b/lexlua/vala.lua
index 456841187..3a2b16fbf 100644
--- a/lexlua/vala.lua
+++ b/lexlua/vala.lua
@@ -34,20 +34,19 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-local sq_str = lexer.delimited_range("'", true)
-local dq_str = lexer.delimited_range('"', true)
-local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
-local ml_str = '@' * lexer.delimited_range('"', false, true)
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+local tq_str = lexer.range('"""')
+local ml_str = '@' * lexer.range('"', false, false)
lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + ml_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('uUlLfFdDmM')^-1))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('uUlLfFdDmM')^-1))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}')))
diff --git a/lexlua/vb.lua b/lexlua/vb.lua
index 128cff63e..f85f8f875 100644
--- a/lexlua/vb.lua
+++ b/lexlua/vb.lua
@@ -33,19 +33,16 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[
-- Comments.
lex:add_rule('comment', token(lexer.COMMENT,
- (P("'") + word_match([[rem]], true)) *
- lexer.nonnewline^0))
+ lexer.to_eol("'" + word_match([[rem]], true))))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range('"', true, true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false)))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('LlUuFf')^-2))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=><+-*^&:.,_()')))
diff --git a/lexlua/vbscript.lua b/lexlua/vbscript.lua
index c0d5ba221..0bf2c0e12 100644
--- a/lexlua/vbscript.lua
+++ b/lexlua/vbscript.lua
@@ -33,19 +33,16 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[
-- Comments.
lex:add_rule('comment', token(lexer.COMMENT,
- (P("'") + word_match([[rem]], true)) *
- lexer.nonnewline^0))
+ lexer.to_eol("'" + word_match([[rem]], true))))
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING,
- lexer.delimited_range('"', true, true)))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false)))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
- S('LlUuFf')^-2))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=><+-*^&:.,_()')))
diff --git a/lexlua/vcard.lua b/lexlua/vcard.lua
index 52ca4035a..0ea39b284 100644
--- a/lexlua/vcard.lua
+++ b/lexlua/vcard.lua
@@ -44,35 +44,30 @@ local identifier = lexer.alpha^1 * lexer.digit^0 * (P('-') * lexer.alnum^1)^0
-- Extension.
local extension = token(lexer.TYPE, lexer.starts_line(S('xX') * P('-') *
- identifier * #S(':;')))
+ identifier * #S(':;')))
-- Parameter.
local parameter = token(lexer.IDENTIFIER,
- lexer.starts_line(identifier * #S(':='))) +
- token(lexer.STRING, identifier) * #S(':=')
+ lexer.starts_line(identifier * #S(':='))) + token(lexer.STRING, identifier) *
+ #S(':=')
-- Operators.
local operator = token(lexer.OPERATOR, S('.:;='))
-- Group and property.
local group_sequence = token(lexer.CONSTANT, lexer.starts_line(identifier)) *
- token(lexer.OPERATOR, P('.')) *
- (required_property + supported_property +
- lexer.token(lexer.TYPE, S('xX') * P('-') * identifier) *
- #S(':;'))
+ token(lexer.OPERATOR, P('.')) * (required_property + supported_property +
+ lexer.token(lexer.TYPE, S('xX') * P('-') * identifier) * #S(':;'))
-- Begin vCard, end vCard.
local begin_sequence = token(lexer.KEYWORD, P('BEGIN')) *
- token(lexer.OPERATOR, P(':')) *
- token(lexer.COMMENT, P('VCARD'))
+ token(lexer.OPERATOR, P(':')) * token(lexer.COMMENT, P('VCARD'))
local end_sequence = token(lexer.KEYWORD, P('END')) *
- token(lexer.OPERATOR, P(':')) *
- token(lexer.COMMENT, P('VCARD'))
+ token(lexer.OPERATOR, P(':')) * token(lexer.COMMENT, P('VCARD'))
-- vCard version (in v3.0 and v4.0 must appear immediately after BEGIN:VCARD).
local version_sequence = token(lexer.KEYWORD, P('VERSION')) *
- token(lexer.OPERATOR, P(':')) *
- token(lexer.CONSTANT, lexer.digit^1 *
- (P('.') * lexer.digit^1)^-1)
+ token(lexer.OPERATOR, P(':')) *
+ token(lexer.CONSTANT, lexer.digit^1 * (P('.') * lexer.digit^1)^-1)
-- Data.
local data = token(lexer.IDENTIFIER, lexer.any)
diff --git a/lexlua/verilog.lua b/lexlua/verilog.lua
index efae1ebbd..e3b5bf454 100644
--- a/lexlua/verilog.lua
+++ b/lexlua/verilog.lua
@@ -46,11 +46,11 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"')))
+lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
-- Comments.
-local line_comment = '//' * lexer.nonnewline^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//')
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
@@ -58,9 +58,8 @@ local bin_suffix = S('bB') * S('01_xXzZ')^1
local oct_suffix = S('oO') * S('01234567_xXzZ')^1
local dec_suffix = S('dD') * S('0123456789_xXzZ')^1
local hex_suffix = S('hH') * S('0123456789abcdefABCDEF_xXzZ')^1
-lex:add_rule('number', token(lexer.NUMBER, (lexer.digit + '_')^1 +
- "'" * (bin_suffix + oct_suffix +
- dec_suffix + hex_suffix)))
+lex:add_rule('number', token(lexer.NUMBER, (lexer.digit + '_')^1 + "'" *
+ (bin_suffix + oct_suffix + dec_suffix + hex_suffix)))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=~+-/*<>%&|^~,:;()[]{}')))
diff --git a/lexlua/vhdl.lua b/lexlua/vhdl.lua
index 938f738cb..7570de7f5 100644
--- a/lexlua/vhdl.lua
+++ b/lexlua/vhdl.lua
@@ -50,18 +50,18 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + "'") *
- (lexer.alnum + S("_'"))^1))
+ (lexer.alnum + S("_'"))^1))
-- Strings.
-local sq_str = lexer.delimited_range("'", true, true)
-local dq_str = lexer.delimited_range('"', true)
+local sq_str = lexer.range("'", true, false)
+local dq_str = lexer.range('"', true)
lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--')))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Operators.
lex:add_rule('operator', token(lexer.OPERATOR, S('=/!:;<>+-/*%&|^~()')))
diff --git a/lexlua/wsf.lua b/lexlua/wsf.lua
index 2d64356cc..dfa14b1eb 100644
--- a/lexlua/wsf.lua
+++ b/lexlua/wsf.lua
@@ -13,12 +13,11 @@ local ws = token(lexer.WHITESPACE, lexer.space^1)
lex:add_rule('whitespace', ws)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 *
- P('-->')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
local alpha = R('az', 'AZ', '\127\255')
-local word_char = lexer.alnum + S('_-:.??')
-local identifier = (alpha + S('_-:.??')) * word_char^0
+local word_char = lexer.alnum + S('_-:.?')
+local identifier = (alpha + S('_-:.?')) * word_char^0
-- Elements.
local element = token('element', '<' * P('/')^-1 * identifier)
@@ -47,14 +46,15 @@ local equals = token(lexer.OPERATOR, '=') * in_tag
lex:add_rule('equals', equals)
-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
local string = #S('\'"') * lexer.last_char_includes('=') *
- token(lexer.STRING, lexer.delimited_range("'", false, true) +
- lexer.delimited_range('"', false, true))
+ token(lexer.STRING, sq_str + dq_str)
lex:add_rule('string', string)
-- Numbers.
lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') *
- token(lexer.NUMBER, lexer.digit^1 * P('%')^-1) * in_tag)
+ token(lexer.NUMBER, lexer.digit^1 * P('%')^-1) * in_tag)
-- Entities.
lex:add_rule('entity', token('entity', '&' * word_match[[
@@ -74,8 +74,7 @@ lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
-- Tags that start embedded languages.
local embed_start_tag = element *
- (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 *
- ws^0 * tag_close
+ (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * ws^0 * tag_close
local embed_end_tag = element * tag_close
-- Embedded JavaScript.
diff --git a/lexlua/xml.lua b/lexlua/xml.lua
index 3d6b59b63..3acee7d22 100644
--- a/lexlua/xml.lua
+++ b/lexlua/xml.lua
@@ -12,31 +12,29 @@ local ws = token(lexer.WHITESPACE, lexer.space^1)
lex:add_rule('whitespace', ws)
-- Comments and CDATA.
-lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 *
- P('-->')^-1))
-lex:add_rule('cdata', token('cdata', '<![CDATA[' * (lexer.any - ']]>')^0 *
- P(']]>')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
+lex:add_rule('cdata', token('cdata', lexer.range('<![CDATA[', ']]>')))
lex:add_style('cdata', lexer.STYLE_COMMENT)
local alpha = R('az', 'AZ', '\127\255')
local word_char = lexer.alnum + S('_-:.??')
-local identifier = (alpha + S('_-:.??')) * word_char^0
+local identifier = (alpha + S('_-:.?')) * word_char^0
-- Doctypes and other markup tags.
lex:add_rule('doctype', token('doctype', P('<!DOCTYPE')) * ws *
- token('doctype', identifier) * (ws * identifier)^-1 *
- (1 - P('>'))^0 * token('doctype', '>'))
+ token('doctype', identifier) * (ws * identifier)^-1 * (1 - P('>'))^0 *
+ token('doctype', '>'))
lex:add_style('doctype', lexer.STYLE_COMMENT)
-- Processing instructions.
lex:add_rule('proc_insn', token('proc_insn', P('<?') * (1 - P('?>'))^0 *
- P('?>')^-1))
+ P('?>')^-1))
lex:add_style('proc_insn', lexer.STYLE_COMMENT)
-- Elements.
local namespace = token(lexer.OPERATOR, ':') * token('namespace', identifier)
lex:add_rule('element', token('element', '<' * P('/')^-1 * identifier) *
- namespace^-1)
+ namespace^-1)
lex:add_style('element', lexer.STYLE_KEYWORD)
lex:add_style('namespace', lexer.STYLE_CLASS)
@@ -45,7 +43,7 @@ lex:add_rule('close_tag', token('element', P('/')^-1 * '>'))
-- Attributes.
lex:add_rule('attribute', token('attribute', identifier) * namespace^-1 *
- #(lexer.space^0 * '='))
+ #(lexer.space^0 * '='))
lex:add_style('attribute', lexer.STYLE_TYPE)
-- TODO: performance is terrible on large files.
@@ -61,14 +59,14 @@ end)
--lex:add_rule('equal', token(lexer.OPERATOR, '=')) -- * in_tag
-- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
lex:add_rule('string', #S('\'"') * lexer.last_char_includes('=') *
- token(lexer.STRING,
- lexer.delimited_range("'", false, true) +
- lexer.delimited_range('"', false, true)))
+ token(lexer.STRING, sq_str + dq_str))
-- Numbers.
lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') *
- token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
+ token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
-- Entities.
lex:add_rule('entity', token('entity', '&' * word_match[[
diff --git a/lexlua/xtend.lua b/lexlua/xtend.lua
index d8efbb574..c54bc137d 100644
--- a/lexlua/xtend.lua
+++ b/lexlua/xtend.lua
@@ -13,7 +13,7 @@ lex:add_rule('whitespace', ws)
-- Classes.
lex:add_rule('class', token(lexer.KEYWORD, P('class')) * ws^1 *
- token(lexer.CLASS, lexer.word))
+ token(lexer.CLASS, lexer.word))
-- Keywords.
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
@@ -41,17 +41,17 @@ lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Templates.
-lex:add_rule('template', token('template', "'''" * (lexer.any - P("'''"))^0 *
- P("'''")^-1))
+lex:add_rule('template', token('template', lexer.range("'''")))
lex:add_style('template', lexer.STYLE_EMBEDDED)
-- Strings.
-lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
- lexer.delimited_range('"', true)))
+local sq_str = lexer.range("'", true)
+local dq_str = lexer.range('"', true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
-- Comments.
-local line_comment = '//' * lexer.nonnewline_esc^0
-local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+local line_comment = lexer.to_eol('//', true)
+local block_comment = lexer.range('/*', '*/')
lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
-- Numbers.
diff --git a/lexlua/yaml.lua b/lexlua/yaml.lua
index 2cd54d210..fd70182fc 100644
--- a/lexlua/yaml.lua
+++ b/lexlua/yaml.lua
@@ -10,15 +10,16 @@ local M = {_NAME = 'yaml'}
-- Whitespace.
local indent = #lexer.starts_line(S(' \t')) *
- (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1
+ (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1
local ws = token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1)
-- Comments.
-local comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0)
+local comment = token(lexer.COMMENT, lexer.to_eol('#'))
-- Strings.
-local string = token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"'))
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local string = token(lexer.STRING, sq_str + dq_str)
-- Numbers.
local integer = lexer.dec_num + lexer.hex_num + '0' * S('oO') * R('07')^1
@@ -26,22 +27,23 @@ local special_num = '.' * word_match({'inf', 'nan'}, nil, true)
local number = token(lexer.NUMBER, special_num + lexer.float + integer)
-- Timestamps.
-local ts = token('timestamp',
- lexer.digit * lexer.digit * lexer.digit * lexer.digit * -- year
- '-' * lexer.digit * lexer.digit^-1 * -- month
- '-' * lexer.digit * lexer.digit^-1 * -- day
- ((S(' \t')^1 + S('tT'))^-1 * -- separator
- lexer.digit * lexer.digit^-1 * -- hour
- ':' * lexer.digit * lexer.digit * -- minute
- ':' * lexer.digit * lexer.digit * -- second
- ('.' * lexer.digit^0)^-1 * -- fraction
- ('Z' + -- timezone
- S(' \t')^0 * S('-+') * lexer.digit * lexer.digit^-1 *
- (':' * lexer.digit * lexer.digit)^-1)^-1)^-1)
+local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit
+local month = lexer.digit * lexer.digit^-1
+local day = lexer.digit * lexer.digit^-1
+local date = year * '-' * month * '-' * day
+local hours = lexer.digit * lexer.digit^-1
+local minutes = lexer.digit * lexer.digit
+local seconds = lexer.digit * lexer.digit
+local fraction = '.' * lexer.digit^0
+local time = hours * ':' * minutes * ':' * seconds * fraction^-1
+local T = S(' \t')^1 + S('tT')
+local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1
+local ts = token('timestamp', date * (T^-1 * time * zone^-1)^-1)
-- Constants.
-local constant = token(lexer.CONSTANT,
- word_match({'null', 'true', 'false'}, nil, true))
+local constant = token(lexer.CONSTANT, word_match({
+ 'null', 'true', 'false'
+}, nil, true))
-- Types.
local type = token(lexer.TYPE, '!!' * word_match({
@@ -50,38 +52,36 @@ local type = token(lexer.TYPE, '!!' * word_match({
-- Scalar types.
'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp',
'value', 'yaml'
-}, nil, true) + '!' * lexer.delimited_range('<>'))
+}, nil, true) + '!' * lexer.range('<', '>', true))
-- Document boundaries.
local doc_bounds = token('document', lexer.starts_line(P('---') + '...'))
-- Directives
local directive = token('directive', lexer.starts_line('%') *
- lexer.nonnewline^1)
+ lexer.nonnewline^1)
local word = (lexer.alpha + '-' * -lexer.space) * (lexer.alnum + '-')^0
-- Keys and literals.
local colon = S(' \t')^0 * ':' * (lexer.space + -1)
-local key = token(lexer.KEYWORD,
- #word * (lexer.nonnewline - colon)^1 * #colon *
- P(function(input, index)
- local line = input:sub(1, index - 1):match('[^\r\n]+$')
- return not line:find('[%w-]+:') and index
- end))
+local key = token(lexer.KEYWORD, #word * (lexer.nonnewline - colon)^1 * #colon *
+ P(function(input, index)
+ local line = input:sub(1, index - 1):match('[^\r\n]+$')
+ return not line:find('[%w-]+:') and index
+ end))
local value = #word * (lexer.nonnewline - lexer.space^0 * S(',]}'))^1
local block = S('|>') * S('+-')^-1 * (lexer.newline + -1) *
- function(input, index)
- local rest = input:sub(index)
- local level = #rest:match('^( *)')
- for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do
- if indent - pos < level and line ~= ' ' or
- level == 0 and pos > 1 then
- return index + pos - 1
- end
- end
- return #input + 1
- end
+ function(input, index)
+ local rest = input:sub(index)
+ local level = #rest:match('^( *)')
+ for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do
+ if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then
+ return index + pos - 1
+ end
+ end
+ return #input + 1
+ end
local literal = token('literal', value + block)
-- Indicators.
diff --git a/test/test_lexlua.lua b/test/test_lexlua.lua
index 276f8273c..cd27f4bed 100644
--- a/test/test_lexlua.lua
+++ b/test/test_lexlua.lua
@@ -178,13 +178,50 @@ end
-- Unit tests.
+function test_to_eol()
+ local code = '#foo\\\nbar\\\nbaz'
+ assert(lpeg.match(lexer.to_eol('#'), code) == 6)
+ assert(lpeg.match(lexer.to_eol('#', true), code) == #code + 1)
+end
+
+function test_range()
+ assert(lpeg.match(lexer.range('"'), '"foo\\"bar\n"baz') == 12)
+ assert(lpeg.match(lexer.range('"', true), '"foo\\"bar\n"baz') == 10)
+ assert(lpeg.match(lexer.range('"', false, false), '"foo\\"bar\n"baz') == 7)
+
+ assert(lpeg.match(lexer.range('(', ')'), '(foo\\)bar)baz') == 7)
+
+ assert(lpeg.match(lexer.range('/*', '*/'), '/*/*foo*/bar*/baz') == 10)
+ assert(lpeg.match(lexer.range('/*', '*/', false, false, true),
+ '/*/*foo*/bar*/baz') == 15)
+end
+
+function test_starts_line()
+ assert(lpeg.match(lexer.starts_line('#'), '#foo') == 2)
+ assert(lpeg.match(lexer.starts_line('#'), '\n#foo', 2) == 3)
+ assert(not lpeg.match(lexer.starts_line('#'), ' #foo', 2))
+end
+
+function test_last_char_includes()
+ assert(lpeg.match(lexer.last_char_includes('=,'), '/foo/'))
+ assert(lpeg.match(lexer.last_char_includes('=,'), 'foo=/bar/', 5) == 5)
+ assert(lpeg.match(lexer.last_char_includes('=,'), 'foo, /bar/', 6) == 6)
+ assert(not lpeg.match(lexer.last_char_includes('=,'), 'foo/bar', 4))
+end
+
+function test_word_match()
+ assert(lpeg.match(lexer.word_match[[foo bar baz]], 'foo') == 4)
+ assert(not lpeg.match(lexer.word_match[[foo bar baz]], 'foo_bar'))
+ assert(lpeg.match(lexer.word_match([[foo! bar? baz.]], true), 'FOO!') == 5)
+end
+
-- Tests a basic lexer with a few simple rules and no custom styles.
function test_basics()
local lex = lexer.new('test')
assert_default_styles(lex)
lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[foo bar baz]]))
- lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"')))
+ lex:add_rule('string', token(lexer.STRING, lexer.range('"')))
lex:add_rule('number', token(lexer.NUMBER, lexer.integer))
local code = [[foo bar baz "foo bar baz" 123]]
local tokens = {
@@ -933,6 +970,8 @@ function test_php()
assert_lex(php, code, tokens, initial_style)
initial_style = php._TOKENSTYLES['default'] -- also test non-ws init style
assert_lex(php, code, tokens, initial_style)
+ initial_style = php._TOKENSTYLES['default'] -- also test non-ws init style
+ assert_lex(php, code, tokens, initial_style)
-- Starting in PHP.
code = [[echo "hi";]]
initial_style = php._TOKENSTYLES['php_whitespace']