diff options
Diffstat (limited to 'lexlua')
121 files changed, 1245 insertions, 1338 deletions
diff --git a/lexlua/actionscript.lua b/lexlua/actionscript.lua index 69dcc80fc..401ae77c0 100644 --- a/lexlua/actionscript.lua +++ b/lexlua/actionscript.lua @@ -33,19 +33,18 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = lexer.delimited_range("'", true) -local dq_str = lexer.delimited_range('"', true) -local ml_str = '<![CDATA[' * (lexer.any - ']]>')^0 * ']]>' +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local ml_str = lexer.range('<![CDATA[', ']]>') lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('LlUuFf')^-2)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}'))) diff --git a/lexlua/ada.lua b/lexlua/ada.lua index bdcbfe313..3a4c385f4 100644 --- a/lexlua/ada.lua +++ b/lexlua/ada.lua @@ -37,19 +37,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range('"', true, true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false))) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--'))) -- Numbers. -local hex_num = 'O' * S('xX') * (lexer.xdigit + '_')^1 local integer = lexer.digit^1 * ('_' * lexer.digit^1)^0 local float = integer^1 * ('.' * integer^0)^-1 * S('eE') * S('+-')^-1 * integer -lex:add_rule('number', token(lexer.NUMBER, hex_num + - S('+-')^-1 * (float + integer) * - S('LlUuFf')^-3)) +lex:add_rule('number', token(lexer.NUMBER, S('+-')^-1 * (float + integer))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S(':;=<>&+-*/.()'))) diff --git a/lexlua/ansi_c.lua b/lexlua/ansi_c.lua index ddb59e3a0..b0c646b5c 100644 --- a/lexlua/ansi_c.lua +++ b/lexlua/ansi_c.lua @@ -8,7 +8,8 @@ local P, R, S = lpeg.P, lpeg.R, lpeg.S local lex = lexer.new('ansi_c') -- Whitespace. -lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ @@ -48,30 +49,26 @@ lex:add_rule('constants', token(lexer.CONSTANT, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = P('L')^-1 * lexer.delimited_range("'", true) -local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 + - '#if' * S(' \t')^0 * '0' * lexer.space * - (lexer.any - '#endif')^0 * P('#endif')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') + + lexer.range('#if' * S(' \t')^0 * '0' * lexer.space, '#endif') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Preprocessor. -local preproc_word = word_match[[ +local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1 +local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * word_match[[ define elif else endif if ifdef ifndef line pragma undef -]] -lex:add_rule('preprocessor', - (token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + - token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * - (token(lexer.WHITESPACE, S('\t ')^1) * - token(lexer.STRING, - lexer.delimited_range('<>', true, true)))^-1)) +]]) +lex:add_rule('preprocessor', include + preproc) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}'))) diff --git a/lexlua/antlr.lua b/lexlua/antlr.lua index 184ef10e1..53cd2d57f 100644 --- a/lexlua/antlr.lua +++ b/lexlua/antlr.lua @@ -31,18 +31,17 @@ lex:add_rule('func', token(lexer.FUNCTION, 'assert')) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Actions. lex:add_rule('action', token(lexer.OPERATOR, P('{')) * - token('action', (1 - P('}'))^0) * - token(lexer.OPERATOR, P('}'))^-1) + token('action', (1 - P('}'))^0) * token(lexer.OPERATOR, P('}'))^-1) lex:add_style('action', lexer.STYLE_NOTHING) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true))) +lex:add_rule('string', token(lexer.STRING, lexer.range("'", true))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}'))) diff --git a/lexlua/apdl.lua b/lexlua/apdl.lua index 22dc64b83..be70f2a5d 100644 --- a/lexlua/apdl.lua +++ b/lexlua/apdl.lua @@ -46,21 +46,19 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range("'", true, true))) +lex:add_rule('string', token(lexer.STRING, lexer.range("'", true, false))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Functions. -lex:add_rule('function', token(lexer.FUNCTION, - lexer.delimited_range('%', true, true))) +lex:add_rule('function', token(lexer.FUNCTION, lexer.range('%', true, false))) -- Labels. lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(':') * lexer.word)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '!' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('!'))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/$=,;()'))) diff --git a/lexlua/apl.lua b/lexlua/apl.lua index 285ae0737..ad0ec2d99 100644 --- a/lexlua/apl.lua +++ b/lexlua/apl.lua @@ -12,11 +12,11 @@ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. lex:add_rule('comment', token(lexer.COMMENT, (P('⍝') + '#') * - lexer.nonnewline^0)) + lexer.nonnewline^0)) -- Strings. -local sq_str = lexer.delimited_range("'", false, true) -local dq_str = lexer.delimited_range('"') +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Numbers. @@ -26,12 +26,12 @@ local exp = S('eE') local img = S('jJ') local sgn = P('¯')^-1 local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) * - (exp * sgn *dig^1)^-1 + (exp * sgn *dig^1)^-1 lex:add_rule('number', token(lexer.NUMBER, float * img * float + float)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, P('⍞') + 'χ' + '⍺' + '⍶' + '⍵' + - '⍹' + '⎕' * R('AZ', 'az')^0)) + '⍹' + '⎕' * R('AZ', 'az')^0)) -- Names. local n1l = R('AZ', 'az') diff --git a/lexlua/applescript.lua b/lexlua/applescript.lua index 3f21f1512..cbdf95072 100644 --- a/lexlua/applescript.lua +++ b/lexlua/applescript.lua @@ -46,19 +46,19 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ ]], true)) -- Identifiers. -lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * - lexer.alnum^0)) +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * + (lexer.alnum + '_')^0)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) -- Comments. -local line_comment = '--' * lexer.nonnewline^0 -local block_comment = '(*' * (lexer.any - '*)')^0 * P('*)')^-1 +local line_comment = lexer.to_eol('--') +local block_comment = lexer.range('(*', '*)') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=:,(){}'))) diff --git a/lexlua/asm.lua b/lexlua/asm.lua index 42558fa05..12ebdf3cf 100644 --- a/lexlua/asm.lua +++ b/lexlua/asm.lua @@ -314,7 +314,7 @@ local constants = word_match[[ __float80e__ __float80m__ __Infinity__ __NaN__ __QNaN__ __SNaN__ ]] lex:add_rule('constant', token(lexer.CONSTANT, constants + - '$' * P('$')^-1 * -word)) + '$' * P('$')^-1 * -word)) -- Labels. lex:add_rule('label', token(lexer.LABEL, word * ':')) @@ -323,18 +323,18 @@ lex:add_rule('label', token(lexer.LABEL, word * ':')) lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, ';' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(';'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + - lexer.integer * S('hqb')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('hqb')^-1)) -- Preprocessor. -local preproc_word = word_match[[ +local pp_word = word_match[[ arg assign clear define defstr deftok depend elif elifctx elifdef elifempty elifenv elifid elifidn elifidni elifmacro elifn elifnctx elifndef elifnempty elifnenv elifnid elifnidn elifnidni elifnmacro elifnnum elifnstr elifntoken @@ -345,9 +345,8 @@ local preproc_word = word_match[[ ixdefine line local macro pathsearch pop push rep repl rmacro rotate stacksize strcat strlen substr undef unmacro use warning while xdefine ]] -local preproc_symbol = '??' + S('!$+?') + '%' * -lexer.space + R('09')^1 -lex:add_rule('preproc', token(lexer.PREPROCESSOR, '%' * (preproc_word + - preproc_symbol))) +local pp_symbol = '??' + S('!$+?') + '%' * -lexer.space + R('09')^1 +lex:add_rule('preproc', token(lexer.PREPROCESSOR, '%' * (pp_word + pp_symbol))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|~:,()[]'))) diff --git a/lexlua/asp.lua b/lexlua/asp.lua index 05ba0a9b1..0f5fb21ea 100644 --- a/lexlua/asp.lua +++ b/lexlua/asp.lua @@ -25,7 +25,7 @@ local vbs_start_rule = #(P('<') * script_element * (P(function(input, index) end end) + '>')) * html.embed_start_tag -- <script language="vbscript"> local vbs_end_rule = #('</' * script_element * lexer.space^0 * '>') * - html.embed_end_tag -- </script> + html.embed_end_tag -- </script> lex:embed(vbs, vbs_start_rule, vbs_end_rule) -- Fold points. diff --git a/lexlua/autoit.lua b/lexlua/autoit.lua index 5bb5752f9..3b6d3ae6a 100644 --- a/lexlua/autoit.lua +++ b/lexlua/autoit.lua @@ -97,12 +97,10 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match([[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = ';' * lexer.nonnewline_esc^0 -local block_comment1 = '#comments-start' * (lexer.any - '#comments-end')^0 * - P('#comments-end')^-1 -local block_comment2 = '#cs' * (lexer.any - '#ce')^0 * P('#ce')^-1 -lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment1 + - block_comment2)) +local line_comment = lexer.to_eol(';') +local block_comment = lexer.range('#comments-start', '#comments-end') + + lexer.range('#cs', '#ce') +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Preprocessor. lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * word_match([[ @@ -111,9 +109,9 @@ lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * word_match([[ ]], true))) -- Strings. -local dq_str = lexer.delimited_range('"', true, true) -local sq_str = lexer.delimited_range("'", true, true) -local inc = lexer.delimited_range('<>', true, true, true) +local dq_str = lexer.range('"', true, false) +local sq_str = lexer.range("'", true, false) +local inc = lexer.range('<', '>', true, false, true) lex:add_rule('string', token(lexer.STRING, dq_str + sq_str + inc)) -- Macros. @@ -124,7 +122,7 @@ lex:add_style('macro', lexer.STYLE_PREPROCESSOR) lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + '_')^1)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=?:()[]'))) diff --git a/lexlua/awk.lua b/lexlua/awk.lua index 36329acc3..40ff501e7 100644 --- a/lexlua/awk.lua +++ b/lexlua/awk.lua @@ -160,7 +160,7 @@ local function scanString(input, index) return i + 1 elseif input:sub(i, i) == BACKSLASH then i = i + 1 - -- lexer.delimited_range() doesn't handle CRLF. + -- lexer.range() doesn't handle CRLF. if input:sub(i, i + 1) == CRLF then i = i + 1 end end i = i + 1 @@ -229,18 +229,17 @@ lex:add_rule('comment', token(lexer.COMMENT, '#' * P(scanComment))) lex:add_rule('string', token(lexer.STRING, DQUOTE * P(scanString))) -- No leading sign because it might be binary. -local float = ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) + - ('.' * lexer.digit^1)) * - (S('eE') * S('+-')^-1 * lexer.digit^1)^-1 +local float = + ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) + ('.' * lexer.digit^1)) * + (S('eE') * S('+-')^-1 * lexer.digit^1)^-1 -- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc. -lex:add_rule('field', - token('field', P('$') * S('$+-')^0 * - (float + - lexer.word^0 * '(' * P(scanFieldDelimiters) + - lexer.word^1 * ('[' * P(scanFieldDelimiters))^-1 + - '"' * P(scanString) + - '/' * P(eatRegex) * '/'))) +lex:add_rule('field', token('field', P('$') * S('$+-')^0 * ( + float + + lexer.word^0 * '(' * P(scanFieldDelimiters) + + lexer.word^1 * ('[' * P(scanFieldDelimiters))^-1 + + '"' * P(scanString) + + '/' * P(eatRegex) * '/'))) lex:add_style('field', lexer.STYLE_LABEL) -- Regular expressions. @@ -250,18 +249,18 @@ lex:add_style('field', lexer.STYLE_LABEL) -- sequences like '\S', '\s' have special meanings with Gawk. Tokens that -- contain them are displayed differently. lex:add_rule('gawkRegex', token('gawkRegex', SLASH * P(scanGawkRegex))) -lex:add_style('gawkRegex', lexer.STYLE_PREPROCESSOR..',underlined') +lex:add_style('gawkRegex', lexer.STYLE_PREPROCESSOR .. ',underlined') lex:add_rule('regex', token(lexer.REGEX, SLASH * P(scanRegex))) -- Operators. lex:add_rule('gawkOperator', token('gawkOperator', P("|&") + "@" + "**=" + - "**")) -lex:add_style('gawkOperator', lexer.STYLE_OPERATOR..',underlined') + "**")) +lex:add_style('gawkOperator', lexer.STYLE_OPERATOR .. ',underlined') lex:add_rule('operator', token(lexer.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~'))) -- Numbers. lex:add_rule('gawkNumber', token('gawkNumber', lexer.hex_num + lexer.oct_num)) -lex:add_style('gawkNumber', lexer.STYLE_NUMBER..',underlined') +lex:add_style('gawkNumber', lexer.STYLE_NUMBER .. ',underlined') lex:add_rule('number', token(lexer.NUMBER, float)) -- Keywords. @@ -282,7 +281,7 @@ lex:add_rule('gawkBuiltInVariable', token('gawkBuiltInVariable', word_match[[ ARGIND BINMODE ERRNO FIELDWIDTHS FPAT FUNCTAB IGNORECASE LINT PREC PROCINFO ROUNDMODE RT SYMTAB TEXTDOMAIN ]])) -lex:add_style('gawkBuiltInVariable', lexer.STYLE_CONSTANT..',underlined') +lex:add_style('gawkBuiltInVariable', lexer.STYLE_CONSTANT .. ',underlined') -- Functions. lex:add_rule('function', token(lexer.FUNCTION, lexer.word * #P('('))) diff --git a/lexlua/bash.lua b/lexlua/bash.lua index d3a3953e8..fb214bca8 100644 --- a/lexlua/bash.lua +++ b/lexlua/bash.lua @@ -23,29 +23,28 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = lexer.delimited_range("'", false, true) -local dq_str = lexer.delimited_range('"') -local ex_str = lexer.delimited_range('`') +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') +local ex_str = lexer.range('`') local heredoc = '<<' * P(function(input, index) - local s, e, _, delimiter = - input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index) - if s == index and delimiter then - local _, e = input:find('[\n\r\f]+'..delimiter, e) - return e and e + 1 or #input + 1 - end + local _, e, _, delimiter = input:find( + '^%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index) + if not delimiter then return end + _, e = input:find('[\n\r\f]+' .. delimiter, e) + return e and e + 1 or #input + 1 end) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Variables. -lex:add_rule('variable', token(lexer.VARIABLE, - '$' * (S('!#?*@$') + lexer.digit^1 + lexer.word + - lexer.delimited_range('{}', true, true)))) +lex:add_rule('variable', token(lexer.VARIABLE, '$' * ( + S('!#?*@$') + lexer.digit^1 + lexer.word + lexer.range('{', '}', true) +))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))) diff --git a/lexlua/batch.lua b/lexlua/batch.lua index a8dcadda9..314f4cd3a 100644 --- a/lexlua/batch.lua +++ b/lexlua/batch.lua @@ -26,19 +26,19 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match([[ ]], true))) -- Comments. -local rem = (P('REM') + 'rem') * lexer.space -lex:add_rule('comment', token(lexer.COMMENT, (rem + '::') * lexer.nonnewline^0)) +local rem = (P('REM') + 'rem') * #lexer.space +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(rem + '::'))) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) -- Variables. -lex:add_rule('variable', token(lexer.VARIABLE, - '%' * (lexer.digit + '%' * lexer.alpha) + - lexer.delimited_range('%', true, true))) +local arg = '%' * lexer.digit + '%~' * lexer.alnum^1 +local variable = lexer.range('%', true, false) +lex:add_rule('variable', token(lexer.VARIABLE, arg + variable)) -- Labels. lex:add_rule('label', token(lexer.LABEL, ':' * lexer.word)) diff --git a/lexlua/bibtex.lua b/lexlua/bibtex.lua index 162156103..0eee5801d 100644 --- a/lexlua/bibtex.lua +++ b/lexlua/bibtex.lua @@ -22,9 +22,9 @@ lex:add_style('field', lexer.STYLE_CONSTANT) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range('"') + - lexer.delimited_range('{}', false, true, true))) +local dq_str = lexer.range('"') +local br_str = lexer.range('{', '}', false, false, true) +lex:add_rule('string', token(lexer.STRING, dq_str + br_str)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S(',='))) diff --git a/lexlua/boo.lua b/lexlua/boo.lua index 907f72c19..926351f04 100644 --- a/lexlua/boo.lua +++ b/lexlua/boo.lua @@ -41,22 +41,23 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = lexer.delimited_range("'", true) -local dq_str = lexer.delimited_range('"', true) -local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local tq_str = lexer.range('"""') +local string = token(lexer.STRING, tq_str + sq_str + dq_str) local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * - lexer.delimited_range('/', true) -lex:add_rule('string', token(lexer.STRING, triple_dq_str + sq_str + dq_str) + - token(lexer.REGEX, regex_str)) + lexer.range('/', true) +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) -- Comments. -local line_comment = '#' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('#', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - (S('msdhsfFlL') + 'ms')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * + (S('msdhsfFlL') + 'ms')^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))) diff --git a/lexlua/caml.lua b/lexlua/caml.lua index 5d668d133..a65d5552a 100644 --- a/lexlua/caml.lua +++ b/lexlua/caml.lua @@ -47,14 +47,16 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, lexer.nested_pair('(*', '*)'))) +lex:add_rule('comment', token(lexer.COMMENT, + lexer.range('(*', '*)', false, false, true))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}'))) diff --git a/lexlua/chuck.lua b/lexlua/chuck.lua index 1db6913d9..2f918ed73 100644 --- a/lexlua/chuck.lua +++ b/lexlua/chuck.lua @@ -48,23 +48,23 @@ lex:add_style('time', lexer.STYLE_NUMBER) -- Special special value. lex:add_rule('now', token('now', P('now'))) -lex:add_style('now', lexer.STYLE_CONSTANT..',bold') +lex:add_style('now', lexer.STYLE_CONSTANT .. ',bold') -- Strings. -local sq_str = P('L')^-1 * lexer.delimited_range("'", true) -local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@'))) diff --git a/lexlua/cmake.lua b/lexlua/cmake.lua index ef3b598b0..2c78d5d25 100644 --- a/lexlua/cmake.lua +++ b/lexlua/cmake.lua @@ -111,7 +111,7 @@ lex:add_rule('variable', token(lexer.VARIABLE, word_match[[ MINGW MSVC MSVC60 MSVC70 MSVC71 MSVC80 MSVC_IDE POST_BUILD PRE_BUILD PROJECT_BINARY_DIR PROJECT_NAME PROJECT_SOURCE_DIR RUN_CONFIGURE TARGET UNIX WIN32 -]] + P('$') * lexer.delimited_range('{}', false, true))) +]] + P('$') * lexer.range('{', '}'))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, word_match[[ @@ -123,10 +123,10 @@ lex:add_rule('operator', token(lexer.OPERATOR, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Fold points. lex:add_fold_point(lexer.KEYWORD, 'IF', 'ENDIF') diff --git a/lexlua/coffeescript.lua b/lexlua/coffeescript.lua index 62023be60..b5edd1120 100644 --- a/lexlua/coffeescript.lua +++ b/lexlua/coffeescript.lua @@ -20,25 +20,27 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ -- Fields: object properties and methods. lex:add_rule('field', token(lexer.FUNCTION, '.' * (S('_$') + lexer.alpha) * - (S('_$') + lexer.alnum)^0)) + (S('_$') + lexer.alnum)^0)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local string = token(lexer.STRING, sq_str + dq_str) local regex_str = #P('/') * lexer.last_char_includes('+-*%<>!=^&|?~:;,([{') * - lexer.delimited_range('/', true) * S('igm')^0 -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"')) + - token(lexer.REGEX, regex_str)) + lexer.range('/', true) * S('igm')^0 +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) -- Comments. -local block_comment = '###' * (lexer.any - '###')^0 * P('###')^-1 -local line_comment = '#' * lexer.nonnewline_esc^0 +local block_comment = lexer.range('###') +local line_comment = lexer.to_eol('#', true) lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))) diff --git a/lexlua/context.lua b/lexlua/context.lua index 87811164b..904354c55 100644 --- a/lexlua/context.lua +++ b/lexlua/context.lua @@ -12,32 +12,31 @@ local beginend = (P('begin') + 'end') local startstop = (P('start') + 'stop') -- Whitespace. -local ws = token(lexer.WHITESPACE, lexer.space^1) -lex:add_rule('whitespace', ws) +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local comment = token(lexer.COMMENT, '%' * lexer.nonnewline^0) -lex:add_rule('comment', comment) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%'))) -- Sections. local wm_section = word_match[[ chapter part section subject subsection subsubject subsubsection subsubsubject subsubsubsection subsubsubsubject title ]] -local section = token(lexer.CLASS, - '\\' * (wm_section + (startstop * wm_section))) +local section = token(lexer.CLASS, '\\' * + (wm_section + (startstop * wm_section))) lex:add_rule('section', section) -- TeX and ConTeXt mkiv environments. -local environment = token(lexer.STRING, - '\\' * (beginend + startstop) * lexer.alpha^1) +local environment = token(lexer.STRING, '\\' * (beginend + startstop) * + lexer.alpha^1) lex:add_rule('environment', environment) -- Commands. -local command = token(lexer.KEYWORD, - '\\' * (lexer.alpha^1 * P('\\') * lexer.space^1 + - lexer.alpha^1 + - S('!"#$%&\',./;=[\\]_{|}~`^-'))) +local command = token(lexer.KEYWORD, '\\' * ( + lexer.alpha^1 * P('\\') * lexer.space^1 + + lexer.alpha^1 + + S('!"#$%&\',./;=[\\]_{|}~`^-') +)) lex:add_rule('command', command) -- Operators. diff --git a/lexlua/cpp.lua b/lexlua/cpp.lua index e501ce99c..61d18454f 100644 --- a/lexlua/cpp.lua +++ b/lexlua/cpp.lua @@ -8,7 +8,8 @@ local P, R, S = lpeg.P, lpeg.R, lpeg.S local lex = lexer.new('cpp') -- Whitespace. -lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ @@ -33,16 +34,16 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ ]])) -- Strings. -local sq_str = P('L')^-1 * lexer.delimited_range("'", true) -local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. @@ -53,17 +54,13 @@ local integer = S('+-')^-1 * (hex + bin + dec) lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) -- Preprocessor. -local preproc_word = word_match[[ +local include = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (ws * token(lexer.STRING, lexer.range('<', '>', true)))^-1 +local preproc = token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * word_match[[ define elif else endif error if ifdef ifndef import line pragma undef using warning -]] -lex:add_rule('preprocessor', - #lexer.starts_line('#') * - (token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + - token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * - (token(lexer.WHITESPACE, S('\t ')^1) * - token(lexer.STRING, - lexer.delimited_range('<>', true, true)))^-1)) +]]) +lex:add_rule('preprocessor', include + preproc) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))) diff --git a/lexlua/crystal.lua b/lexlua/crystal.lua index 6f81dcc7e..85e111e90 100644 --- a/lexlua/crystal.lua +++ b/lexlua/crystal.lua @@ -40,9 +40,9 @@ local literal_delimitted = P(function(input, index) if delimiter_matches[delimiter] then -- Handle nested delimiter/matches in strings. local s, e = delimiter, delimiter_matches[delimiter] - patt = lexer.delimited_range(s..e, false, false, true) + patt = lexer.range(s, e, false, true, true) else - patt = lexer.delimited_range(delimiter) + patt = lexer.range(delimiter) end match_pos = lpeg.match(patt, input, index) return match_pos or #input + 1 @@ -50,27 +50,27 @@ local literal_delimitted = P(function(input, index) end) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) -- Strings. -local cmd_str = lexer.delimited_range('`') -local sq_str = lexer.delimited_range("'") -local dq_str = lexer.delimited_range('"') +local cmd_str = lexer.range('`') +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') local heredoc = '<<' * P(function(input, index) - local s, e, indented, _, delimiter = - input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) - if s == index and delimiter then - local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') - local _, e = input:find(end_heredoc..delimiter, e) - return e and e + 1 or #input + 1 - end + local _, e, indented, _, delimiter = input:find( + '^(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) + if not delimiter then return end + local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') + _, e = input:find(end_heredoc .. delimiter, e) + return e and e + 1 or #input + 1 end) +local string = token(lexer.STRING, (sq_str + dq_str + heredoc + cmd_str) * + S('f')^-1) -- TODO: regex_str fails with `obj.method /patt/` syntax. local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * - lexer.delimited_range('/', true, false) * S('iomx')^0 -lex:add_rule('string', token(lexer.STRING, (sq_str + dq_str + heredoc + - cmd_str) * S('f')^-1) + - token(lexer.REGEX, regex_str)) + lexer.range('/', true) * S('iomx')^0 +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) -- Numbers. local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1 @@ -79,15 +79,18 @@ local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec) -- TODO: meta, control, etc. for numeric_literal. local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer + - numeric_literal)) + numeric_literal)) -- Variables. -local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit + - '-' * S('0FadiIKlpvw')) +local global_var = '$' * ( + word + S('!@L+`\'=~/\\,.;<>_*"$?:') + + lexer.digit + + '-' * S('0FadiIKlpvw') +) local class_var = '@@' * word local inst_var = '@' * word lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var + - inst_var)) + inst_var)) -- Symbols. lex:add_rule('symbol', token('symbol', ':' * P(function(input, index) @@ -101,7 +104,7 @@ lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))) -- Fold points. local function disambiguate(text, pos, line, s) return line:sub(1, s - 1):match('^%s*$') and - not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 + not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 end lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') lex:add_fold_point(lexer.KEYWORD, 'case', 'end') diff --git a/lexlua/csharp.lua b/lexlua/csharp.lua index 83f17e411..3a63a20cf 100644 --- a/lexlua/csharp.lua +++ b/lexlua/csharp.lua @@ -31,27 +31,24 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Strings. -local sq_str = lexer.delimited_range("'", true) -local dq_str = lexer.delimited_range('"', true) -local ml_str = P('@')^-1 * lexer.delimited_range('"', false, true) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local ml_str = P('@')^-1 * lexer.range('"', false, false) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('lLdDfFMm')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('lLdDfFmM')^-1)) -- Preprocessor. -local preproc_word = word_match[[ - define elif else endif error if line undef warning region endregion -]] -lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') * - S('\t ')^0 * - preproc_word)) +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * + word_match[[ + define elif else endif error if line undef warning region endregion + ]])) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}'))) diff --git a/lexlua/css.lua b/lexlua/css.lua index 521540d2c..c11833750 100644 --- a/lexlua/css.lua +++ b/lexlua/css.lua @@ -124,8 +124,8 @@ lex:add_rule('color', token('color', word_match[[ lex:add_style('color', lexer.STYLE_NUMBER) -- Identifiers. -lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * - (lexer.alnum + S('_-'))^0)) +local word = lexer.alpha * (lexer.alnum + S('_-'))^0 +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -- Pseudo classes and pseudo elements. lex:add_rule('pseudoclass', ':' * token('pseudoclass', word_match[[ @@ -141,12 +141,12 @@ lex:add_rule('pseudoelement', '::' * token('pseudoelement', word_match[[ lex:add_style('pseudoelement', lexer.STYLE_CONSTANT) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '/*' * (lexer.any - '*/')^0 * - P('*/')^-1)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('/*', '*/'))) -- Numbers. local unit = token('unit', word_match[[ diff --git a/lexlua/dart.lua b/lexlua/dart.lua index c9aa80200..fca648485 100644 --- a/lexlua/dart.lua +++ b/lexlua/dart.lua @@ -25,22 +25,21 @@ lex:add_rule('builtin', token(lexer.CONSTANT, word_match[[ ]])) -- Strings. -local sq_str = S('r')^-1 * lexer.delimited_range("'", true) -local dq_str = S('r')^-1 * lexer.delimited_range('"', true) -local sq_str_multiline = S('r')^-1 * "'''" * (lexer.any - "'''")^0 * P("'''")^-1 -local dq_str_multiline = S('r')^-1 * '"""' * (lexer.any - '"""')^0 * P('"""')^-1 -lex:add_rule('string', token(lexer.STRING, sq_str_multiline + dq_str_multiline + - sq_str + dq_str)) +local sq_str = S('r')^-1 * lexer.range("'", true) +local dq_str = S('r')^-1 * lexer.range('"', true) +local tq_str = S('r')^-1 * (lexer.range("'''") + lexer.range('"""')) +lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0 + - lexer.nested_pair('/*', '*/'))) +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.hex_num)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}'))) diff --git a/lexlua/desktop.lua b/lexlua/desktop.lua index f8dca5c00..8229d2c75 100644 --- a/lexlua/desktop.lua +++ b/lexlua/desktop.lua @@ -23,28 +23,27 @@ lex:add_rule('value', token('value', word_match[[true false]])) lex:add_style('value', lexer.STYLE_CONSTANT) -- Identifiers. -lex:add_rule('identifier', lexer.token(lexer.IDENTIFIER, - lexer.alpha * (lexer.alnum + S('_-'))^0)) +local word = lexer.alpha * (lexer.alnum + S('_-'))^0 +lex:add_rule('identifier', lexer.token(lexer.IDENTIFIER, word)) + +local bracketed = lexer.range('[', ']') -- Group headers. -lex:add_rule('header', - lexer.starts_line(token('header', - lexer.delimited_range('[]', false, true)))) +lex:add_rule('header', lexer.starts_line(token('header', bracketed))) lex:add_style('header', lexer.STYLE_LABEL) -- Locales. -lex:add_rule('locale', token('locale', - lexer.delimited_range('[]', false, true))) +lex:add_rule('locale', token('locale', bracketed)) lex:add_style('locale', lexer.STYLE_CLASS) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer))) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Field codes. lex:add_rule('code', lexer.token('code', P('%') * S('fFuUdDnNickvm'))) diff --git a/lexlua/diff.lua b/lexlua/diff.lua index 19c110938..2b3b171f1 100644 --- a/lexlua/diff.lua +++ b/lexlua/diff.lua @@ -10,14 +10,14 @@ local lex = lexer.new('diff', {lex_by_line = true}) -- Text, separators, and file headers. lex:add_rule('index', token(lexer.COMMENT, 'Index: ' * lexer.any^0 * -1)) lex:add_rule('separator', token(lexer.COMMENT, ('---' + P('*')^4 + P('=')^1) * - lexer.space^0 * -1)) + lexer.space^0 * -1)) lex:add_rule('header', token('header', (P('*** ') + '--- ' + '+++ ') * - lexer.any^1)) + lexer.any^1)) lex:add_style('header', lexer.STYLE_COMMENT) -- Location. lex:add_rule('location', token(lexer.NUMBER, ('@@' + lexer.digit^1 + '****') * - lexer.any^1)) + lexer.any^1)) -- Additions, deletions, and changes. lex:add_rule('addition', token('addition', S('>+') * lexer.any^0)) diff --git a/lexlua/django.lua b/lexlua/django.lua index 74a6a7da2..96a9b454b 100644 --- a/lexlua/django.lua +++ b/lexlua/django.lua @@ -32,16 +32,15 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range('"', false, true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', false, false))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S(':,.|'))) -- Embed Django in HTML. local html = lexer.load('html') -local html_comment = '<!--' * (lexer.any - '-->')^0 * P('-->')^-1 -local django_comment = '{#' * (lexer.any - lexer.newline - '#}')^0 * P('#}')^-1 +local html_comment = lexer.range('<!--', '-->') +local django_comment = lexer.range('{#', '#}', true) html:modify_rule('comment', token(lexer.COMMENT, html_comment + django_comment)) local django_start_rule = token('django_tag', '{' * S('{%')) local django_end_rule = token('django_tag', S('%}') * '}') diff --git a/lexlua/dmd.lua b/lexlua/dmd.lua index 08757a594..968b01e6c 100644 --- a/lexlua/dmd.lua +++ b/lexlua/dmd.lua @@ -12,25 +12,25 @@ local M = {_NAME = 'dmd'} local ws = token(lexer.WHITESPACE, lexer.space^1) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 -local nested_comment = lexer.nested_pair('/+', '+/') +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') +local nested_comment = lexer.range('/+', '+/', false, false, true) local comment = token(lexer.COMMENT, line_comment + block_comment + - nested_comment) + nested_comment) -- Strings. -local sq_str = lexer.delimited_range("'", true) * S('cwd')^-1 -local dq_str = lexer.delimited_range('"') * S('cwd')^-1 -local lit_str = 'r' * lexer.delimited_range('"', false, true) * S('cwd')^-1 -local bt_str = lexer.delimited_range('`', false, true) * S('cwd')^-1 -local hex_str = 'x' * lexer.delimited_range('"') * S('cwd')^-1 +local sq_str = lexer.range("'", true) * S('cwd')^-1 +local dq_str = lexer.range('"') * S('cwd')^-1 +local lit_str = 'r' * lexer.range('"', false, false) * S('cwd')^-1 +local bt_str = lexer.range('`', false, false) * S('cwd')^-1 +local hex_str = 'x' * lexer.range('"') * S('cwd')^-1 local other_hex_str = '\\x' * (lexer.xdigit * lexer.xdigit)^1 -local del_str = lexer.nested_pair('q"[', ']"') * S('cwd')^-1 + - lexer.nested_pair('q"(', ')"') * S('cwd')^-1 + - lexer.nested_pair('q"{', '}"') * S('cwd')^-1 + - lexer.nested_pair('q"<', '>"') * S('cwd')^-1 -local string = token(lexer.STRING, del_str + sq_str + dq_str + lit_str + - bt_str + hex_str + other_hex_str) +local str = sq_str + dq_str + lit_str + bt_str + hex_str + other_hex_str +for left, right in pairs{['['] = ']', ['('] = ')', ['{'] = '}', ['<'] = '>'} do + str = str + lexer.range('q"' .. left, right .. '"', false, false, true) * + S('cwd')^-1 +end +local string = token(lexer.STRING, str) -- Numbers. local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 @@ -72,7 +72,7 @@ local constant = token(lexer.CONSTANT, word_match{ }) local class_sequence = token(lexer.TYPE, P('class') + P('struct')) * ws^1 * - token(lexer.CLASS, lexer.word) + token(lexer.CLASS, lexer.word) -- Identifiers. local identifier = token(lexer.IDENTIFIER, lexer.word) @@ -126,17 +126,17 @@ local versions_list = token('versions', word_match{ }) local versions = token(lexer.KEYWORD, 'version') * lexer.space^0 * - token(lexer.OPERATOR, '(') * lexer.space^0 * versions_list + token(lexer.OPERATOR, '(') * lexer.space^0 * versions_list local scopes = token(lexer.KEYWORD, 'scope') * lexer.space^0 * - token(lexer.OPERATOR, '(') * lexer.space^0 * scopes_list + token(lexer.OPERATOR, '(') * lexer.space^0 * scopes_list local traits = token(lexer.KEYWORD, '__traits') * lexer.space^0 * - token(lexer.OPERATOR, '(') * lexer.space^0 * traits_list + token(lexer.OPERATOR, '(') * lexer.space^0 * traits_list -local func = token(lexer.FUNCTION, lexer.word) * - #(lexer.space^0 * (P('!') * lexer.word^-1 * lexer.space^-1)^-1 * - P('(')) +local func = token(lexer.FUNCTION, lexer.word) * #( + lexer.space^0 * (P('!') * lexer.word^-1 * lexer.space^-1)^-1 * P('(') +) M._rules = { {'whitespace', ws}, diff --git a/lexlua/dockerfile.lua b/lexlua/dockerfile.lua index ddf9e53f8..6aeef3a06 100644 --- a/lexlua/dockerfile.lua +++ b/lexlua/dockerfile.lua @@ -20,20 +20,19 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Variable. -lex:add_rule('variable', token(lexer.VARIABLE, - S('$')^1 * (S('{')^1 * lexer.word * S('}')^1 + - lexer.word))) +lex:add_rule('variable', token(lexer.VARIABLE, S('$')^1 * + (S('{')^1 * lexer.word * S('}')^1 + lexer.word))) -- Strings. -local sq_str = lexer.delimited_range("'", false, true) -local dq_str = lexer.delimited_range('"') +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('\\[],=:{}'))) diff --git a/lexlua/dot.lua b/lexlua/dot.lua index e71079cbf..906ce6b48 100644 --- a/lexlua/dot.lua +++ b/lexlua/dot.lua @@ -32,12 +32,13 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. diff --git a/lexlua/eiffel.lua b/lexlua/eiffel.lua index 1bc134a89..7dda50bad 100644 --- a/lexlua/eiffel.lua +++ b/lexlua/eiffel.lua @@ -29,14 +29,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}'))) diff --git a/lexlua/elixir.lua b/lexlua/elixir.lua index cc9675d78..18c62aaa9 100644 --- a/lexlua/elixir.lua +++ b/lexlua/elixir.lua @@ -12,58 +12,56 @@ local lex = lexer.new('elixir', {fold_by_indentation = true}) lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Sigils. -local sigil11 = P("~") * S("CRSW") * lexer.delimited_range('<>', false, true) -local sigil12 = P("~") * S("CRSW") * lexer.delimited_range('{}', false, true) -local sigil13 = P("~") * S("CRSW") * lexer.delimited_range('[]', false, true) -local sigil14 = P("~") * S("CRSW") * lexer.delimited_range('()', false, true) -local sigil15 = P("~") * S("CRSW") * lexer.delimited_range('|', false, true) -local sigil16 = P("~") * S("CRSW") * lexer.delimited_range('/', false, true) -local sigil17 = P("~") * S("CRSW") * lexer.delimited_range('"', false, true) -local sigil18 = P("~") * S("CRSW") * lexer.delimited_range("'", false, true) -local sigil19 = P("~") * S("CRSW") * '"""' * (lexer.any - '"""')^0 * P('"""')^-1 -local sigil10 = P("~") * S("CRSW") * "'''" * (lexer.any - "'''")^0 * P("'''")^-1 -local sigil21 = P("~") * S("crsw") * lexer.delimited_range('<>', false, false) -local sigil22 = P("~") * S("crsw") * lexer.delimited_range('{}', false, false) -local sigil23 = P("~") * S("crsw") * lexer.delimited_range('[]', false, false) -local sigil24 = P("~") * S("crsw") * lexer.delimited_range('()', false, false) -local sigil25 = P("~") * S("crsw") * lexer.delimited_range('|', false, false) -local sigil26 = P("~") * S("crsw") * lexer.delimited_range('/', false, false) -local sigil27 = P("~") * S("crsw") * lexer.delimited_range('"', false, false) -local sigil28 = P("~") * S("crsw") * lexer.delimited_range("'", false, false) -local sigil29 = P("~") * S("csrw") * '"""' * (lexer.any - '"""')^0 * P('"""')^-1 -local sigil20 = P("~") * S("csrw") * "'''" * (lexer.any - "'''")^0 * P("'''")^-1 +local sigil11 = P("~") * S("CRSW") * lexer.range('<', '>', false) +local sigil12 = P("~") * S("CRSW") * lexer.range('{', '}', false) +local sigil13 = P("~") * S("CRSW") * lexer.range('[', ']', false) +local sigil14 = P("~") * S("CRSW") * lexer.range('(', ')', false) +local sigil15 = P("~") * S("CRSW") * lexer.range('|', false, false) +local sigil16 = P("~") * S("CRSW") * lexer.range('/', false, false) +local sigil17 = P("~") * S("CRSW") * lexer.range('"', false, false) +local sigil18 = P("~") * S("CRSW") * lexer.range("'", false, false) +local sigil19 = P("~") * S("CRSW") * lexer.range('"""') +local sigil10 = P("~") * S("CRSW") * lexer.range("'''") +local sigil21 = P("~") * S("crsw") * lexer.range('<', '>', false, true) +local sigil22 = P("~") * S("crsw") * lexer.range('{', '}', false, true) +local sigil23 = P("~") * S("crsw") * lexer.range('[', ']', false, true) +local sigil24 = P("~") * S("crsw") * lexer.range('(', ')', false, true) +local sigil25 = P("~") * S("crsw") * lexer.range('|', false) +local sigil26 = P("~") * S("crsw") * lexer.range('/', false) +local sigil27 = P("~") * S("crsw") * lexer.range('"', false) +local sigil28 = P("~") * S("crsw") * lexer.range("'", false) +local sigil29 = P("~") * S("crsw") * lexer.range('"""') +local sigil20 = P("~") * S("crsw") * lexer.range("'''") local sigil_token = token(lexer.REGEX, sigil10 + sigil19 + sigil11 + sigil12 + - sigil13 + sigil14 + sigil15 + sigil16 + - sigil17 + sigil18 + sigil20 + sigil29 + - sigil21 + sigil22 + sigil23 + sigil24 + - sigil25 + sigil26 + sigil27 + sigil28) + sigil13 + sigil14 + sigil15 + sigil16 + sigil17 + sigil18 + sigil20 + + sigil29 + sigil21 + sigil22 + sigil23 + sigil24 + sigil25 + sigil26 + + sigil27 + sigil28) local sigiladdon_token = token(lexer.EMBEDDED, R('az', 'AZ')^0) lex:add_rule('sigil', sigil_token * sigiladdon_token) -- Atoms. -local atom1 = B(1 - P(':')) * P(':') * lexer.delimited_range('"', false) +local atom1 = B(1 - P(':')) * P(':') * lexer.range('"', false) local atom2 = B(1 - P(':')) * P(':') * R('az', 'AZ') * - R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 + R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 local atom3 = B(1 - R('az', 'AZ', '__', '09', '::')) * - R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 + R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 lex:add_rule('atom', token(lexer.CONSTANT, atom1 + atom2 + atom3)) -- Strings. -local dq_str = lexer.delimited_range('"', false) -local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local dq_str = lexer.range('"') +local triple_dq_str = lexer.range('"""') lex:add_rule('string', token(lexer.STRING, triple_dq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) -- Attributes. lex:add_rule('attribute', token(lexer.LABEL, B(1 - R('az', 'AZ', '__')) * - P('@') * R('az','AZ') * - R('az','AZ','09','__')^0)) + P('@') * R('az','AZ') * R('az','AZ','09','__')^0)) -- Booleans. lex:add_rule('boolean', token(lexer.NUMBER, P(':')^-1 * - word_match[[true false nil]])) + word_match[[true false nil]])) -- Functions. lex:add_rule('function', token(lexer.FUNCTION, word_match[[ @@ -83,16 +81,14 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ -- Operators local operator1 = word_match[[and or not when xor in]] local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' + - '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' + - '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' + - '|>' + '|' + '..' + '.' + '^^^' + '^' + '\\\\' + '::' + '*' + - '/' + '~~~' + '@' + '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' + '--' + '-' + + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' + '|>' + '|' + '..' + '.' + + '^^^' + '^' + '\\\\' + '::' + '*' + '/' + '~~~' + '@' lex:add_rule('operator', token(lexer.OPERATOR, operator1 + operator2)) -- Identifiers lex:add_rule('identifier', token(lexer.IDENTIFIER, R('az', '__') * - R('az', 'AZ', '__', '09')^0 * - S('?!')^-1)) + R('az', 'AZ', '__', '09')^0 * S('?!')^-1)) -- Numbers local dec = lexer.digit * (lexer.digit + P("_"))^0 @@ -100,8 +96,8 @@ local bin = '0b' * S('01')^1 local oct = '0o' * R('07')^1 local integer = bin + lexer.hex_num + oct + dec local float = lexer.digit^1 * P(".") * lexer.digit^1 * S("eE") * - (S('+-')^-1 * lexer.digit^1)^-1 + (S('+-')^-1 * lexer.digit^1)^-1 lex:add_rule('number', B(1 - R('az', 'AZ', '__')) * S('+-')^-1 * - token(lexer.NUMBER, float + integer)) + token(lexer.NUMBER, float + integer)) return lex diff --git a/lexlua/erlang.lua b/lexlua/erlang.lua index 943e31b36..0d2a3b19b 100644 --- a/lexlua/erlang.lua +++ b/lexlua/erlang.lua @@ -42,11 +42,11 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[ -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower * - ('_' + lexer.alnum)^0)) + ('_' + lexer.alnum)^0)) -- Variables. lex:add_rule('variable', token(lexer.VARIABLE, P('_')^0 * lexer.upper * - ('_' + lexer.alnum)^0)) + ('_' + lexer.alnum)^0)) -- Directives. lex:add_rule('directive', token('directive', '-' * word_match[[ @@ -56,15 +56,16 @@ lex:add_rule('directive', token('directive', '-' * word_match[[ lex:add_style('directive', lexer.STYLE_PREPROCESSOR) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"') + - '$' * lexer.any * lexer.alnum^0)) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + + '$' * lexer.any * lexer.alnum^0)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('-<>.;=/|+*:,!()[]{}'))) diff --git a/lexlua/faust.lua b/lexlua/faust.lua index c51956cc4..2e579dfd8 100644 --- a/lexlua/faust.lua +++ b/lexlua/faust.lua @@ -21,11 +21,11 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) -- Comments. -local line_comment = '//' * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. @@ -37,11 +37,10 @@ lex:add_rule('number', token(lexer.NUMBER, flt + int)) -- Pragmas. lex:add_rule('pragma', token(lexer.PREPROCESSOR, P('<mdoc>') * - (lexer.any - P('</mdoc>'))^0 * - P('</mdoc>')^-1)) + (lexer.any - P('</mdoc>'))^0 * P('</mdoc>')^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, - S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\''))) + S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\''))) return lex diff --git a/lexlua/fish.lua b/lexlua/fish.lua index 6585e240b..5ec829391 100644 --- a/lexlua/fish.lua +++ b/lexlua/fish.lua @@ -25,24 +25,23 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Variables. -lex:add_rule('variable', token(lexer.VARIABLE, - '$' * (lexer.word + - lexer.delimited_range('{}', true, true)))) +lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.word + + lexer.range('{', '}', true)))) -- Strings. -local sq_str = lexer.delimited_range("'", false, true) -local dq_str = lexer.delimited_range('"') +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Shebang. -lex:add_rule('shebang', token('shebang', '#!/' * lexer.nonnewline^0)) +lex:add_rule('shebang', token('shebang', lexer.to_eol('#!/'))) lex:add_style('shebang', lexer.STYLE_LABEL) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))) diff --git a/lexlua/forth.lua b/lexlua/forth.lua index df1dcdcb5..7f1d92ace 100644 --- a/lexlua/forth.lua +++ b/lexlua/forth.lua @@ -12,14 +12,14 @@ local lex = lexer.new('forth') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Strings. -local c_str = 'c' * lexer.delimited_range('"', true, true) -local s_str = 's' * lexer.delimited_range('"', true, true) -local s_bs_str = 's\\' * lexer.delimited_range('"', true, false) -local dot_str = '.' * lexer.delimited_range('"', true, true) -local dot_paren_str = '.' * lexer.delimited_range('()', true, true, false) -local abort_str = 'abort' * lexer.delimited_range('"', true, true) +local c_str = 'c' * lexer.range('"', true, false) +local s_str = 's' * lexer.range('"', true, false) +local s_bs_str = 's\\' * lexer.range('"', true) +local dot_str = '.' * lexer.range('"', true, false) +local dot_paren_str = '.' * lexer.range('(', ')', true) +local abort_str = 'abort' * lexer.range('"', true, false) lex:add_rule('string', token(lexer.STRING, c_str + s_str + s_bs_str + dot_str + - dot_paren_str + abort_str)) + dot_paren_str + abort_str)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ @@ -38,17 +38,17 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ ]], true)) -- Identifiers. -lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alnum + - S('+-*=<>.?/\'%,_$#'))^1)) +lex:add_rule('identifier', token(lexer.IDENTIFIER, + (lexer.alnum + S('+-*=<>.?/\'%,_$#'))^1)) -- Comments. -local line_comment = S('|\\') * lexer.nonnewline^0 -local block_comment = '(' * (lexer.any - ')')^0 * P(')')^-1 +local line_comment = lexer.to_eol(S('|\\')) +local block_comment = lexer.range('(', ')') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * - (S('./') * lexer.digit^1)^-1)) + (S('./') * lexer.digit^1)^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S(':;<>+*-/[]#'))) diff --git a/lexlua/fortran.lua b/lexlua/fortran.lua index a508a9625..e07068833 100644 --- a/lexlua/fortran.lua +++ b/lexlua/fortran.lua @@ -11,14 +11,13 @@ local lex = lexer.new('fortran') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local c_comment = lexer.starts_line(S('Cc')) * lexer.nonnewline^0 -local d_comment = lexer.starts_line(S('Dd')) * lexer.nonnewline^0 -local ex_comment = lexer.starts_line('!') * lexer.nonnewline^0 -local ast_comment = lexer.starts_line('*') * lexer.nonnewline^0 -local line_comment = '!' * lexer.nonnewline^0 +local c_comment = lexer.to_eol(lexer.starts_line(S('Cc'))) +local d_comment = lexer.to_eol(lexer.starts_line(S('Dd'))) +local ex_comment = lexer.to_eol(lexer.starts_line('!')) +local ast_comment = lexer.to_eol(lexer.starts_line('*')) +local line_comment = lexer.to_eol('!') lex:add_rule('comment', token(lexer.COMMENT, c_comment + d_comment + - ex_comment + ast_comment + - line_comment)) + ex_comment + ast_comment + line_comment)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ @@ -55,15 +54,14 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[ ]], true))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - -lexer.alpha)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * -lexer.alpha)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alnum^1)) -- Strings. -local sq_str = lexer.delimited_range("'", true, true) -local dq_str = lexer.delimited_range('"', true, true) +local sq_str = lexer.range("'", true, false) +local dq_str = lexer.range('"', true, false) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Operators. diff --git a/lexlua/fsharp.lua b/lexlua/fsharp.lua index d8ecdc628..b80bf37e4 100644 --- a/lexlua/fsharp.lua +++ b/lexlua/fsharp.lua @@ -34,26 +34,28 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0 + - lexer.nested_pair('(*', '*)'))) +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('(*', '*)', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + - lexer.integer * S('uUlL')^-1))) +lex:add_rule('number', token(lexer.NUMBER, + (lexer.float + lexer.integer * S('uUlL')^-1))) -- Preprocessor. local preproc_word = word_match[[ else endif endregion if ifdef ifndef light region ]] lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * - S('\t ')^0 * preproc_word)) + S('\t ')^0 * preproc_word)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, - S('=<>+-*/^.,:;~!@#%^&|?[](){}'))) + S('=<>+-*/^.,:;~!@#%^&|?[](){}'))) return lex diff --git a/lexlua/gap.lua b/lexlua/gap.lua index 49e69fb24..15cbc75dd 100644 --- a/lexlua/gap.lua +++ b/lexlua/gap.lua @@ -20,11 +20,12 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * -lexer.alpha)) diff --git a/lexlua/gettext.lua b/lexlua/gettext.lua index a51133c5e..3c06487b3 100644 --- a/lexlua/gettext.lua +++ b/lexlua/gettext.lua @@ -22,10 +22,9 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) lex:add_rule('variable', token(lexer.VARIABLE, S('%$@') * lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * S(': .~') * - lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#' * S(': .~')))) return lex diff --git a/lexlua/gherkin.lua b/lexlua/gherkin.lua index 2fe38c57d..eedfe3436 100644 --- a/lexlua/gherkin.lua +++ b/lexlua/gherkin.lua @@ -16,26 +16,27 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ ]])) -- Strings. -local doc_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 -local dq_str = lexer.delimited_range('"') +local doc_str = lexer.range('"""') +local dq_str = lexer.range('"') lex:add_rule('string', token(lexer.STRING, doc_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -local number = token(lexer.NUMBER, lexer.float + lexer.integer) +local number = token(lexer.NUMBER, lexer.number) -- Tags. lex:add_rule('tag', token('tag', '@' * lexer.word^0)) lex:add_style('tag', lexer.STYLE_LABEL) -- Placeholders. -lex:add_rule('placeholder', token('placeholder', lexer.nested_pair('<', '>'))) +lex:add_rule('placeholder', token('placeholder', + lexer.range('<', '>', false, false, true))) lex:add_style('placeholder', lexer.STYLE_VARIABLE) -- Examples. -lex:add_rule('example', token('example', '|' * lexer.nonnewline^0)) +lex:add_rule('example', token('example', lexer.to_eol('|'))) lex:add_style('example', lexer.STYLE_NUMBER) return lex diff --git a/lexlua/glsl.lua b/lexlua/glsl.lua index 31440f0a6..d6bef2b44 100644 --- a/lexlua/glsl.lua +++ b/lexlua/glsl.lua @@ -19,21 +19,15 @@ lex:modify_rule('keyword', token(lexer.KEYWORD, word_match[[ ]]) + lex:get_rule('keyword')) -- Types. -lex:modify_rule('type', - token(lexer.TYPE, - S('bdiu')^-1 * 'vec' * R('24') + - P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) + - S('iu')^-1 * 'sampler' * R('13') * 'D' + - 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' + - S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + - word_match[[ - Cube 2DRect Buffer 2DMS 2DMSArray 2DMSCubeArray - ]]) + - word_match[[ - samplerCubeShadow sampler2DRectShadow - samplerCubeArrayShadow - ]]) + - lex:get_rule('type') + +lex:modify_rule('type', token(lexer.TYPE, S('bdiu')^-1 * 'vec' * R('24') + + P('d')^-1 * 'mat' * R('24') * ('x' * R('24')^-1) + + S('iu')^-1 * 'sampler' * R('13') * 'D' + + 'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' + + S('iu')^-1 * 'sampler' * (R('12') * 'DArray' + word_match[[ + Cube 2DRect Buffer 2DMS 2DMSArray 2DMSCubeArray + ]]) + + word_match[[samplerCubeShadow sampler2DRectShadow samplerCubeArrayShadow]]) + + lex:get_rule('type') + -- Functions. token(lexer.FUNCTION, word_match[[ diff --git a/lexlua/gnuplot.lua b/lexlua/gnuplot.lua index 5c14f49ae..ca9ca5bad 100644 --- a/lexlua/gnuplot.lua +++ b/lexlua/gnuplot.lua @@ -45,13 +45,13 @@ lex:add_rule('variable', token(lexer.VARIABLE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"') + - lexer.delimited_range('[]', true) + - lexer.delimited_range('{}', true))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local br_str = lexer.range('[', ']', true) + lexer.range('{', '}', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + br_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('-+~!$*%=<>&|^?:()'))) diff --git a/lexlua/go.lua b/lexlua/go.lua index e4a04a3a6..397908a0a 100644 --- a/lexlua/go.lua +++ b/lexlua/go.lua @@ -37,19 +37,18 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = lexer.delimited_range("'", true) -local dq_str = lexer.delimited_range('"', true) -local raw_str = lexer.delimited_range('`', false, true) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local raw_str = lexer.range('`', false, false) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - P('i')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * P('i')^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}'))) diff --git a/lexlua/groovy.lua b/lexlua/groovy.lua index 3d1398a38..07e0586a0 100644 --- a/lexlua/groovy.lua +++ b/lexlua/groovy.lua @@ -40,23 +40,22 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Strings. -local sq_str = lexer.delimited_range("'") -local dq_str = lexer.delimited_range('"') -local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1 -local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local tq_str = lexer.range("'''") + lexer.range('"""') +local string = token(lexer.STRING, tq_str + sq_str + dq_str) local regex_str = #P('/') * lexer.last_char_includes('=~|!<>+-*?&,:;([{') * - lexer.delimited_range('/', true) -lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str + - sq_str + dq_str) + - token(lexer.REGEX, regex_str)) + lexer.range('/', true) +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}'))) diff --git a/lexlua/gtkrc.lua b/lexlua/gtkrc.lua index c500c033c..fbeaa86ba 100644 --- a/lexlua/gtkrc.lua +++ b/lexlua/gtkrc.lua @@ -35,18 +35,19 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[ -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * - (lexer.alnum + S('_-'))^0)) + (lexer.alnum + S('_-'))^0)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * - ('.' * lexer.digit^1)^-1)) + ('.' * lexer.digit^1)^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S(':=,*()[]{}'))) diff --git a/lexlua/haskell.lua b/lexlua/haskell.lua index 524773301..2b36473a3 100644 --- a/lexlua/haskell.lua +++ b/lexlua/haskell.lua @@ -22,22 +22,23 @@ local op = lexer.punct - S('()[]{}') -- Types & type constructors. lex:add_rule('type', token(lexer.TYPE, (lexer.upper * word) + - (":" * (op^1 - ":")))) + (":" * (op^1 - ":")))) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -local line_comment = '--' * lexer.nonnewline_esc^0 -local block_comment = '{-' * (lexer.any - '-}')^0 * P('-}')^-1 +local line_comment = lexer.to_eol('--', true) +local block_comment = lexer.range('{-', '-}') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, op)) diff --git a/lexlua/html.lua b/lexlua/html.lua index b77d8453c..465a828b6 100644 --- a/lexlua/html.lua +++ b/lexlua/html.lua @@ -12,12 +12,11 @@ local ws = token(lexer.WHITESPACE, lexer.space^1) lex:add_rule('whitespace', ws) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * - P('-->')^-1)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->'))) -- Doctype. -lex:add_rule('doctype', token('doctype', '<!' * word_match([[doctype]], true) * - (lexer.any - '>')^1 * '>')) +lex:add_rule('doctype', token('doctype', + lexer.range('<!' * word_match([[doctype]], true), '>'))) lex:add_style('doctype', lexer.STYLE_COMMENT) -- Elements. @@ -37,12 +36,12 @@ local paired_element = token('element', '<' * P('/')^-1 * word_match([[ ]], true)) local known_element = single_element + paired_element local unknown_element = token('unknown_element', '<' * P('/')^-1 * - (lexer.alnum + '-')^1) + (lexer.alnum + '-')^1) local element = known_element + unknown_element lex:add_rule('element', element) lex:add_style('single_element', lexer.STYLE_KEYWORD) lex:add_style('element', lexer.STYLE_KEYWORD) -lex:add_style('unknown_element', lexer.STYLE_KEYWORD..',italics') +lex:add_style('unknown_element', lexer.STYLE_KEYWORD .. ',italics') -- Closing tags. local tag_close = token('element', P('/')^-1 * '>') @@ -66,7 +65,7 @@ local unknown_attribute = token('unknown_attribute', (lexer.alnum + '-')^1) local attribute = (known_attribute + unknown_attribute) * #(lexer.space^0 * '=') lex:add_rule('attribute', attribute) lex:add_style('attribute', lexer.STYLE_TYPE) -lex:add_style('unknown_attribute', lexer.STYLE_TYPE..',italics') +lex:add_style('unknown_attribute', lexer.STYLE_TYPE .. ',italics') -- TODO: performance is terrible on large files. local in_tag = P(function(input, index) @@ -83,17 +82,16 @@ local equals = token(lexer.OPERATOR, '=') --* in_tag -- Strings. local string = #S('\'"') * lexer.last_char_includes('=') * - token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"')) + token(lexer.STRING, lexer.range("'") + lexer.range('"')) lex:add_rule('string', string) -- Numbers. lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * - token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag) + token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag) -- Entities. lex:add_rule('entity', token('entity', '&' * (lexer.any - lexer.space - ';')^1 * - ';')) + ';')) lex:add_style('entity', lexer.STYLE_COMMENT) -- Fold points. @@ -113,15 +111,13 @@ lex:add_fold_point(lexer.COMMENT, '<!--', '-->') -- Tags that start embedded languages. -- Export these patterns for proxy lexers (e.g. ASP) that need them. lex.embed_start_tag = element * - (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * - ws^-1 * tag_close + (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * ws^-1 * tag_close lex.embed_end_tag = element * tag_close -- Embedded CSS (<style type="text/css"> ... </style>). local css = lexer.load('css') local style_element = word_match([[style]], true) -local css_start_rule = #(P('<') * style_element * - ('>' + P(function(input, index) +local css_start_rule = #('<' * style_element * ('>' + P(function(input, index) if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then return index end @@ -132,8 +128,7 @@ lex:embed(css, css_start_rule, css_end_rule) -- Embedded JavaScript (<script type="text/javascript"> ... </script>). local js = lexer.load('javascript') local script_element = word_match([[script]], true) -local js_start_rule = #(P('<') * script_element * - ('>' + P(function(input, index) +local js_start_rule = #('<' * script_element * ('>' + P(function(input, index) if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then return index end @@ -142,13 +137,13 @@ local js_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag local js_line_comment = '//' * (lexer.nonnewline_esc - js_end_rule)^0 local js_block_comment = '/*' * (lexer.any - '*/' - js_end_rule)^0 * P('*/')^-1 js:modify_rule('comment', token(lexer.COMMENT, js_line_comment + - js_block_comment)) + js_block_comment)) lex:embed(js, js_start_rule, js_end_rule) -- Embedded CoffeeScript (<script type="text/coffeescript"> ... </script>). local cs = lexer.load('coffeescript') local script_element = word_match([[script]], true) -local cs_start_rule = #(P('<') * script_element * P(function(input, index) +local cs_start_rule = #('<' * script_element * P(function(input, index) if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then return index end diff --git a/lexlua/icon.lua b/lexlua/icon.lua index 08a4e3f62..a850f03a1 100644 --- a/lexlua/icon.lua +++ b/lexlua/icon.lua @@ -32,16 +32,16 @@ lex:add_style('special_keyword', lexer.STYLE_TYPE) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) -- Numbers. local radix_literal = P('-')^-1 * lexer.dec_num * S('rR') * lexer.alnum^1 -lex:add_rule('number', token(lexer.NUMBER, radix_literal + lexer.float + - lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, radix_literal + lexer.number)) -- Preprocessor. local preproc_word = word_match[[ diff --git a/lexlua/idl.lua b/lexlua/idl.lua index f28652a37..83fb65a8c 100644 --- a/lexlua/idl.lua +++ b/lexlua/idl.lua @@ -27,23 +27,24 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Preprocessor. local preproc_word = word_match[[ define undef ifdef ifndef if elif else endif include warning pragma ]] lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * - preproc_word)) + preproc_word)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}'))) diff --git a/lexlua/inform.lua b/lexlua/inform.lua index 1ac6dcd8b..2b6193a15 100644 --- a/lexlua/inform.lua +++ b/lexlua/inform.lua @@ -54,17 +54,18 @@ lex:add_style('action', lexer.STYLE_VARIABLE) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '!' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('!'))) -- Numbers. local inform_hex = '$' * lexer.xdigit^1 local inform_bin = '$$' * S('01')^1 lex:add_rule('number', token(lexer.NUMBER, lexer.integer + inform_hex + - inform_bin)) + inform_bin)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('@~=+-*/%^#=<>;:,.{}[]()&|?'))) diff --git a/lexlua/ini.lua b/lexlua/ini.lua index d5445ba12..d56539d42 100644 --- a/lexlua/ini.lua +++ b/lexlua/ini.lua @@ -17,19 +17,19 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * - (lexer.alnum + S('_.'))^0)) + (lexer.alnum + S('_.'))^0)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Labels. -lex:add_rule('label', token(lexer.LABEL, - lexer.delimited_range('[]', true, true))) +lex:add_rule('label', token(lexer.LABEL, lexer.range('[', ']', true))) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(S(';#')) * - lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, + lexer.to_eol(lexer.starts_line(S(';#'))))) -- Numbers. local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 diff --git a/lexlua/io_lang.lua b/lexlua/io_lang.lua index 44143dc6f..573bd7ca8 100644 --- a/lexlua/io_lang.lua +++ b/lexlua/io_lang.lua @@ -26,22 +26,22 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = lexer.delimited_range("'") -local dq_str = lexer.delimited_range('"') -local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local tq_str = lexer.range('"""') lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str)) -- Comments. -local line_comment = (P('#') + '//') * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol(P('#') + '//') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, - S('`~@$%^&*-+/=\\<>?.,:;()[]{}'))) + S('`~@$%^&*-+/=\\<>?.,:;()[]{}'))) -- Fold points. lex:add_fold_point(lexer.OPERATOR, '(', ')') diff --git a/lexlua/java.lua b/lexlua/java.lua index cefac563f..ad83c5745 100644 --- a/lexlua/java.lua +++ b/lexlua/java.lua @@ -14,7 +14,7 @@ lex:add_rule('whitespace', ws) -- Classes. lex:add_rule('classdef', token(lexer.KEYWORD, P('class')) * ws * - token(lexer.CLASS, lexer.word)) + token(lexer.CLASS, lexer.word)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ @@ -39,17 +39,17 @@ lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('(')) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('LlFfDd')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1)) -- Annotations. lex:add_rule('annotation', token('annotation', '@' * lexer.word)) diff --git a/lexlua/javascript.lua b/lexlua/javascript.lua index b8bbfd242..a2733a1cb 100644 --- a/lexlua/javascript.lua +++ b/lexlua/javascript.lua @@ -24,20 +24,22 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Strings. +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local bq_str = lexer.range('`') +local string = token(lexer.STRING, sq_str + dq_str + bq_str) local regex_str = #P('/') * lexer.last_char_includes('+-*%^!=&|?:;,([{<>') * - lexer.delimited_range('/', true) * S('igm')^0 -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"') + - lexer.delimited_range('`')) + - token(lexer.REGEX, regex_str)) + lexer.range('/', true) * S('igm')^0 +local regex = token(lexer.REGEX, regex_str) +lex:add_rule('string', string + regex) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>'))) diff --git a/lexlua/json.lua b/lexlua/json.lua index 787fc99a5..c6feb3a60 100644 --- a/lexlua/json.lua +++ b/lexlua/json.lua @@ -12,15 +12,16 @@ local lex = lexer.new('json') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[true false null]])) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. diff --git a/lexlua/latex.lua b/lexlua/latex.lua index c4df8e641..bb2f70823 100644 --- a/lexlua/latex.lua +++ b/lexlua/latex.lua @@ -13,10 +13,9 @@ local lex = lexer.new('latex') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local line_comment = '%' * lexer.nonnewline^0 -local block_comment = '\\begin' * P(' ')^0 * '{comment}' * - (lexer.any - '\\end' * P(' ')^0 * '{comment}')^0 * - P('\\end' * P(' ')^0 * '{comment}')^-1 +local line_comment = lexer.to_eol('%') +local block_comment = lexer.range('\\begin' * P(' ')^0 * '{comment}', + '\\end' * P(' ')^0 * '{comment}') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Math environments. @@ -24,15 +23,13 @@ local math_word = word_match[[ align displaymath eqnarray equation gather math multline ]] local math_begin_end = (P('begin') + P('end')) * P(' ')^0 * - '{' * math_word * P('*')^-1 * '}' + '{' * math_word * P('*')^-1 * '}' lex:add_rule('math', token('math', '$' + '\\' * (S('[]()') + math_begin_end))) lex:add_style('math', lexer.STYLE_FUNCTION) -- LaTeX environments. lex:add_rule('environment', token('environment', '\\' * - (P('begin') + P('end')) * - P(' ')^0 * '{' * lexer.word * - P('*')^-1 * '}')) + (P('begin') + P('end')) * P(' ')^0 * '{' * lexer.word * P('*')^-1 * '}')) lex:add_style('environment', lexer.STYLE_KEYWORD) -- Sections. @@ -43,7 +40,7 @@ lex:add_style('section', lexer.STYLE_CLASS) -- Commands. lex:add_rule('command', token('command', '\\' * - (lexer.alpha^1 + S('#$&~_^%{}')))) + (lexer.alpha^1 + S('#$&~_^%{}')))) lex:add_style('command', lexer.STYLE_KEYWORD) -- Operators. diff --git a/lexlua/ledger.lua b/lexlua/ledger.lua index 01e270d1d..2daaab46a 100644 --- a/lexlua/ledger.lua +++ b/lexlua/ledger.lua @@ -10,32 +10,31 @@ local lex = lexer.new('ledger', {lex_by_line = true}) local delim = P('\t') + P(' ') -- Account. -lex:add_rule('account', token(lexer.VARIABLE, - lexer.starts_line(S(' \t')^1 * - (lexer.print - delim)^1))) +lex:add_rule('account', token(lexer.VARIABLE, lexer.starts_line(S(' \t')^1 * + (lexer.print - delim)^1))) -- Amount. lex:add_rule('amount', token(lexer.NUMBER, delim * (1 - S(';\r\n'))^1)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, S(';#') * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol(S(';#')))) -- Whitespace. lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Strings. -local sq_str = lexer.delimited_range("'") -local dq_str = lexer.delimited_range('"') -local label = lexer.delimited_range('[]', true, true) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local label = lexer.range('[', ']', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + label)) -- Date. lex:add_rule('date', token(lexer.CONSTANT, - lexer.starts_line((lexer.digit + S('/-'))^1))) + lexer.starts_line((lexer.digit + S('/-'))^1))) -- Automated transactions. lex:add_rule('auto_tx', token(lexer.PREPROCESSOR, - lexer.starts_line(S('=~') * lexer.nonnewline^0))) + lexer.to_eol(lexer.starts_line(S('=~'))))) -- Directives. local directive_word = word_match[[ @@ -43,6 +42,6 @@ local directive_word = word_match[[ endfixed include payee apply tag test year ]] + S('AYNDCIiOobh') lex:add_rule('directive', token(lexer.KEYWORD, - lexer.starts_line(S('!@')^-1 * directive_word))) + lexer.starts_line(S('!@')^-1 * directive_word))) return lex diff --git a/lexlua/less.lua b/lexlua/less.lua index 5534369c6..ee8fc8896 100644 --- a/lexlua/less.lua +++ b/lexlua/less.lua @@ -9,11 +9,11 @@ local S = lpeg.S local lex = lexer.new('less', {inherit = lexer.load('css')}) -- Line comments. -lex:add_rule('line_comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0)) +lex:add_rule('line_comment', token(lexer.COMMENT, lexer.to_eol('//'))) -- Variables. lex:add_rule('variable', token(lexer.VARIABLE, '@' * - (lexer.alnum + S('_-{}'))^1)) + (lexer.alnum + S('_-{}'))^1)) -- Fold points. lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) diff --git a/lexlua/lexer.lua b/lexlua/lexer.lua index 68183aa29..d133eb11d 100644 --- a/lexlua/lexer.lua +++ b/lexlua/lexer.lua @@ -139,10 +139,10 @@ local M = {} -- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](), -- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](), -- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](), --- [`lexer.float`](), and [`lexer.word`](). You may use your own token names if --- none of the above fit your language, but an advantage to using predefined --- token names is that your lexer's tokens will inherit the universal syntax --- highlighting color theme used by your text editor. +-- [`lexer.float`](), [`lexer.number`](), and [`lexer.word`](). You may use your +-- own token names if none of the above fit your language, but an advantage to +-- using predefined token names is that your lexer's tokens will inherit the +-- universal syntax highlighting color theme used by your text editor. -- -- ##### Example Tokens -- @@ -185,9 +185,8 @@ local M = {} -- -- Line-style comments with a prefix character(s) are easy to express with LPeg: -- --- local shell_comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0) --- local c_line_comment = token(lexer.COMMENT, --- '//' * lexer.nonnewline_esc^0) +-- local shell_comment = token(lexer.COMMENT, lexer.to_eol('#')) +-- local c_line_comment = token(lexer.COMMENT, lexer.to_eol('//', true)) -- -- The comments above start with a '#' or "//" and go to the end of the line. -- The second comment recognizes the next line also as a comment if the current @@ -196,8 +195,7 @@ local M = {} -- C-style "block" comments with a start and end delimiter are also easy to -- express: -- --- local c_comment = token(lexer.COMMENT, '/*' * (lexer.any - '*/')^0 * --- P('*/')^-1) +-- local c_comment = token(lexer.COMMENT, lexer.range('/*', '*/')) -- -- This comment starts with a "/\*" sequence and contains anything up to and -- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer @@ -205,21 +203,13 @@ local M = {} -- -- **Strings** -- --- It is tempting to think that a string is not much different from the block --- comment shown above in that both have start and end delimiters: +-- Most programming languages allow escape sequences in strings such that a +-- sequence like "\\"" in a double-quoted string indicates that the +-- '"' is not the end of the string. [`lexer.range()`]() handles escapes +-- inherently. -- --- local dq_str = '"' * (lexer.any - '"')^0 * P('"')^-1 --- local sq_str = "'" * (lexer.any - "'")^0 * P("'")^-1 --- local simple_string = token(lexer.STRING, dq_str + sq_str) --- --- However, most programming languages allow escape sequences in strings such --- that a sequence like "\\"" in a double-quoted string indicates that the --- '"' is not the end of the string. The above token incorrectly matches --- such a string. Instead, use the [`lexer.delimited_range()`]() convenience --- function. --- --- local dq_str = lexer.delimited_range('"') --- local sq_str = lexer.delimited_range("'") +-- local dq_str = lexer.range('"') +-- local sq_str = lexer.range("'") -- local string = token(lexer.STRING, dq_str + sq_str) -- -- In this case, the lexer treats '\' as an escape character in a string @@ -228,9 +218,9 @@ local M = {} -- **Numbers** -- -- Most programming languages have the same format for integer and float tokens, --- so it might be as simple as using a couple of predefined LPeg patterns: +-- so it might be as simple as using a predefined LPeg pattern: -- --- local number = token(lexer.NUMBER, lexer.float + lexer.integer) +-- local number = token(lexer.NUMBER, lexer.number) -- -- However, some languages allow postfix characters on integers. -- @@ -714,9 +704,9 @@ local M = {} -- lex:add_rule('custom', token('custom', P('quux'))) -- lex:add_style('custom', lexer.STYLE_KEYWORD .. ',bold') -- lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) --- lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) --- lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) --- lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +-- lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) +-- lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) +-- lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}'))) -- -- lex:add_fold_point(lexer.OPERATOR, '{', '}') @@ -769,7 +759,7 @@ local M = {} -- #### Acknowledgements -- -- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list --- that inspired me, and thanks to Roberto Ierusalimschy for LPeg. +-- that provided inspiration, and thanks to Roberto Ierusalimschy for LPeg. -- -- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html -- @field DEFAULT (string) @@ -906,6 +896,9 @@ local M = {} -- A pattern that matches either a decimal, hexadecimal, or octal number. -- @field float (pattern) -- A pattern that matches a floating point number. +-- @field number (pattern) +-- A pattern that matches a typical number, either a floating point, decimal, +-- hexadecimal, or octal number. -- @field word (pattern) -- A pattern that matches a typical word. Words begin with a letter or -- underscore and consist of alphanumeric and underscore characters. @@ -965,7 +958,8 @@ local function searchpath(name, path) local tried = {} for part in path:gmatch('[^;]+') do local filename = part:gsub('%?', name) - if loadfile(filename) then return filename end + local ok, errmsg = loadfile(filename) + if ok or not errmsg:find('cannot open') then return filename end tried[#tried + 1] = string.format("no file '%s'", filename) end return nil, table.concat(tried, '\n') @@ -1605,6 +1599,7 @@ M.float = lpeg_S('+-')^-1 * ( (M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0 * -lpeg_P('.')) * (lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 + (M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)) +M.number = M.float + M.integer M.word = (M.alpha + '_') * (M.alnum + '_')^0 @@ -1625,6 +1620,69 @@ function M.token(name, patt) end --- +-- Creates and returns a pattern that matches from string or pattern *prefix* +-- until the end of the line. +-- *escape* indicates whether the end of the line can be escaped with a '\' +-- character. +-- @param prefix String or pattern prefix to start matching at. +-- @param escape Optional flag indicating whether or not newlines can be escaped +-- by a '\' character. The default value is `false`. +-- @return pattern +-- @usage local line_comment = lexer.to_eol('//') +-- @usage local line_comment = lexer.to_eol(P('#') + ';') +-- @name to_eol +function M.to_eol(prefix, escape) + return prefix * (not escape and M.nonnewline or M.nonnewline_esc)^0 +end + +--- +-- Creates and returns a pattern that matches a range of text bounded by strings +-- or patterns *s* and *e*. +-- This is a convenience function for matching more complicated ranges like +-- strings with escape characters, balanced parentheses, and block comments +-- (nested or not). *e* is optional and defaults to *s*. *single_line* indicates +-- whether or not the range must be on a single line; *escapes* indicates +-- whether or not to allow '\' as an escape character; and *balanced* indicates +-- whether or not to handle balanced ranges like parentheses, and requires *s* +-- and *e* to be different. +-- @param s String or pattern start of a range. +-- @param e Optional string or pattern end of a range. The default value is *s*. +-- @param single_line Optional flag indicating whether or not the range must be +-- on a single line. +-- @param escapes Optional flag indicating whether or not the range end may +-- be escaped by a '\' character. +-- The default value is `false` unless *s* and *e* are identical, +-- single-character strings. In that case, the default value is `true`. +-- @param balanced Optional flag indicating whether or not to match a balanced +-- range, like the "%b" Lua pattern. This flag only applies if *s* and *e* are +-- different. +-- @return pattern +-- @usage local dq_str_escapes = lexer.range('"') +-- @usage local dq_str_noescapes = lexer.range('"', false, false) +-- @usage local unbalanced_parens = lexer.range('(', ')') +-- @usage local balanced_parens = lexer.range('(', ')', false, false, true) +-- @name range +function M.range(s, e, single_line, escapes, balanced) + if type(e) ~= 'string' and type(e) ~= 'userdata' then + e, single_line, escapes, balanced = s, e, single_line, escapes + end + local any = M.any - e + if single_line then any = any - '\n' end + if balanced then any = any - s end + if escapes == nil then + -- Only allow escapes by default for ranges with identical, single-character + -- string delimiters. + escapes = type(s) == 'string' and #s == 1 and s == e + end + if escapes then any = any - '\\' + '\\' * M.any end + if balanced and s ~= e then + return lpeg_P{s * (any + lpeg_V(1))^0 * lpeg_P(e)^-1} + else + return s * any^0 * lpeg_P(e)^-1 + end +end + +-- Deprecated function. Use `lexer.range()` instead. -- Creates and returns a pattern that matches a range of text bounded by -- *chars* characters. -- This is a convenience function for matching more complicated delimited ranges @@ -1647,9 +1705,10 @@ end -- @usage local unbalanced_parens = lexer.delimited_range('()') -- @usage local balanced_parens = lexer.delimited_range('()', false, false, -- true) --- @see nested_pair +-- @see range -- @name delimited_range function M.delimited_range(chars, single_line, no_escape, balanced) + print("lexer.delimited_range() is deprecated, use lexer.range()") local s = chars:sub(1, 1) local e = #chars == 2 and chars:sub(2, 2) or s local range @@ -1692,7 +1751,7 @@ end -- @param s String character set like one passed to `lpeg.S()`. -- @return pattern -- @usage local regex = lexer.last_char_includes('+-*!%^&|=,([{') * --- lexer.delimited_range('/') +-- lexer.range('/') -- @name last_char_includes function M.last_char_includes(s) s = string.format('[%s]', s:gsub('[-%%%[]', '%%%1')) @@ -1704,7 +1763,7 @@ function M.last_char_includes(s) end) end ---- +-- Deprecated function. Use `lexer.range()` instead. -- Returns a pattern that matches a balanced range of text that starts with -- string *start_chars* and ends with string *end_chars*. -- With single-character delimiters, this function is identical to @@ -1713,9 +1772,10 @@ end -- @param end_chars The string ending a nested sequence. -- @return pattern -- @usage local nested_comment = lexer.nested_pair('/*', '*/') --- @see delimited_range +-- @see range -- @name nested_pair function M.nested_pair(start_chars, end_chars) + print("lexer.nested_pair() is deprecated, use lexer.range()") local s, e = start_chars, lpeg_P(end_chars)^-1 return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e} end diff --git a/lexlua/lilypond.lua b/lexlua/lilypond.lua index a7775016a..645a64fbc 100644 --- a/lexlua/lilypond.lua +++ b/lexlua/lilypond.lua @@ -18,12 +18,11 @@ lex:add_rule('keyword', token(lexer.KEYWORD, '\\' * lexer.word)) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range('"', false, true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', false, false))) -- Comments. -- TODO: block comment. -lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%'))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S("{}'~<>|"))) diff --git a/lexlua/lisp.lua b/lexlua/lisp.lua index 374956b1a..88d6488f0 100644 --- a/lexlua/lisp.lua +++ b/lexlua/lisp.lua @@ -36,17 +36,16 @@ local word = lexer.alpha * (lexer.alnum + '_' + '-')^0 lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, "'" * word + - lexer.delimited_range('"'))) +lex:add_rule('string', token(lexer.STRING, "'" * word + lexer.range('"'))) -- Comments. -local line_comment = ';' * lexer.nonnewline^0 -local block_comment = '#|' * (lexer.any - '|#')^0 * P('|#')^-1 +local line_comment = lexer.to_eol(';') +local block_comment = lexer.range('#|', '|#') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * - (S('./') * lexer.digit^1)^-1)) + (S('./') * lexer.digit^1)^-1)) -- Entities. lex:add_rule('entity', token('entity', '&' * word)) diff --git a/lexlua/litcoffee.lua b/lexlua/litcoffee.lua index 916b4e78f..499a788c7 100644 --- a/lexlua/litcoffee.lua +++ b/lexlua/litcoffee.lua @@ -17,6 +17,6 @@ lex:embed(coffeescript, coffee_start_rule, coffee_end_rule) -- Use 'markdown_whitespace' instead of lexer.WHITESPACE since the latter would -- expand to 'litcoffee_whitespace'. lex:modify_rule('whitespace', token('markdown_whitespace', S(' \t')^1 + - S('\r\n')^1)) + S('\r\n')^1)) return lex diff --git a/lexlua/logtalk.lua b/lexlua/logtalk.lua index bc9a1d810..245af99fd 100644 --- a/lexlua/logtalk.lua +++ b/lexlua/logtalk.lua @@ -65,6 +65,6 @@ local operators = [[ as ]] lex:modify_rule('operator', token(lexer.OPERATOR, word_match(operators)) + - lex:get_rule('operator')) + lex:get_rule('operator')) return lex diff --git a/lexlua/lua.lua b/lexlua/lua.lua index c648475ef..655f237f5 100644 --- a/lexlua/lua.lua +++ b/lexlua/lua.lua @@ -32,7 +32,7 @@ local deprecated_func = token('deprecated_function', word_match[[ getfenv loadstring module setfenv unpack ]]) lex:add_rule('function', func + deprecated_func) -lex:add_style('deprecated_function', lexer.STYLE_FUNCTION..',italics') +lex:add_style('deprecated_function', lexer.STYLE_FUNCTION .. ',italics') -- Constants. lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ @@ -103,26 +103,28 @@ local deprecated_library = token('deprecated_library', word_match[[ ]]) lex:add_rule('library', library + deprecated_library) lex:add_style('library', lexer.STYLE_TYPE) -lex:add_style('deprecated_library', lexer.STYLE_TYPE..',italics') +lex:add_style('deprecated_library', lexer.STYLE_TYPE .. ',italics') -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[', - function(input, index, eq) - local _, e = input:find(']'..eq..']', index, true) - return (e or #input) + 1 - end) + function(input, index, eq) + local _, e = input:find(']' .. eq .. ']', index, true) + return (e or #input) + 1 + end) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"')) + - token('longstring', longstring)) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str) + + token('longstring', longstring)) lex:add_style('longstring', lexer.STYLE_STRING) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '--' * (longstring + - lexer.nonnewline^0))) +local line_comment = lexer.to_eol('--') +local block_comment = '--' * longstring +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) -- Numbers. local lua_integer = P('-')^-1 * (lexer.hex_num + lexer.dec_num) @@ -133,7 +135,7 @@ lex:add_rule('label', token(lexer.LABEL, '::' * lexer.word * '::')) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, '..' + - S('+-*/%^#=<>&|~;:,.{}[]()'))) + S('+-*/%^#=<>&|~;:,.{}[]()'))) -- Fold points. local function fold_longcomment(text, pos, line, s, symbol) diff --git a/lexlua/makefile.lua b/lexlua/makefile.lua index e1f9fef99..ee8f0ac59 100644 --- a/lexlua/makefile.lua +++ b/lexlua/makefile.lua @@ -29,7 +29,7 @@ local special_target = token(lexer.CONSTANT, word_match[[ ]]) local normal_target = token('target', (lexer.any - lexer.space - S(':#='))^1) lex:add_rule('target', lexer.starts_line((special_target + normal_target) * - ws^0 * #(':' * -P('=')))) + ws^0 * #(':' * -P('=')))) lex:add_style('target', lexer.STYLE_LABEL) -- Variables. @@ -52,21 +52,22 @@ local implicit_var = word_match[[ DESTDIR MAKE MAKEFLAGS MAKEOVERRIDES MFLAGS ]] * #(ws^0 * assign) local computed_var = token(lexer.OPERATOR, '$' * S('({')) * - token(lexer.FUNCTION, word_match[[ - -- Functions for String Substitution and Analysis. - subst patsubst strip findstring filter filter-out sort word wordlist words - firstword lastword - -- Functions for File Names. - dir notdir suffix basename addsuffix addprefix join wildcard realpath abspath - -- Functions for Conditionals. - if or and - -- Miscellaneous Functions. - foreach call value eval origin flavor shell - -- Functions That Control Make. - error warning info -]]) + token(lexer.FUNCTION, word_match[[ + -- Functions for String Substitution and Analysis. + subst patsubst strip findstring filter filter-out sort word wordlist words + firstword lastword + -- Functions for File Names. + dir notdir suffix basename addsuffix addprefix join wildcard realpath + abspath + -- Functions for Conditionals. + if or and + -- Miscellaneous Functions. + foreach call value eval origin flavor shell + -- Functions That Control Make. + error warning info + ]]) local variable = token(lexer.VARIABLE, expanded_var + auto_var + special_var + - implicit_var) + computed_var + implicit_var) + computed_var lex:add_rule('variable', variable) -- Operators. @@ -76,14 +77,14 @@ lex:add_rule('operator', token(lexer.OPERATOR, assign + S(':$(){}'))) lex:add_rule('identifier', token(lexer.IDENTIFIER, word_char^1)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Embedded Bash. local bash = lexer.load('bash') bash:modify_rule('variable', token(lexer.VARIABLE, '$$' * word_char^1) + - bash:get_rule('variable') + variable) + bash:get_rule('variable') + variable) local bash_start_rule = token(lexer.WHITESPACE, P('\t')) + - token(lexer.OPERATOR, P(';')) + token(lexer.OPERATOR, P(';')) local bash_end_rule = token(lexer.WHITESPACE, P('\n')) lex:embed(bash, bash_start_rule, bash_end_rule) diff --git a/lexlua/man.lua b/lexlua/man.lua index efe765480..3a875f6b9 100644 --- a/lexlua/man.lua +++ b/lexlua/man.lua @@ -11,19 +11,16 @@ local lex = lexer.new('man') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Markup. -lex:add_rule('rule1', token(lexer.STRING, '.' * ('B' * P('R')^-1 + - 'I' * P('PR')^-1) * - lexer.nonnewline^0)) -lex:add_rule('rule2', token(lexer.NUMBER, '.' * S('ST') * 'H' * - lexer.nonnewline^0)) +lex:add_rule('rule1', token(lexer.STRING, '.' * + lexer.to_eol('B' * P('R')^-1 + 'I' * P('PR')^-1))) +lex:add_rule('rule2', token(lexer.NUMBER, lexer.to_eol('.' * S('ST') * 'H'))) lex:add_rule('rule3', token(lexer.KEYWORD, P('.br') + '.DS' + '.RS' + '.RE' + - '.PD')) + '.PD')) lex:add_rule('rule4', token(lexer.LABEL, '.' * (S('ST') * 'H' + '.TP'))) lex:add_rule('rule5', token(lexer.VARIABLE, '.B' * P('R')^-1 + - '.I' * S('PR')^-1 + - '.PP')) + '.I' * S('PR')^-1 + '.PP')) lex:add_rule('rule6', token(lexer.TYPE, '\\f' * S('BIPR'))) lex:add_rule('rule7', token(lexer.PREPROCESSOR, lexer.starts_line('.') * - lexer.alpha^1)) + lexer.alpha^1)) return lex diff --git a/lexlua/markdown.lua b/lexlua/markdown.lua index e4bba29a0..cac5c8322 100644 --- a/lexlua/markdown.lua +++ b/lexlua/markdown.lua @@ -8,58 +8,40 @@ local P, R, S = lpeg.P, lpeg.R, lpeg.S local lex = lexer.new('markdown') -- Block elements. -lex:add_rule('header', - token('h6', lexer.starts_line('######') * lexer.nonnewline^0) + - token('h5', lexer.starts_line('#####') * lexer.nonnewline^0) + - token('h4', lexer.starts_line('####') * lexer.nonnewline^0) + - token('h3', lexer.starts_line('###') * lexer.nonnewline^0) + - token('h2', lexer.starts_line('##') * lexer.nonnewline^0) + - token('h1', lexer.starts_line('#') * lexer.nonnewline^0)) -local font_size = lexer.property_int['fontsize'] > 0 and - lexer.property_int['fontsize'] or 10 -local hstyle = 'fore:$(color.red)' -lex:add_style('h6', hstyle) -lex:add_style('h5', hstyle..',size:'..(font_size + 1)) -lex:add_style('h4', hstyle..',size:'..(font_size + 2)) -lex:add_style('h3', hstyle..',size:'..(font_size + 3)) -lex:add_style('h2', hstyle..',size:'..(font_size + 4)) -lex:add_style('h1', hstyle..',size:'..(font_size + 5)) - -lex:add_rule('blockquote', - token(lexer.STRING, - lpeg.Cmt(lexer.starts_line(S(' \t')^0 * '>'), - function(input, index) - local _, e = input:find('\n[ \t]*\r?\n', index) - return (e or #input) + 1 - end))) - -lex:add_rule('list', token('list', lexer.starts_line(S(' \t')^0 * (S('*+-') + - R('09')^1 * '.')) * - S(' \t'))) +local function h(n) + return token('h' .. n, lexer.to_eol(lexer.starts_line(string.rep('#', n)))) +end +lex:add_rule('header', h(6) + h(5) + h(4) + h(3) + h(2) + h(1)) +local function add_header_style(n) + local font_size = lexer.property_int['fontsize'] > 0 and + lexer.property_int['fontsize'] or 10 + lex:add_style('h' .. n, 'fore:$(color.red),size:' .. (font_size + (6 - n))) +end +for i = 1, 6 do add_header_style(i) end + +lex:add_rule('blockquote', token(lexer.STRING, + lpeg.Cmt(lexer.starts_line(S(' \t')^0 * '>'), function(input, index) + local _, e = input:find('\n[ \t]*\r?\n', index) + return (e or #input) + 1 + end))) + +lex:add_rule('list', token('list', + lexer.starts_line(S(' \t')^0 * (S('*+-') + R('09')^1 * '.')) * S(' \t'))) lex:add_style('list', lexer.STYLE_CONSTANT) -lex:add_rule('block_code', - token('code', lexer.starts_line(P(' ')^4 + P('\t')) * -P('<') * - lexer.nonnewline^0 * lexer.newline^-1) + - token('code', lexer.starts_line(P('```')) * (lexer.any - '```')^0 * - P('```')^-1)) -lex:add_rule('inline_code', - token('code', P('``') * (lexer.any - '``')^0 * P('``')^-1 + - lexer.delimited_range('`', false, true))) -lex:add_style('code', lexer.STYLE_EMBEDDED..',eolfilled') - -lex:add_rule('hr', - token('hr', - lpeg.Cmt(lexer.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))), - function(input, index, c) - local line = input:match('[^\r\n]*', index) - line = line:gsub('[ \t]', '') - if line:find('[^'..c..']') or #line < 2 then - return nil - end - return (select(2, input:find('\r?\n', index)) or - #input) + 1 - end))) +local code_line = lexer.to_eol(lexer.starts_line(P(' ')^4 + '\t') * -P('<')) * + lexer.newline^-1 +local code_block = lexer.range(lexer.starts_line('```'), '```') +local code_inline = lexer.range('``') + lexer.range('`', false, false) +lex:add_rule('block_code', token('code', code_line + code_block + code_inline)) +lex:add_style('code', lexer.STYLE_EMBEDDED .. ',eolfilled') + +lex:add_rule('hr', token('hr', lpeg.Cmt( + lexer.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))), function(input, index, c) + local line = input:match('[^\r\n]*', index):gsub('[ \t]', '') + if line:find('[^' .. c .. ']') or #line < 2 then return nil end + return (select(2, input:find('\r?\n', index)) or #input) + 1 + end))) lex:add_style('hr', 'back:$(color.black),eolfilled') -- Whitespace. @@ -69,23 +51,22 @@ lex:add_rule('whitespace', ws) -- Span elements. lex:add_rule('escape', token(lexer.DEFAULT, P('\\') * 1)) -lex:add_rule('link_label', - token('link_label', lexer.delimited_range('[]') * ':') * ws * - token('link_url', (lexer.any - lexer.space)^1) * - (ws * token(lexer.STRING, lexer.delimited_range('"', false, true) + - lexer.delimited_range("'", false, true) + - lexer.delimited_range('()')))^-1) +local ref_link_label = token('link_label', lexer.range('[', ']', true) * ':') +local ref_link_url = token('link_url', (lexer.any - lexer.space)^1) +local ref_link_title = token(lexer.STRING, lexer.range('"', true, false) + + lexer.range("'", true, false) + lexer.range('(', ')', true)) +lex:add_rule('link_label', ref_link_label * ws * ref_link_url * + (ws * ref_link_title)^-1) lex:add_style('link_label', lexer.STYLE_LABEL) lex:add_style('link_url', 'underlined') -lex:add_rule('link', - token('link', P('!')^-1 * lexer.delimited_range('[]') * - (P('(') * (lexer.any - S(') \t'))^0 * - (S(' \t')^1 * - lexer.delimited_range('"', false, true))^-1 * ')' + - S(' \t')^0 * lexer.delimited_range('[]')) + - 'http' * P('s')^-1 * '://' * - (lexer.any - lexer.space)^1)) +local link_label = P('!')^-1 * lexer.range('[', ']', true) +local link_target = P('(') * (lexer.any - S(') \t'))^0 * + (S(' \t')^1 * lexer.range('"', false, false))^-1 * ')' +local link_ref = S(' \t')^0 * lexer.range('[', ']', true) +local link_url = 'http' * P('s')^-1 * '://' * (lexer.any - lexer.space)^1 +lex:add_rule('link', token('link', link_label * (link_target + link_ref) + + link_url)) lex:add_style('link', 'underlined') local punct_space = lexer.punct + lexer.space @@ -96,29 +77,27 @@ local punct_space = lexer.punct + lexer.space local function flanked_range(s, not_inword) local fl_char = lexer.any - s - lexer.space local left_fl = lpeg.B(punct_space - s) * s * #fl_char + - s * #(fl_char - lexer.punct) + s * #(fl_char - lexer.punct) local right_fl = lpeg.B(lexer.punct) * s * #(punct_space - s) + - lpeg.B(fl_char) * s + lpeg.B(fl_char) * s return left_fl * (lexer.any - (not_inword and s * #punct_space or s))^0 * - right_fl + right_fl end -lex:add_rule('strong', - token('strong', flanked_range('**') + - (lpeg.B(punct_space) + #lexer.starts_line('_')) * - flanked_range('__', true) * #(punct_space + -1))) +lex:add_rule('strong', token('strong', flanked_range('**') + + (lpeg.B(punct_space) + #lexer.starts_line('_')) * flanked_range('__', true) * + #(punct_space + -1))) lex:add_style('strong', 'bold') -lex:add_rule('em', - token('em', flanked_range('*') + - (lpeg.B(punct_space) + #lexer.starts_line('_')) * - flanked_range('_', true) * #(punct_space + -1))) +lex:add_rule('em', token('em', flanked_range('*') + + (lpeg.B(punct_space) + #lexer.starts_line('_')) * flanked_range('_', true) * + #(punct_space + -1))) lex:add_style('em', 'italics') -- Embedded HTML. local html = lexer.load('html') local start_rule = lexer.starts_line(S(' \t')^0) * #P('<') * - html:get_rule('element') + html:get_rule('element') local end_rule = token(lexer.DEFAULT, P('\n')) -- TODO: lexer.WHITESPACE errors lex:embed(html, start_rule, end_rule) diff --git a/lexlua/matlab.lua b/lexlua/matlab.lua index d371ebc96..7800a421c 100644 --- a/lexlua/matlab.lua +++ b/lexlua/matlab.lua @@ -54,23 +54,22 @@ lex:add_rule('variable', token(lexer.VARIABLE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"') + - lexer.delimited_range('`'))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"') +local bq_str = lexer.range('`') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str)) -- Comments. -local line_comment = (P('%') + '#') * lexer.nonnewline^0 -local block_comment = '%{' * (lexer.any - '%}')^0 * P('%}')^-1 +local line_comment = lexer.to_eol(P('%') + '#') +local block_comment = lexer.range('%{', '%}') lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer + - lexer.dec_num + lexer.hex_num + - lexer.oct_num)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, - S('!%^&*()[]{}-=+/\\|:;.,?<>~`´'))) + S('!%^&*()[]{}-=+/\\|:;.,?<>~`´'))) -- Fold points. lex:add_fold_point(lexer.KEYWORD, 'if', 'end') diff --git a/lexlua/mediawiki.lua b/lexlua/mediawiki.lua index 6a8a3a704..27a7409d8 100644 --- a/lexlua/mediawiki.lua +++ b/lexlua/mediawiki.lua @@ -9,18 +9,15 @@ local P, R, S, B = lpeg.P, lpeg.R, lpeg.S, lpeg.B local lex = lexer.new('mediawiki') -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * - P('-->')^-1)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->'))) -- HTML-like tags local tag_start = token('tag_start', '<' * P('/')^-1 * lexer.alnum^1 * - lexer.space^0) + lexer.space^0) local tag_attr = token('tag_attr', lexer.alpha^1 * lexer.space^0 * - ('=' * lexer.space^0 * - ('"' * ((lexer.any - S('>"\\')) + - ('\\' * lexer.any))^0 * '"' + - (lexer.any - lexer.space - '>')^0)^-1)^0 * - lexer.space^0) + ('=' * lexer.space^0 * + ('"' * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * '"' + + (lexer.any - lexer.space - '>')^0)^-1)^0 * lexer.space^0) local tag_end = token('tag_end', P('/')^-1 * '>') lex:add_rule('tag', tag_start * tag_attr^0 * tag_end) lex:add_style('tag_start', lexer.STYLE_KEYWORD) @@ -30,18 +27,17 @@ lex:add_style('tag_end', lexer.STYLE_KEYWORD) -- Link lex:add_rule('link', token(lexer.STRING, S('[]'))) lex:add_rule('internal_link', B('[[') * - token('link_article', (lexer.any - '|' - ']]')^1)) -lex:add_style('link_article', lexer.STYLE_STRING..',underlined') + token('link_article', (lexer.any - '|' - ']]')^1)) +lex:add_style('link_article', lexer.STYLE_STRING .. ',underlined') -- Templates and parser functions. lex:add_rule('template', token(lexer.OPERATOR, S('{}'))) lex:add_rule('parser_func', B('{{') * - token('parser_func', P('#') * lexer.alpha^1 + - lexer.upper^1 * ':')) + token('parser_func', P('#') * lexer.alpha^1 + lexer.upper^1 * ':')) lex:add_rule('template_name', B('{{') * - token('template_name', (lexer.any - S('{}|'))^1)) + token('template_name', (lexer.any - S('{}|'))^1)) lex:add_style('parser_func', lexer.STYLE_FUNCTION) -lex:add_style('template_name', lexer.STYLE_OPERATOR..',underlined') +lex:add_style('template_name', lexer.STYLE_OPERATOR .. ',underlined') -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('-=|#~!'))) @@ -49,10 +45,9 @@ lex:add_rule('operator', token(lexer.OPERATOR, S('-=|#~!'))) -- Behavior switches local start_pat = P(function(_, pos) return pos == 1 end) lex:add_rule('behavior_switch', (B(lexer.space) + start_pat) * - token('behavior_switch', - '__' * (P('TOC') + 'FORCETOC' + 'NOTOC' + - 'NOEDITSECTION' + 'NOCC' + - 'NOINDEX') * '__') * #lexer.space) + token('behavior_switch', '__' * + (P('TOC') + 'FORCETOC' + 'NOTOC' + 'NOEDITSECTION' + 'NOCC' + 'NOINDEX') * + '__') * #lexer.space) lex:add_style('behavior_switch', lexer.STYLE_KEYWORD) return lex diff --git a/lexlua/moonscript.lua b/lexlua/moonscript.lua index 933130b74..d57cd55a9 100644 --- a/lexlua/moonscript.lua +++ b/lexlua/moonscript.lua @@ -110,24 +110,25 @@ lex:add_style('proper_ident', lexer.STYLE_CLASS) lex:add_style('tbl_key', lexer.STYLE_REGEX) local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[', - function(input, index, eq) - local _, e = input:find(']'..eq..']', index, true) - return (e or #input) + 1 - end) + function(input, index, eq) + local _, e = input:find(']' .. eq .. ']', index, true) + return (e or #input) + 1 + end) -- Strings. -local sq_str = lexer.delimited_range("'", false, true) -local dq_str = lexer.delimited_range('"', false, true) +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"', false, false) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str) + - token('longstring', longstring)) + token('longstring', longstring)) lex:add_style('longstring', lexer.STYLE_STRING) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '--' * (longstring + - lexer.nonnewline^0))) +local line_comment = lexer.to_eol('--') +local block_comment = '--' * longstring +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Function definition. lex:add_rule('fndef', token('fndef', P('->') + '=>')) diff --git a/lexlua/myrddin.lua b/lexlua/myrddin.lua index e836a0b24..1277468c4 100644 --- a/lexlua/myrddin.lua +++ b/lexlua/myrddin.lua @@ -26,17 +26,14 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = P{ - V'part' * P'*/'^-1, - part = '/*' * (V'full' + (lexer.any - '/*' - '*/'))^0, - full = V'part' * '*/', -} +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/', false, false, true) lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Numbers. local digit = lexer.digit + '_' @@ -44,8 +41,9 @@ local bdigit = R'01' + '_' local xdigit = lexer.xdigit + '_' local odigit = R'07' + '_' local integer = '0x' * xdigit^1 + '0o' * odigit^1 + '0b' * bdigit^1 + digit^1 -local float = digit^1 * (('.' * digit^1) * (S'eE' * S'+-'^-1 * digit^1)^-1 + - ('.' * digit^1)^-1 * S'eE' * S'+-'^-1 * digit^1) +local float = digit^1 * ( + ('.' * digit^1) * (S'eE' * S'+-'^-1 * digit^1)^-1 + + ('.' * digit^1)^-1 * S'eE' * S'+-'^-1 * digit^1) lex:add_rule('number', token(lexer.NUMBER, float + integer)) -- Operators. diff --git a/lexlua/nemerle.lua b/lexlua/nemerle.lua index f12cae300..196d0f028 100644 --- a/lexlua/nemerle.lua +++ b/lexlua/nemerle.lua @@ -28,20 +28,20 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ ]])) -- Strings. -local sq_str = P('L')^-1 * lexer.delimited_range("'", true) -local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Preprocessor. local preproc_word = word_match[[ @@ -49,7 +49,7 @@ local preproc_word = word_match[[ undef using warning ]] lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * - S('\t ')^0 * preproc_word)) + S('\t ')^0 * preproc_word)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) diff --git a/lexlua/nim.lua b/lexlua/nim.lua index 203ed3628..8840aa8fc 100644 --- a/lexlua/nim.lua +++ b/lexlua/nim.lua @@ -69,18 +69,17 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ ]])) -- Strings. -local sq_str = lexer.delimited_range("'", true) -local dq_str = lexer.delimited_range('"', true) -local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 -local raw_dq_str = 'r' * lexer.delimited_range('"', false, true) -lex:add_rule('string', token(lexer.STRING, triple_dq_str + sq_str + dq_str + - raw_dq_str)) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local tq_str = lexer.range('"""') +local raw_str = 'r' * lexer.range('"', false, false) +lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + raw_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) -- Numbers. local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 @@ -88,14 +87,14 @@ local hex = '0' * S('xX') * lexer.xdigit^1 * ('_' * lexer.xdigit^1)^0 local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0 local oct = '0o' * R('07')^1 local integer = S('+-')^-1 * (bin + hex + oct + dec) * - ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1 + ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1 local float = lexer.digit^1 * ('_' * lexer.digit^1)^0 * - ('.' * ('_' * lexer.digit)^0)^-1 * S('eE') * S('+-')^-1 * - lexer.digit^1 * ('_' * lexer.digit^1)^0 + ('.' * ('_' * lexer.digit)^0)^-1 * S('eE') * S('+-')^-1 * lexer.digit^1 * + ('_' * lexer.digit^1)^0 lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, - S('=+-*/<>@$~&%|!?^.:\\`()[]{},;'))) + S('=+-*/<>@$~&%|!?^.:\\`()[]{},;'))) return lex diff --git a/lexlua/nsis.lua b/lexlua/nsis.lua index 466550bee..39be082d3 100644 --- a/lexlua/nsis.lua +++ b/lexlua/nsis.lua @@ -12,14 +12,15 @@ local lex = lexer.new('nsis') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments (4.1). -local line_comment = (P(';') + '#') * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * '*/' +local line_comment = lexer.to_eol(P(';') + '#') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"') + - lexer.delimited_range('`'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local bq_str = lexer.range('`') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str)) -- Constants (4.2.3). lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ diff --git a/lexlua/objective_c.lua b/lexlua/objective_c.lua index 48aaaa1d3..348b2a9b8 100644 --- a/lexlua/objective_c.lua +++ b/lexlua/objective_c.lua @@ -32,20 +32,20 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ ]])) -- Strings. -local sq_str = P('L')^-1 * lexer.delimited_range("'", true) -local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Preprocessor. local preproc_word = word_match[[ @@ -53,8 +53,7 @@ local preproc_word = word_match[[ warning ]] lex:add_rule('preprocessor', #lexer.starts_line('#') * - token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * - preproc_word)) + token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) diff --git a/lexlua/pascal.lua b/lexlua/pascal.lua index e5db67982..05cb3b5c0 100644 --- a/lexlua/pascal.lua +++ b/lexlua/pascal.lua @@ -38,23 +38,21 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[ ]], true))) -- Strings. -lex:add_rule('string', token(lexer.STRING, - S('uUrR')^-1 * - lexer.delimited_range("'", true, true))) +lex:add_rule('string', token(lexer.STRING, S('uUrR')^-1 * + lexer.range("'", true, false))) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local bblock_comment = '{' * (lexer.any - '}')^0 * P('}')^-1 -local pblock_comment = '(*' * (lexer.any - '*)')^0 * P('*)')^-1 +local line_comment = lexer.to_eol('//', true) +local bblock_comment = lexer.range('{', '}') +local pblock_comment = lexer.range('(*', '*)') lex:add_rule('comment', token(lexer.COMMENT, line_comment + bblock_comment + - pblock_comment)) + pblock_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('LlDdFf')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlDdFf')^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('.,;^@:=<>+-/*()[]'))) diff --git a/lexlua/perl.lua b/lexlua/perl.lua index 6686dcaf0..819b2a1a3 100644 --- a/lexlua/perl.lua +++ b/lexlua/perl.lua @@ -21,7 +21,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ -- Markers. lex:add_rule('marker', token(lexer.COMMENT, word_match[[__DATA__ __END__]] * - lexer.any^0)) + lexer.any^0)) -- Functions. lex:add_rule('function', token(lexer.FUNCTION, word_match[[ @@ -46,22 +46,22 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[ ]])) local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'} -local literal_delimitted = P(function(input, index) -- for single delimiter sets +local literal_delimited = P(function(input, index) -- for single delimiter sets local delimiter = input:sub(index, index) if not delimiter:find('%w') then -- only non alpha-numerics local match_pos, patt if delimiter_matches[delimiter] then -- Handle nested delimiter/matches in strings. local s, e = delimiter, delimiter_matches[delimiter] - patt = lexer.delimited_range(s..e, false, false, true) + patt = lexer.range(s, e, false, true, true) else - patt = lexer.delimited_range(delimiter) + patt = lexer.range(delimiter) end match_pos = lpeg.match(patt, input, index) return match_pos or #input + 1 end end) -local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets +local literal_delimited2 = P(function(input, index) -- for 2 delimiter sets local delimiter = input:sub(index, index) -- Only consider non-alpha-numerics and non-spaces as delimiters. The -- non-spaces are used to ignore operators like "-s". @@ -70,9 +70,9 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets if delimiter_matches[delimiter] then -- Handle nested delimiter/matches in strings. local s, e = delimiter, delimiter_matches[delimiter] - patt = lexer.delimited_range(s..e, false, false, true) + patt = lexer.range(s, e, false, true, true) else - patt = lexer.delimited_range(delimiter) + patt = lexer.range(delimiter) end first_match_pos = lpeg.match(patt, input, index) final_match_pos = lpeg.match(patt, input, first_match_pos - 1) @@ -84,50 +84,47 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets end) -- Strings. -local sq_str = lexer.delimited_range("'") -local dq_str = lexer.delimited_range('"') -local cmd_str = lexer.delimited_range('`') +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local cmd_str = lexer.range('`') local heredoc = '<<' * P(function(input, index) local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index) if s == index and delimiter then local end_heredoc = '[\n\r\f]+' - local _, e = input:find(end_heredoc..delimiter, e) + local _, e = input:find(end_heredoc .. delimiter, e) return e and e + 1 or #input + 1 end end) -local lit_str = 'q' * P('q')^-1 * literal_delimitted -local lit_array = 'qw' * literal_delimitted -local lit_cmd = 'qx' * literal_delimitted -local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0 +local lit_str = 'q' * P('q')^-1 * literal_delimited +local lit_array = 'qw' * literal_delimited +local lit_cmd = 'qx' * literal_delimited +local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0 +local string = token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + + lit_str + lit_array + lit_cmd + lit_tr) local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') * - lexer.delimited_range('/', true) * S('imosx')^0 -local lit_regex = 'qr' * literal_delimitted * S('imosx')^0 -local lit_match = 'm' * literal_delimitted * S('cgimosx')^0 -local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0 -lex:add_rule('string', - token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str + - lit_array + lit_cmd + lit_tr) + - token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub)) + lexer.range('/', true) * S('imosx')^0 +local lit_regex = 'qr' * literal_delimited * S('imosx')^0 +local lit_match = 'm' * literal_delimited * S('cgimosx')^0 +local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0 +local regex = token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub) +lex:add_rule('string', string + regex) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '#' * lexer.nonnewline_esc^0 -local block_comment = lexer.starts_line('=') * lexer.alpha * - (lexer.any - lexer.newline * '=cut')^0 * - (lexer.newline * '=cut')^-1 +local line_comment = lexer.to_eol('#', true) +local block_comment = lexer.range(lexer.starts_line('=' * lexer.alpha), + lexer.starts_line('=cut')) lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Variables. -local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 + - S('\\"[]\'&`+*.,;=%~?@<>(|/!-') + - ':' * (lexer.any - ':') + - P('$') * -lexer.word + - lexer.digit^1) +local special_var = '$' * ( + '^' * S('ADEFHILMOPSTWX')^-1 + S('\\"[]\'&`+*.,;=%~?@<>(|/!-') + + ':' * (lexer.any - ':') + P('$') * -lexer.word + lexer.digit^1) local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#' lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var)) diff --git a/lexlua/php.lua b/lexlua/php.lua index adf7ef432..257bc955d 100644 --- a/lexlua/php.lua +++ b/lexlua/php.lua @@ -22,7 +22,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ ]])) local word = (lexer.alpha + '_' + R('\127\255')) * - (lexer.alnum + '_' + R('\127\255'))^0 + (lexer.alnum + '_' + R('\127\255'))^0 -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) @@ -31,26 +31,26 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) lex:add_rule('variable', token(lexer.VARIABLE, '$' * word)) -- Strings. -local sq_str = lexer.delimited_range("'") -local dq_str = lexer.delimited_range('"') -local bt_str = lexer.delimited_range('`') +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local bq_str = lexer.range('`') local heredoc = '<<<' * P(function(input, index) local _, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f]+', index) if delimiter then - local _, e = input:find('[\n\r\f]+'..delimiter, e) + local _, e = input:find('[\n\r\f]+' .. delimiter, e) return e and e + 1 end end) -lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bt_str + heredoc)) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str + heredoc)) -- TODO: interpolated code. -- Comments. -local line_comment = (P('//') + '#') * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol(P('//') + '#') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('!@%^*&()-+=|/?.,;:<>[]{}'))) diff --git a/lexlua/pico8.lua b/lexlua/pico8.lua index 3e82aa9e1..c0061277c 100644 --- a/lexlua/pico8.lua +++ b/lexlua/pico8.lua @@ -20,7 +20,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments -lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//', true))) -- Numbers lex:add_rule('number', token(lexer.NUMBER, lexer.integer)) diff --git a/lexlua/pike.lua b/lexlua/pike.lua index c17d1b4b4..3dff044ac 100644 --- a/lexlua/pike.lua +++ b/lexlua/pike.lua @@ -29,21 +29,21 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true) + - '#' * lexer.delimited_range('"'))) +local sq_str = lexer.range("'", true) +local dq_str = P('#')^-1 * lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0 + - lexer.nested_pair('/*', '*/'))) +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('lLdDfF')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('lLdDfF')^-1)) -- Preprocessors. -lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') * - lexer.nonnewline^0)) +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, + lexer.to_eol(lexer.starts_line('#')))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('<>=!+-/*%&|^~@`.,:;()[]{}'))) diff --git a/lexlua/pkgbuild.lua b/lexlua/pkgbuild.lua index 759cf55ec..7916487c3 100644 --- a/lexlua/pkgbuild.lua +++ b/lexlua/pkgbuild.lua @@ -11,24 +11,24 @@ local lex = lexer.new('pkgbuild') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Strings. -local sq_str = lexer.delimited_range("'", false, true) -local dq_str = lexer.delimited_range('"') -local ex_str = lexer.delimited_range('`') +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"') +local ex_str = lexer.range('`') local heredoc = '<<' * P(function(input, index) - local s, e, _, delimiter = - input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index) + local s, e, _, delimiter = input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', + index) if s == index and delimiter then - local _, e = input:find('[\n\r\f]+'..delimiter, e) + local _, e = input:find('[\n\r\f]+' .. delimiter, e) return e and e + 1 or #input + 1 end end) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ @@ -59,14 +59,14 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Variables. -lex:add_rule('variable', token(lexer.VARIABLE, - '$' * (S('!#?*@$') + - lexer.delimited_range('()', true, true) + - lexer.delimited_range('[]', true, true) + - lexer.delimited_range('{}', true, true) + - lexer.delimited_range('`', true, true) + - lexer.digit^1 + - lexer.word))) +local symbol = S('!#?*@$') +local parens = lexer.range('(', ')', true) +local brackets = lexer.range('[', ']', true) +local braces = lexer.range('{', '}', true) +local backticks = lexer.range('`', true, false) +local number = lexer.digit^1 +lex:add_rule('variable', token(lexer.VARIABLE, '$' * + (symbol + parens + brackets + braces + backticks + number + lexer.word))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^~.,:;?()[]{}'))) diff --git a/lexlua/powershell.lua b/lexlua/powershell.lua index b0ef56224..d69f8f328 100644 --- a/lexlua/powershell.lua +++ b/lexlua/powershell.lua @@ -12,7 +12,7 @@ local lex = lexer.new('powershell') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ @@ -44,15 +44,14 @@ lex:add_rule('type', token(lexer.KEYWORD, '[' * word_match([[ ]], true) * ']')) -- Variables. -lex:add_rule('variable', token(lexer.VARIABLE, - '$' * (lexer.digit^1 + lexer.word + - lexer.delimited_range('{}', true, true)))) +lex:add_rule('variable', token(lexer.VARIABLE, '$' * + (lexer.digit^1 + lexer.word + lexer.range('{', '}', true)))) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}%`'))) diff --git a/lexlua/prolog.lua b/lexlua/prolog.lua index 79a3fbe10..c65748311 100644 --- a/lexlua/prolog.lua +++ b/lexlua/prolog.lua @@ -31,7 +31,7 @@ local P, R, S, B, V, C = lpeg.P, lpeg.R, lpeg.S, lpeg.B, lpeg.V, lpeg.C local lex = lexer.new('prolog') local dialects = setmetatable({gprolog = 'gprolog', swipl = 'swipl'}, - {__index = function(_, _) return 'iso' end}) + {__index = function(_, _) return 'iso' end}) local dialect = dialects[lexer.property['prolog.dialect']] -- Directives. @@ -71,10 +71,10 @@ directives.swipl = directives.iso .. [[ module multifile op reexport thread_local use_module volatile ]] lex:add_rule('directive', - token(lexer.WHITESPACE, lexer.starts_line(S(' \t'))^0) * - token(lexer.OPERATOR, P':-') * - token(lexer.WHITESPACE, S(' \t')^0) * - token(lexer.PREPROCESSOR, P(word_match(directives[dialect])))) + token(lexer.WHITESPACE, lexer.starts_line(S(' \t'))^0) * + token(lexer.OPERATOR, P':-') * + token(lexer.WHITESPACE, S(' \t')^0) * + token(lexer.PREPROCESSOR, P(word_match(directives[dialect])))) -- Whitespace. lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) @@ -278,10 +278,9 @@ one_plus_arity_keywords.swipl = [[ set_random prolog_stack_property put_char unload_file nb_setval put_byte current_signal put_code write_length string read_string text_to_string ]] -lex:add_rule('keyword', - token(lexer.KEYWORD, word_match(zero_arity_keywords[dialect]) + - (word_match(one_plus_arity_keywords[dialect]) * - #(P'(')))) +lex:add_rule('keyword', token(lexer.KEYWORD, + word_match(zero_arity_keywords[dialect]) + + (word_match(one_plus_arity_keywords[dialect]) * #(P'(')))) -- BIFs. local bifs = {} @@ -311,16 +310,15 @@ local decimal_group = S('+-')^-1 * (lexer.digit + '_')^1 local binary_number = '0b' * (S('01') + '_')^1 local character_code = '0\'' * S('\\')^-1 * (lexer.print - lexer.space) local decimal_number = decimal_group * ('.' * decimal_group)^-1 * - ('e' * decimal_group)^-1 + ('e' * decimal_group)^-1 local hexadecimal_number = '0x' * (lexer.xdigit + '_')^1 local octal_number = '0o' * (S('01234567') + '_')^1 lex:add_rule('number', token(lexer.NUMBER, character_code + binary_number + - hexadecimal_number + octal_number + - decimal_number)) + hexadecimal_number + octal_number + decimal_number)) -- Comments. -local line_comment = '%' * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('%') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Operators. @@ -338,18 +336,18 @@ operators.swipl = [[ initialization rem ]] lex:add_rule('operator', token(lexer.OPERATOR, word_match(operators[dialect]) + - S('-!+\\|=:;&<>()[]{}/*^@?.'))) + S('-!+\\|=:;&<>()[]{}/*^@?.'))) -- Variables. -lex:add_rule('variable', - token(lexer.VARIABLE, (lexer.upper + '_') * - (lexer.word^1 + lexer.digit^1 + P('_')^1)^0)) +lex:add_rule('variable', token(lexer.VARIABLE, (lexer.upper + '_') * + (lexer.word^1 + lexer.digit^1 + P('_')^1)^0)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) return lex diff --git a/lexlua/props.lua b/lexlua/props.lua index 74281a8dd..ab0bde27b 100644 --- a/lexlua/props.lua +++ b/lexlua/props.lua @@ -12,22 +12,22 @@ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Colors. lex:add_rule('color', token('color', '#' * lexer.xdigit * lexer.xdigit * - lexer.xdigit * lexer.xdigit * - lexer.xdigit * lexer.xdigit)) + lexer.xdigit * lexer.xdigit * lexer.xdigit * lexer.xdigit)) lex:add_style('color', lexer.STYLE_NUMBER) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Equals. lex:add_rule('equals', token(lexer.OPERATOR, '=')) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Variables. -lex:add_rule('variable', token(lexer.VARIABLE, '$(' * (lexer.any - ')')^1 * - ')')) +lex:add_rule('variable', token(lexer.VARIABLE, '$' * + lexer.range('(', ')', true))) return lex diff --git a/lexlua/protobuf.lua b/lexlua/protobuf.lua index c90778407..71284dbdf 100644 --- a/lexlua/protobuf.lua +++ b/lexlua/protobuf.lua @@ -24,20 +24,20 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ ]])) -- Strings. -local sq_str = P('L')^-1 * lexer.delimited_range("'", true) -local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +local sq_str = P('L')^-1 * lexer.range("'", true) +local dq_str = P('L')^-1 * lexer.range('"', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('<>=|;,.()[]{}'))) diff --git a/lexlua/ps.lua b/lexlua/ps.lua index 255a92339..8ebde45fd 100644 --- a/lexlua/ps.lua +++ b/lexlua/ps.lua @@ -28,15 +28,15 @@ local word = (lexer.alpha + '-') * (lexer.alnum + '-')^0 lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -- Strings. -local arrow_string = lexer.delimited_range('<>') -local nested_string = lexer.delimited_range('()', false, false, true) +local arrow_string = lexer.range('<', '>') +local nested_string = lexer.range('(', ')', false, false, true) lex:add_rule('string', token(lexer.STRING, arrow_string + nested_string)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Labels. lex:add_rule('label', token(lexer.LABEL, '/' * word)) diff --git a/lexlua/pure.lua b/lexlua/pure.lua index db75233b7..5e8f04aa9 100644 --- a/lexlua/pure.lua +++ b/lexlua/pure.lua @@ -11,8 +11,8 @@ local lex = lexer.new('pure') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local line_comment = '//' * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Pragmas. @@ -45,6 +45,6 @@ lex:add_rule('operator', token(lexer.OPERATOR, dots + punct)) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true))) return lex diff --git a/lexlua/python.lua b/lexlua/python.lua index 58c6ba308..72e70d70e 100644 --- a/lexlua/python.lua +++ b/lexlua/python.lua @@ -73,19 +73,17 @@ lex:add_style('self', lexer.STYLE_TYPE) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#', true))) -- Strings. -local sq_str = P('u')^-1 * lexer.delimited_range("'", true) -local dq_str = P('U')^-1 * lexer.delimited_range('"', true) -local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1 -local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local sq_str = P('u')^-1 * lexer.range("'", true) +local dq_str = P('U')^-1 * lexer.range('"', true) +local tq_str = lexer.range("'''") + lexer.range('"""') -- TODO: raw_strs cannot end in single \. -local raw_sq_str = P('u')^-1 * 'r' * lexer.delimited_range("'", false, true) -local raw_dq_str = P('U')^-1 * 'R' * lexer.delimited_range('"', false, true) -lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str + - sq_str + dq_str + raw_sq_str + - raw_dq_str)) +local raw_sq_str = P('u')^-1 * 'r' * lexer.range("'", false, false) +local raw_dq_str = P('U')^-1 * 'R' * lexer.range('"', false, false) +lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + + raw_sq_str + raw_dq_str)) -- Numbers. local dec = lexer.digit^1 * S('Ll')^-1 @@ -95,7 +93,7 @@ local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec) lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) -- Decorators. -lex:add_rule('decorator', token('decorator', '@' * lexer.nonnewline^0)) +lex:add_rule('decorator', token('decorator', lexer.to_eol('@'))) lex:add_style('decorator', lexer.STYLE_PREPROCESSOR) -- Operators. diff --git a/lexlua/rc.lua b/lexlua/rc.lua index 8c257c6fb..3639cc556 100644 --- a/lexlua/rc.lua +++ b/lexlua/rc.lua @@ -20,32 +20,31 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local str = lexer.delimited_range("'", false, true) +local str = lexer.range("'", false, false) local heredoc = '<<' * P(function(input, index) local s, e, _, delimiter = input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1', - index) + index) if s == index and delimiter then delimiter = delimiter:gsub('[%%+-.?]', '%%%1') - local _, e = input:find('[\n\r]'..delimiter..'[\n\r]', e) + local _, e = input:find('[\n\r]' .. delimiter .. '[\n\r]', e) return e and e + 1 or #input + 1 end end) lex:add_rule('string', token(lexer.STRING, str + heredoc)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.integer + lexer.float)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Variables. lex:add_rule('variable', token(lexer.VARIABLE, '$' * S('"#')^-1 * - ('*' + lexer.digit^1 + - lexer.word))) + ('*' + lexer.digit^1 + lexer.word))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('@`=!<>*&^|;?()[]{}') + - '\\\n')) + '\\\n')) -- Fold points. lex:add_fold_point(lexer.OPERATOR, '{', '}') diff --git a/lexlua/rebol.lua b/lexlua/rebol.lua index 7cc8a2186..5994a4cd5 100644 --- a/lexlua/rebol.lua +++ b/lexlua/rebol.lua @@ -11,9 +11,8 @@ local lex = lexer.new('rebol') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -local line_comment = ';' * lexer.nonnewline^0; -local block_comment = 'comment' * P(' ')^-1 * - lexer.delimited_range('{}', false, true) +local line_comment = lexer.to_eol(';') +local block_comment = 'comment' * P(' ')^-1 * lexer.range('{', '}') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Keywords. @@ -80,12 +79,13 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '-') * - (lexer.alnum + '-')^0)) + (lexer.alnum + '-')^0)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true) + - lexer.delimited_range('{}') + - "'" * lexer.word)) +local dq_str = lexer.range('"', true) +local br_str = lexer.range('{', '}', false, false, true) +local word_str = "'" * lexer.word +lex:add_rule('string', token(lexer.STRING, dq_str + br_str + word_str)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+/*:()[]'))) diff --git a/lexlua/rest.lua b/lexlua/rest.lua index 3d7177311..a4060a8bb 100644 --- a/lexlua/rest.lua +++ b/lexlua/rest.lua @@ -15,11 +15,11 @@ local any_indent = S(' \t')^0 local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')) local adornment = lpeg.C(adornment_chars^2 * any_indent) * (l.newline + -1) local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c) - if not adm:find('^%'..c..'+%s*$') then return nil end + if not adm:find('^%' .. c .. '+%s*$') then return nil end local rest = input:sub(index) local lines = 1 for line, e in rest:gmatch('([^\r\n]+)()') do - if lines > 1 and line:match('^(%'..c..'+)%s*$') == adm then + if lines > 1 and line:match('^(%' .. c .. '+)%s*$') == adm then return index + e - 1 end if lines > 3 or #line > #adm then return nil end @@ -28,7 +28,7 @@ local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c) return #input + 1 end) local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c) - local pos = adm:match('^%'..c..'+()%s*$') + local pos = adm:match('^%' .. c .. '+()%s*$') return pos and index - #adm + pos - 1 or nil end) -- Token needs to be a predefined one in order for folder to work. @@ -37,16 +37,15 @@ local title = token(l.CONSTANT, overline + underline) -- Lists. local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8 local enum_list = P('(')^-1 * - (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)') + (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)') local field_list = ':' * (l.any - ':')^1 * P(':')^-1 local option_word = l.alnum * (l.alnum + '-')^0 local option = S('-/') * option_word * (' ' * option_word)^-1 + - '--' * option_word * ('=' * option_word)^-1 + '--' * option_word * ('=' * option_word)^-1 local option_list = option * (',' * l.space^1 * option)^-1 local list = #(l.space^0 * (S('*+-:/') + enum_list)) * - starts_line(token('list', l.space^0 * (option_list + bullet_list + - enum_list + field_list) * - l.space)) + starts_line(token('list', l.space^0 * + (option_list + bullet_list + enum_list + field_list) * l.space)) -- Literal block. local block = P('::') * (l.newline + -1) * function(input, index) @@ -55,7 +54,7 @@ local block = P('::') * (l.newline + -1) * function(input, index) for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do local no_indent = (indent - pos < level and line ~= ' ' or level == 0) local quoted = no_indent and line:find(quote or '^%s*%W') - if quoted and not quote then quote = '^%s*%'..line:match('^%s*(%W)') end + if quoted and not quote then quote = '^%s*%' .. line:match('^%s*(%W)') end if no_indent and not quoted and pos > 1 then return index + pos - 1 end end return #input + 1 @@ -74,8 +73,7 @@ local footnote = token('footnote_block', prefix * footnote_label * l.space) local citation_label = '[' * word * ']' local citation = token('citation_block', prefix * citation_label * l.space) local link = token('link_block', prefix * '_' * - (l.delimited_range('`') + (P('\\') * 1 + - l.nonnewline - ':')^1) * ':' * l.space) + (l.range('`') + (P('\\') * 1 + l.nonnewline - ':')^1) * ':' * l.space) local markup_block = #prefix * starts_line(footnote + citation + link) -- Directives. @@ -102,8 +100,8 @@ local directive_type = word_match({ 'include', 'raw', 'class', 'role', 'default-role', 'title', 'restructuredtext-test-directive', }, '-') -local known_directive = token('directive', - prefix * directive_type * '::' * l.space) +local known_directive = token('directive', prefix * directive_type * '::' * + l.space) local sphinx_directive_type = word_match({ -- The TOC tree. 'toctree', @@ -115,12 +113,12 @@ local sphinx_directive_type = word_match({ -- Miscellaneous 'sectionauthor', 'index', 'only', 'tabularcolumns' }, '-') -local sphinx_directive = token('sphinx_directive', - prefix * sphinx_directive_type * '::' * l.space) -local unknown_directive = token('unknown_directive', - prefix * word * '::' * l.space) +local sphinx_directive = token('sphinx_directive', prefix * + sphinx_directive_type * '::' * l.space) +local unknown_directive = token('unknown_directive', prefix * word * '::' * + l.space) local directive = #prefix * starts_line(known_directive + sphinx_directive + - unknown_directive) + unknown_directive) -- Sphinx code block. local indented_block = function(input, index) @@ -134,42 +132,37 @@ local indented_block = function(input, index) return #input + 1 end local code_block = prefix * 'code-block::' * S(' \t')^1 * l.nonnewline^0 * - (l.newline + -1) * indented_block + (l.newline + -1) * indented_block local sphinx_block = #prefix * token('code_block', starts_line(code_block)) -- Substitution definitions. -local substitution = #prefix * - token('substitution', - starts_line(prefix * l.delimited_range('|') * - l.space^1 * word * '::' * l.space)) +local substitution = #prefix * token('substitution', + starts_line(prefix * l.range('|') * l.space^1 * word * '::' * l.space)) -- Comments. -local line_comment = prefix * l.nonnewline^0 +local line_comment = l.to_eol(prefix) local bprefix = any_indent * '..' local block_comment = bprefix * l.newline * indented_block -local comment = #bprefix * - token(l.COMMENT, starts_line(line_comment + block_comment)) +local comment = #bprefix * token(l.COMMENT, + starts_line(line_comment + block_comment)) -- Inline markup. -local em = token('em', l.delimited_range('*')) -local strong = token('strong', ('**' * (l.any - '**')^0 * P('**')^-1)) +local em = token('em', l.range('*')) +local strong = token('strong', l.range('**', '**')) local role = token('role', ':' * word * ':' * (word * ':')^-1) -local interpreted = role^-1 * token('interpreted', l.delimited_range('`')) * - role^-1 -local inline_literal = token('inline_literal', - '``' * (l.any - '``')^0 * P('``')^-1) -local link_ref = token('link', - (word + l.delimited_range('`')) * '_' * P('_')^-1 + - '_' * l.delimited_range('`')) +local interpreted = role^-1 * token('interpreted', l.range('`')) * role^-1 +local inline_literal = token('inline_literal', l.range('``', '``')) +local postfix_link = (word + l.range('`')) * '_' * P('_')^-1 +local prefix_link = '_' * l.range('`') +local link_ref = token('link', postfix_link + prefix_link) local footnote_ref = token('footnote', footnote_label * '_') local citation_ref = token('citation', citation_label * '_') -local substitution_ref = token('substitution', l.delimited_range('|', true) * - ('_' * P('_')^-1)^-1) +local substitution_ref = token('substitution', l.range('|', true) * + ('_' * P('_')^-1)^-1) local link = token('link', l.alpha * (l.alnum + S('-.'))^1 * ':' * - (l.alnum + S('/.+-%@'))^1) + (l.alnum + S('/.+-%@'))^1) local inline_markup = (strong + em + inline_literal + link_ref + interpreted + - footnote_ref + citation_ref + substitution_ref + link) * - -l.alnum + footnote_ref + citation_ref + substitution_ref + link) * -l.alnum -- Other. local non_space = token(l.DEFAULT, l.alnum * (l.any - l.space)^0) @@ -193,14 +186,14 @@ M._rules = { M._tokenstyles = { list = l.STYLE_TYPE, - literal_block = l.STYLE_EMBEDDED..',eolfilled', + literal_block = l.STYLE_EMBEDDED .. ',eolfilled', footnote_block = l.STYLE_LABEL, citation_block = l.STYLE_LABEL, link_block = l.STYLE_LABEL, directive = l.STYLE_KEYWORD, - sphinx_directive = l.STYLE_KEYWORD..',bold', - unknown_directive = l.STYLE_KEYWORD..',italics', - code_block = l.STYLE_EMBEDDED..',eolfilled', + sphinx_directive = l.STYLE_KEYWORD .. ',bold', + unknown_directive = l.STYLE_KEYWORD .. ',italics', + code_block = l.STYLE_EMBEDDED .. ',eolfilled', substitution = l.STYLE_VARIABLE, strong = 'bold', em = 'italics', @@ -219,7 +212,7 @@ local sphinx_levels = { -- Section-based folding. M._fold = function(text, start_pos, start_line, start_level) local folds, line_starts = {}, {} - for pos in (text..'\n'):gmatch('().-\r?\n') do + for pos in (text .. '\n'):gmatch('().-\r?\n') do line_starts[#line_starts + 1] = pos end local style_at, CONSTANT, level = l.style_at, l.CONSTANT, start_level @@ -231,7 +224,7 @@ M._fold = function(text, start_pos, start_line, start_level) local c = text:sub(pos, pos) local line_num = start_line + i - 1 folds[line_num] = level - if style_at[start_pos + pos] == CONSTANT and c:find('^[^%w%s]') then + if style_at[start_pos + pos - 1] == CONSTANT and c:find('^[^%w%s]') then local sphinx_level = FOLD_BASE + (sphinx_levels[c] or #sphinx_levels) level = not sphinx and level - 1 or sphinx_level if level < FOLD_BASE then level = FOLD_BASE end @@ -249,11 +242,11 @@ l.property['fold.by.sphinx.convention'] = '0' --[[ Embedded languages. local bash = l.load('bash') local bash_indent_level -local start_rule = #(prefix * 'code-block' * '::' * l.space^1 * 'bash' * - (l.newline + -1)) * sphinx_directive * - token('bash_begin', P(function(input, index) - bash_indent_level = #input:match('^([ \t]*)', index) - return index - end))]] +local start_rule = + #(prefix * 'code-block' * '::' * l.space^1 * 'bash' * (l.newline + -1)) * + sphinx_directive * token('bash_begin', P(function(input, index) + bash_indent_level = #input:match('^([ \t]*)', index) + return index + end))]] return M diff --git a/lexlua/rexx.lua b/lexlua/rexx.lua index 576df8b18..e33a613fc 100644 --- a/lexlua/rexx.lua +++ b/lexlua/rexx.lua @@ -48,20 +48,21 @@ local word = lexer.alpha * (lexer.alnum + S('@#$\\.!?_'))^0 lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -- Strings. -local sq_str = lexer.delimited_range("'", true, true) -local dq_str = lexer.delimited_range('"', true, true) +local sq_str = lexer.range("'", true, false) +local dq_str = lexer.range('"', true, false) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline_esc^0 + - lexer.nested_pair('/*', '*/'))) +local line_comment = lexer.to_eol('--', true) +local block_comment = lexer.range('/*', '*/', false, false, true) +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Preprocessor. -lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') * - lexer.nonnewline^0)) +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, + lexer.to_eol(lexer.starts_line('#')))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/\\*%&|^~.,:;(){}'))) diff --git a/lexlua/rstats.lua b/lexlua/rstats.lua index 978a73c25..d499dc500 100644 --- a/lexlua/rstats.lua +++ b/lexlua/rstats.lua @@ -26,15 +26,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - P('i')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * P('i')^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('<->+*/^=.,:;|$()[]{}'))) diff --git a/lexlua/ruby.lua b/lexlua/ruby.lua index efa2a1779..e25ca6000 100644 --- a/lexlua/ruby.lua +++ b/lexlua/ruby.lua @@ -33,23 +33,22 @@ local word = (lexer.alpha + '_') * word_char^0 lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -- Comments. -local line_comment = '#' * lexer.nonnewline_esc^0 -local block_comment = lexer.starts_line('=begin') * - (lexer.any - lexer.newline * '=end')^0 * - (lexer.newline * '=end')^-1 +local line_comment = lexer.to_eol('#', true) +local block_comment = lexer.range(lexer.starts_line('=begin'), + lexer.starts_line('=end')) lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'} -local literal_delimitted = P(function(input, index) +local literal_delimited = P(function(input, index) local delimiter = input:sub(index, index) if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics local match_pos, patt if delimiter_matches[delimiter] then -- Handle nested delimiter/matches in strings. local s, e = delimiter, delimiter_matches[delimiter] - patt = lexer.delimited_range(s..e, false, false, true) + patt = lexer.range(s, e, false, true, true) else - patt = lexer.delimited_range(delimiter) + patt = lexer.range(delimiter) end match_pos = lpeg.match(patt, input, index) return match_pos or #input + 1 @@ -57,29 +56,29 @@ local literal_delimitted = P(function(input, index) end) -- Strings. -local cmd_str = lexer.delimited_range('`') -local lit_cmd = '%x' * literal_delimitted -local lit_array = '%w' * literal_delimitted -local sq_str = lexer.delimited_range("'") -local dq_str = lexer.delimited_range('"') -local lit_str = '%' * S('qQ')^-1 * literal_delimitted +local cmd_str = lexer.range('`') +local lit_cmd = '%x' * literal_delimited +local lit_array = '%w' * literal_delimited +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local lit_str = '%' * S('qQ')^-1 * literal_delimited local heredoc = '<<' * P(function(input, index) - local s, e, indented, _, delimiter = - input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) + local s, e, indented, _, delimiter = input:find( + '(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) if s == index and delimiter then local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') - local _, e = input:find(end_heredoc..delimiter, e) + local _, e = input:find(end_heredoc .. delimiter, e) return e and e + 1 or #input + 1 end end) +local string = token(lexer.STRING, (sq_str + dq_str + lit_str + heredoc + + cmd_str + lit_cmd + lit_array) * S('f')^-1) -- TODO: regex_str fails with `obj.method /patt/` syntax. local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * - lexer.delimited_range('/', true, false) * S('iomx')^0 -local lit_regex = '%r' * literal_delimitted * S('iomx')^0 -lex:add_rule('string', token(lexer.STRING, (sq_str + dq_str + lit_str + - heredoc + cmd_str + lit_cmd + - lit_array) * S('f')^-1) + - token(lexer.REGEX, regex_str + lit_regex)) + lexer.range('/', true) * S('iomx')^0 +local lit_regex = '%r' * literal_delimited * S('iomx')^0 +local regex = token(lexer.REGEX, regex_str + lit_regex) +lex:add_rule('string', string + regex) -- Numbers. local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1 @@ -88,15 +87,15 @@ local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec) -- TODO: meta, control, etc. for numeric_literal. local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer + - numeric_literal)) + numeric_literal)) -- Variables. local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit + - '-' * S('0FadiIKlpvw')) + '-' * S('0FadiIKlpvw')) local class_var = '@@' * word local inst_var = '@' * word lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var + - inst_var)) + inst_var)) -- Symbols. lex:add_rule('symbol', token('symbol', ':' * P(function(input, index) @@ -110,7 +109,7 @@ lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))) -- Fold points. local function disambiguate(text, pos, line, s) return line:sub(1, s - 1):match('^%s*$') and - not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 + not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 end lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') lex:add_fold_point(lexer.KEYWORD, 'class', 'end') diff --git a/lexlua/rust.lua b/lexlua/rust.lua index 7546e8c6d..427b12d04 100644 --- a/lexlua/rust.lua +++ b/lexlua/rust.lua @@ -25,7 +25,7 @@ lex:add_rule('macro', token(lexer.FUNCTION, lexer.word * S("!"))) -- Library types lex:add_rule('library', token(lexer.LABEL, lexer.upper * - (lexer.lower + lexer.dec_num)^1)) + (lexer.lower + lexer.dec_num)^1)) -- Numbers. local identifier = P('r#')^-1 * lexer.word @@ -37,22 +37,21 @@ end local function opt_cap(patt) return C(patt^-1) end -local float = decimal_literal * - (Cmt(opt_cap('.' * decimal_literal) * - opt_cap(S('eE') * S('+-')^-1 * integer_suffix(digit)) * - opt_cap(P('f32') + 'f64'), - function (input, index, decimals, exponent, type) - return decimals ~= "" or exponent ~= "" or type ~= "" - end) + - '.' * -(S('._') + identifier)) +local float = decimal_literal * (Cmt( + opt_cap('.' * decimal_literal) * opt_cap(S('eE') * S('+-')^-1 * + integer_suffix(digit)) * opt_cap(P('f32') + 'f64'), + function (input, index, decimals, exponent, type) + return decimals ~= "" or exponent ~= "" or type ~= "" + end) + '.' * -(S('._') + identifier)) local function prefixed_integer(prefix, digit) return P(prefix) * integer_suffix(digit) end -local integer = (prefixed_integer('0b', S('01')) + - prefixed_integer('0o', R('07')) + - prefixed_integer('0x', lexer.xdigit) + - decimal_literal) * - (S('iu') * (P('8') + '16' + '32' + '64' + '128' + 'size'))^-1 +local integer = ( + prefixed_integer('0b', S('01')) + + prefixed_integer('0o', R('07')) + + prefixed_integer('0x', lexer.xdigit) + + decimal_literal +) * (S('iu') * (P('8') + '16' + '32' + '64' + '128' + 'size'))^-1 lex:add_rule('number', token(lexer.NUMBER, float + integer)) -- Types. @@ -61,31 +60,30 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ ]])) -- Strings. -local sq_str = P('b')^-1 * lexer.delimited_range("'", true) -local dq_str = P('b')^-1 * lexer.delimited_range('"') +local sq_str = P('b')^-1 * lexer.range("'", true) +local dq_str = P('b')^-1 * lexer.range('"') local raw_str = Cmt(P('b')^-1 * P('r') * C(P('#')^0) * '"', - function(input, index, hashes) - local _, e = input:find('"'..hashes, index, true) - return (e or #input) + 1 - end) + function(input, index, hashes) + local _, e = input:find('"' .. hashes, index, true) + return (e or #input) + 1 + end) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str)) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, identifier)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = lexer.nested_pair('/*', '*/') +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/', false, false, true) lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) +-- Attributes. +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * + lexer.range('[', ']', true))) + -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, - S('+-/*%<>!=`^~@&|?#~:;,.()[]{}'))) - --- Attributes. -lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, - "#[" * (lexer.nonnewline - ']')^0 * - P("]")^-1)) + S('+-/*%<>!=`^~@&|?#~:;,.()[]{}'))) -- Fold points. lex:add_fold_point(lexer.COMMENT, '/*', '*/') diff --git a/lexlua/sass.lua b/lexlua/sass.lua index 1c6d8640f..02dcf75c3 100644 --- a/lexlua/sass.lua +++ b/lexlua/sass.lua @@ -9,7 +9,7 @@ local P, S = lpeg.P, lpeg.S local lex = lexer.new('sass', {inherit = lexer.load('css')}) -- Line comments. -lex:add_rule('line_comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0)) +lex:add_rule('line_comment', token(lexer.COMMENT, lexer.to_eol('//'))) -- Variables. lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + S('_-'))^1)) diff --git a/lexlua/scala.lua b/lexlua/scala.lua index 38d328b54..f2959396f 100644 --- a/lexlua/scala.lua +++ b/lexlua/scala.lua @@ -13,7 +13,7 @@ lex:add_rule('whitespace', ws) -- Classes. lex:add_rule('class', token(lexer.KEYWORD, P('class')) * ws^1 * - token(lexer.CLASS, lexer.word)) + token(lexer.CLASS, lexer.word)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ @@ -37,18 +37,17 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. local symbol = "'" * lexer.word -local dq_str = lexer.delimited_range('"', true) -local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local dq_str = lexer.range('"', true) +local tq_str = lexer.range('"""') lex:add_rule('string', token(lexer.STRING, tq_str + symbol + dq_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('LlFfDd')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlFfDd')^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) diff --git a/lexlua/scheme.lua b/lexlua/scheme.lua index 1d37c65c6..326f52cf5 100644 --- a/lexlua/scheme.lua +++ b/lexlua/scheme.lua @@ -53,17 +53,17 @@ lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) -- Strings. local literal = (P("'") + '#' * S('\\bdox')) * lexer.word -local dq_str = lexer.delimited_range('"') +local dq_str = lexer.range('"') lex:add_rule('string', token(lexer.STRING, literal + dq_str)) -- Comments. -local line_comment = ';' * lexer.nonnewline^0 -local block_comment = '#|' * (lexer.any - '|#')^0 * P('|#')^-1 +local line_comment = lexer.to_eol(';') +local block_comment = lexer.range('#|', '|#') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * - (S('./') * lexer.digit^1)^-1)) + (S('./') * lexer.digit^1)^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('<>=*/+-`@%:()'))) diff --git a/lexlua/smalltalk.lua b/lexlua/smalltalk.lua index 086ce79e6..0acad115f 100644 --- a/lexlua/smalltalk.lua +++ b/lexlua/smalltalk.lua @@ -24,15 +24,15 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - '$' * lexer.word)) +local sq_str = lexer.range("'") +local word_str = '$' * lexer.word +lex:add_rule('string', token(lexer.STRING, sq_str + word_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, - lexer.delimited_range('"', false, true))) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('"', false, false))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S(':=_<>+-/*!()[]'))) diff --git a/lexlua/sml.lua b/lexlua/sml.lua index e1d00cfe6..9aa4a6922 100644 --- a/lexlua/sml.lua +++ b/lexlua/sml.lua @@ -11,11 +11,11 @@ end local ws = token(lexer.WHITESPACE, lexer.space^1) -- single line comments are valid in successor ml -local cl = '(*)' * lexer.nonnewline^0 -local comment = token(lexer.COMMENT, cl + lexer.nested_pair('(*', '*)')) +local line_comment = lexer.to_eol('(*)') +local block_comment = lexer.range('(*', '*)', false, false, true) +local comment = token(lexer.COMMENT, line_comment + block_comment) -local string = token(lexer.STRING, lpeg.P('#')^-1 * - lexer.delimited_range('"', true)) +local string = token(lexer.STRING, lpeg.P('#')^-1 * lexer.range('"', true)) local function num(digit) return digit * (digit^0 * lpeg.P('_'))^0 * digit^1 + digit @@ -29,15 +29,10 @@ local real = int * frac^-1 * exp + int * frac * exp^-1 local hex = num(lexer.xdigit) local bin = num(lpeg.S('01')) -local number = token(lexer.NUMBER, - lpeg.P('0w') * int - + (lpeg.P('0wx') + lpeg.P('0xw')) * hex - + (lpeg.P('0wb') + lpeg.P('0bw')) * bin - + minus * lpeg.P('0x') * hex - + minus * lpeg.P('0b') * bin - + minus * real - + minus * int -) +local number = token(lexer.NUMBER, lpeg.P('0w') * int + + (lpeg.P('0wx') + lpeg.P('0xw')) * hex + + (lpeg.P('0wb') + lpeg.P('0bw')) * bin + minus * lpeg.P('0x') * hex + + minus * lpeg.P('0b') * bin + minus * real + minus * int) local keyword = token(lexer.KEYWORD, mlword{ 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end', @@ -51,7 +46,7 @@ local keyword = token(lexer.KEYWORD, mlword{ -- includes valid symbols for identifiers local operator = token(lexer.OPERATOR, - lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')) + lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')) local type = token(lexer.TYPE, mlword{ 'int', 'real', 'word', 'bool', 'char', 'string', 'unit', @@ -78,14 +73,11 @@ local c = mlword{'true', 'false', 'nil'} local const = token(lexer.CONSTANT, lexer.upper * id + c) local structure = token(lexer.CLASS, aid * lpeg.P('.')) -local open - = token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'}) - * ws * token(lexer.CLASS, longid) +local open = token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'}) * ws * + token(lexer.CLASS, longid) -local struct_dec - = token(lexer.KEYWORD, lpeg.P('structure')) * ws - * token(lexer.CLASS, aid) * ws - * token(lexer.OPERATOR, lpeg.P('=')) * ws +local struct_dec = token(lexer.KEYWORD, lpeg.P('structure')) * ws * + token(lexer.CLASS, aid) * ws * token(lexer.OPERATOR, lpeg.P('=')) * ws local struct_new = struct_dec * token(lexer.KEYWORD, lpeg.P('struct')) local struct_alias = struct_dec * token(lexer.CLASS, longid) diff --git a/lexlua/snobol4.lua b/lexlua/snobol4.lua index ad31aa782..1723a127f 100644 --- a/lexlua/snobol4.lua +++ b/lexlua/snobol4.lua @@ -11,8 +11,8 @@ local M = { _NAME = 'snobol4' } -- Helper patterns. local dotted_id = lexer.word * (P'.' * lexer.word)^0 -local dq_str = lexer.delimited_range('"', true, true) -local sq_str = lexer.delimited_range("'", true, true) +local dq_str = lexer.range('"', true, false) +local sq_str = lexer.range("'", true, false) local branch = B(lexer.space * P':(') * dotted_id * #P')' local sbranch = B(lexer.space * P':' * S'SF' * '(') * dotted_id * #P')' @@ -27,9 +27,9 @@ local bif = token(lexer.FUNCTION, word_match({ 'REVERSE', 'RPAD', 'RSORT', 'SERV_LISTEN', 'SET', 'SETEXIT', 'SIZE', 'SORT', 'SQRT', 'SSET', 'SUBSTR', 'TABLE', 'THAW', 'TIME', 'TRACE', 'TRIM', 'UNLOAD', 'VALUE', 'VDIFFER', -}, '', true) * #lexer.delimited_range('()', false, true, true)) -local comment = token(lexer.COMMENT, - lexer.starts_line(S'*#|;!' * lexer.nonnewline^0)) +}, '', true) * #lexer.range('(', ')', false, false, true)) +local comment = token(lexer.COMMENT, lexer.starts_line(S'*#|;!' * + lexer.nonnewline^0)) local control = token(lexer.PREPROCESSOR, lexer.starts_line(P'-' * lexer.word)) local identifier = token(lexer.DEFAULT, dotted_id) local keyword = token(lexer.KEYWORD, word_match({ @@ -42,7 +42,7 @@ local operator = token(lexer.OPERATOR, S'¬?$.!%*/#+-@⊥&^~\\=') local pattern = lexer.token(lexer.CLASS, word_match({ -- keep distinct 'ABORT', 'ANY', 'ARB', 'ARBNO', 'BAL', 'BREAK', 'BREAKX', 'FAIL', 'FENCE', 'LEN', 'NOTANY', 'POS', 'REM', 'RPOS', 'RTAB', 'SPAN', 'SUCCEED', 'TAB', -}, '', true) * #lexer.delimited_range('()', false, true, true)) +}, '', true) * #lexer.range('(', ')', false, false, true)) local str = token(lexer.STRING, sq_str + dq_str) local target = token(lexer.LABEL, branch + sbranch + sbranchx) local ws = token(lexer.WHITESPACE, lexer.space^1) diff --git a/lexlua/sql.lua b/lexlua/sql.lua index b38e48501..f0049f552 100644 --- a/lexlua/sql.lua +++ b/lexlua/sql.lua @@ -41,17 +41,18 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"') + - lexer.delimited_range('`'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local bq_str = lexer.range('`') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bq_str)) -- Comments. -local line_comment = (P('--') + '#') * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol(P('--') + '#') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S(',()'))) diff --git a/lexlua/taskpaper.lua b/lexlua/taskpaper.lua index de9270286..a7a390c68 100644 --- a/lexlua/taskpaper.lua +++ b/lexlua/taskpaper.lua @@ -19,14 +19,12 @@ local overdue_tag = token('overdue_tag', P('@overdue')) local plain_tag = token('plain_tag', P('@') * lexer.word) -local extended_tag = token('extended_tag', - P('@') * lexer.word * P('(') * - (lexer.word + R('09') + P('-'))^1 * P(')')) +local extended_tag = token('extended_tag', P('@') * lexer.word * P('(') * + (lexer.word + R('09') + P('-'))^1 * P(')')) -- Projects -local project = token('project', - lexer.nested_pair(lexer.starts_line(lexer.alnum), ':') * - lexer.newline) +local project = token('project', lexer.range(lexer.starts_line(lexer.alnum), + ':', false, false, true) * lexer.newline) -- Notes local note = token('note', delimiter^1 * lexer.alnum * lexer.nonnewline^0) diff --git a/lexlua/tcl.lua b/lexlua/tcl.lua index 45e3ccf9d..0f0a0d80a 100644 --- a/lexlua/tcl.lua +++ b/lexlua/tcl.lua @@ -13,11 +13,12 @@ local lex = lexer.new('tcl') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comment. -lex:add_rule('comment', token(lexer.COMMENT, '#' * P(function(input, index) - local i = index - 2 - while i > 0 and input:find('^[ \t]', i) do i = i - 1 end - if i < 1 or input:find('^[\r\n;]', i) then return index end -end) * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#' * + P(function(input, index) + local i = index - 2 + while i > 0 and input:find('^[ \t]', i) do i = i - 1 end + if i < 1 or input:find('^[\r\n;]', i) then return index end + end)))) -- Separator (semicolon). lex:add_rule('separator', token(lexer.CLASS, P(';'))) @@ -32,15 +33,13 @@ lex:add_rule('brackets', token(lexer.VARIABLE, S('[]'))) -- Variable substitution. lex:add_rule('variable', token(lexer.STRING, '$' * - (lexer.alnum + '_' + P(':')^2)^0)) + (lexer.alnum + '_' + P(':')^2)^0)) -- Backslash substitution. -lex:add_rule('backslash', token(lexer.TYPE, - '\\' * (lexer.digit * lexer.digit^-2 + - 'x' * lexer.xdigit^1 + - 'u' * lexer.xdigit * lexer.xdigit^-3 + - 'U' * lexer.xdigit * lexer.xdigit^-7 + - 1))) +local oct = lexer.digit * lexer.digit^-2 +local hex = 'x' * lexer.xdigit^1 +local unicode = 'u' * lexer.xdigit * lexer.xdigit^-3 +lex:add_rule('backslash', token(lexer.TYPE, '\\' * (oct + hex + unicode + 1))) -- Fold points. lex:add_fold_point(lexer.KEYWORD, '{', '}') diff --git a/lexlua/template.txt b/lexlua/template.txt index 730479384..a4dda44c4 100644 --- a/lexlua/template.txt +++ b/lexlua/template.txt @@ -18,14 +18,15 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.{}[]()'))) diff --git a/lexlua/tex.lua b/lexlua/tex.lua index 9e707f9be..8c9e76860 100644 --- a/lexlua/tex.lua +++ b/lexlua/tex.lua @@ -12,16 +12,16 @@ local lex = lexer.new('tex') lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('%'))) -- TeX environments. lex:add_rule('environment', token('environment', '\\' * (P('begin') + 'end') * - lexer.word)) + lexer.word)) lex:add_style('environment', lexer.STYLE_KEYWORD) -- Commands. -lex:add_rule('command', token(lexer.KEYWORD, '\\' * - (lexer.alpha^1 + S('#$&~_^%{}')))) +lex:add_rule('command', token(lexer.KEYWORD, '\\' * (lexer.alpha^1 + + S('#$&~_^%{}')))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('$&#{}[]'))) diff --git a/lexlua/texinfo.lua b/lexlua/texinfo.lua index 89628715e..bb5ba55c6 100644 --- a/lexlua/texinfo.lua +++ b/lexlua/texinfo.lua @@ -79,7 +79,7 @@ local directives_base = word_match([[ bye ]], true) lex:add_rule('directive', token('directives', ('@end' * lexer.space^1 + '@') * - directives_base)) + directives_base)) lex:add_style('directives', lexer.STYLE_FUNCTION) -- Chapters. @@ -103,7 +103,7 @@ local chapters_base = word_match([[ chapheading majorheading heading subheading subsubheading ]], true) lex:add_rule('chapter', token('chapters', ('@end' * lexer.space^1 + '@') * - chapters_base)) + chapters_base)) lex:add_style('chapters', lexer.STYLE_CLASS) -- Common keywords. @@ -175,35 +175,32 @@ local keyword_base = word_match([[ -- not implemented ]], true) lex:add_rule('keyword', token(lexer.KEYWORD, ('@end' * lexer.space^1 + '@') * - keyword_base)) + keyword_base)) + +local nested_braces = lexer.range('{', '}', false, false, true) -- Italics -lex:add_rule('emph', token('emph', - '@emph' * - lexer.delimited_range('{}', false, true, true))) -lex:add_style('emph', lexer.STYLE_STRING..',italics') +lex:add_rule('emph', token('emph', '@emph' * nested_braces)) + +lex:add_style('emph', lexer.STYLE_STRING .. ',italics') -- Bold -lex:add_rule('strong', token('strong', - '@strong' * - lexer.delimited_range('{}', false, true, true))) -lex:add_style('strong', lexer.STYLE_STRING..',bold') +lex:add_rule('strong', token('strong', '@strong' * nested_braces)) +lex:add_style('strong', lexer.STYLE_STRING .. ',bold') -- Identifiers lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range('{}', false, true, true))) +lex:add_rule('string', token(lexer.STRING, nested_braces)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Comments. -local line_comment = '@c' * lexer.nonnewline_esc^0 ---local line_comment_long = '@comment' * lexer.nonnewline_esc^0 -local block_comment = '@ignore' * (lexer.any - '@end ignore')^0 * - P('@end ignore')^-1 +local line_comment = lexer.to_eol('@c', true) +--local line_comment_long = lexer.to_eol('@comment', true) +local block_comment = lexer.range('@ignore', '@end ignore') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Fold points. diff --git a/lexlua/toml.lua b/lexlua/toml.lua index 7b32c0c1d..ae6835174 100644 --- a/lexlua/toml.lua +++ b/lexlua/toml.lua @@ -9,10 +9,9 @@ local lex = lexer.new('toml', {fold_by_indentation = true}) -- Whitespace lex:add_rule('indent', #lexer.starts_line(S(' \t')) * - (token(lexer.WHITESPACE, ' ') + - token('indent_error', '\t'))^1) + (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1) lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + - lexer.newline^1)) + lexer.newline^1)) lex:add_style('indent_error', 'back:%(color.red)') -- kewwords. @@ -22,32 +21,32 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[true false]])) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"'))) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('#'))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('#=+-,.{}[]()'))) -- Datetime. -lex:add_rule('datetime', - token('timestamp', - lexer.digit * lexer.digit * lexer.digit * lexer.digit * -- yr - '-' * lexer.digit * lexer.digit^-1 * -- month - '-' * lexer.digit * lexer.digit^-1 * -- day - ((S(' \t')^1 + S('tT'))^-1 * -- separator - lexer.digit * lexer.digit^-1 * -- hour - ':' * lexer.digit * lexer.digit * -- minute - ':' * lexer.digit * lexer.digit * -- second - ('.' * lexer.digit^0)^-1 * -- fraction - ('Z' + -- timezone - S(' \t')^0 * S('-+') * lexer.digit * lexer.digit^-1 * - (':' * lexer.digit * lexer.digit)^-1)^-1)^-1)) +local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit +local month = lexer.digit * lexer.digit^-1 +local day = lexer.digit * lexer.digit^-1 +local date = year * '-' * month * '-' * day +local hours = lexer.digit * lexer.digit^-1 +local minutes = lexer.digit * lexer.digit +local seconds = lexer.digit * lexer.digit +local fraction = '.' * lexer.digit^0 +local time = hours * ':' * minutes * ':' * seconds * fraction^-1 +local T = S(' \t')^1 + S('tT') +local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1 +lex:add_rule('datetime', token('timestamp', date * (T * time * zone^-1))) lex:add_style('timestamp', lexer.STYLE_NUMBER) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) return lex diff --git a/lexlua/txt2tags.lua b/lexlua/txt2tags.lua index 828b8b8fe..1fca8a695 100644 --- a/lexlua/txt2tags.lua +++ b/lexlua/txt2tags.lua @@ -16,81 +16,51 @@ local ws = token(lexer.WHITESPACE, (lexer.space - lexer.newline)^1) -- Titles local alphanumeric = R('AZ') + R('az') + R('09') + P('_') + P('-') local header_label = token('header_label_start', '[') * - token('header_label', alphanumeric^1) * - token('header_label_end', ']') -local header = (token('h5', ('=====' * (lexer.nonnewline - '=')^1 * '=====') + - ('+++++' * (lexer.nonnewline - '+')^1 * '+++++')) * - header_label^-1) + - (token('h4', ('====' * (lexer.nonnewline - '=')^1 * '====') + - ('++++' * (lexer.nonnewline - '+')^1 * '++++')) * - header_label^-1) + - (token('h3', ('===' * (lexer.nonnewline - '=')^1 * '===') + - ('+++' * (lexer.nonnewline - '+')^1 * '+++')) * - header_label^-1) + - (token('h2', ('==' * (lexer.nonnewline - '=')^1 * '==') + - ('++' * (lexer.nonnewline - '+')^1 * '++')) * - header_label^-1) + - (token('h1', ('=' * (lexer.nonnewline - '=')^1 * '=') + - ('+' * (lexer.nonnewline - '+')^1 * '+')) * - header_label^-1) + token('header_label', alphanumeric^1) * token('header_label_end', ']') +local function h(level) + local equal = string.rep('=', level) * (lexer.nonnewline - '=')^1 * + string.rep('=', level) + local plus = string.rep('+', level) * (lexer.nonnewline - '+')^1 * + string.rep('+', level) + return token('h' .. level, equal + plus) * header_label^-1 +end +local header = h(5) + h(4) + h(3) + h(2) + h(1) -- Comments. -local line_comment = lexer.starts_line('%') * lexer.nonnewline^0 -local block_comment = lexer.starts_line('%%%') * - (lexer.space - lexer.newline)^0 * lexer.newline * - (lexer.any - '%%%')^0 * lexer.starts_line('%%%')^-1 +local line_comment = lexer.to_eol(lexer.starts_line('%')) +local block_comment = lexer.range(lexer.starts_line('%%%')) local comment = token(lexer.COMMENT, block_comment + line_comment) -- Inline. -local bold = token('bold', ('**' * nonspace * '**' * S('*')^0) + - ('**' * nonspace * - (lexer.nonnewline - (nonspace * '**'))^0 * - nonspace * '**' * S('*')^0)) -local italic = token('italic', ('//' * nonspace * '//' * S('/')^0) + - ('//' * nonspace * - (lexer.nonnewline - (nonspace * '//'))^0 * - nonspace * '//' * S('/')^0)) -local underline = token('underline', ('__' * nonspace * '__' * S('_')^0) + - ('__' * nonspace * - (lexer.nonnewline - (nonspace * '__'))^0 * - nonspace * '__' * S('_')^0)) -local strike = token('strike', ('--' * nonspace * '--' * S('-')^0) + - ('--' * nonspace * - (lexer.nonnewline - (nonspace * '--'))^0 * - nonspace * '--' * S('-')^0)) -local mono = token('mono', ('``' * nonspace * '``' * S('`')^0) + - ('``' * nonspace * - (lexer.nonnewline - (nonspace * '``'))^0 * - nonspace * '``' * S('`')^0)) -local raw = token('raw', ('""' * nonspace * '""' * S('"')^0) + - ('""' * nonspace * - (lexer.nonnewline - (nonspace * '""'))^0 * nonspace * - '""' * S('"')^0)) -local tagged = token('tagged', ('\'\'' * nonspace * '\'\'' * S('\'')^0) + - ('\'\'' * nonspace * - (lexer.nonnewline - (nonspace * '\'\''))^0 * - nonspace * '\'\'' * S('\'')^0)) +local function span(name, delimiter) + return token(name, (delimiter * nonspace * delimiter * S(delimiter)^0) + ( + delimiter * nonspace * (lexer.nonnewline - nonspace * delimiter)^0 * + nonspace * delimiter * S(delimiter)^0)) +end +local bold = span('bold', '**') +local italic = span('italic', '//') +local underline = span('underline', '__') +local strike = span('strike', '--') +local mono = span('mono', '``') +local raw = span('raw', '""') +local tagged = span('tagged', "''") local inline = bold + italic + underline + strike + mono + raw + tagged -- Link. local email = token('email', (nonspace - '@')^1 * '@' * (nonspace - '.')^1 * - ('.' * (nonspace - '.' - '?')^1)^1 * - ('?' * nonspace^1)^-1) + ('.' * (nonspace - '.' - '?')^1)^1 * ('?' * nonspace^1)^-1) local host = token('host', (P('www') + P('WWW') + P('ftp') + P('FTP')) * - (nonspace - '.')^0 * '.' * (nonspace - '.')^1 * '.' * - (nonspace - ',' - '.')^1) + (nonspace - '.')^0 * '.' * (nonspace - '.')^1 * '.' * + (nonspace - ',' - '.')^1) local url = token('url', (nonspace - '://')^1 * '://' * - (nonspace - ',' - '.')^1 * - ('.' * (nonspace - ',' - '.' - '/' - '?' - '#')^1)^1 * - ('/' * (nonspace - '.' - '/' - '?' - '#')^0 * - ('.' * (nonspace - ',' - '.' - '?' - '#')^1)^0)^0 * - ('?' * (nonspace - '#')^1)^-1 * ('#' * nonspace^0)^-1) -local label_with_address = token('label_start', '[') * - lexer.space^0 * - token('address_label', ((nonspace - ']')^1 * - lexer.space^1)^1) * - token('address', (nonspace - ']')^1) * - token('label_end', ']') + (nonspace - ',' - '.')^1 * + ('.' * (nonspace - ',' - '.' - '/' - '?' - '#')^1)^1 * + ('/' * (nonspace - '.' - '/' - '?' - '#')^0 * + ('.' * (nonspace - ',' - '.' - '?' - '#')^1)^0)^0 * + ('?' * (nonspace - '#')^1)^-1 * ('#' * nonspace^0)^-1) +local label_with_address = token('label_start', '[') * lexer.space^0 * + token('address_label', ((nonspace - ']')^1 * lexer.space^1)^1) * + token('address', (nonspace - ']')^1) * token('label_end', ']') local link = label_with_address + url + host + email -- Line. @@ -98,54 +68,43 @@ local line = token('line', (P('-') + P('=') + P('_'))^20) -- Image. local image_only = token('image_start', '[') * - token('image', (nonspace - ']')^1) * token('image_end', ']') + token('image', (nonspace - ']')^1) * token('image_end', ']') local image_link = token('image_link_start', '[') * image_only * - token('image_link_sep', lexer.space^1) * - token('image_link', (nonspace - ']')^1) * - token('image_link_end', ']') + token('image_link_sep', lexer.space^1) * + token('image_link', (nonspace - ']')^1) * token('image_link_end', ']') local image = image_link + image_only -- Macro. local macro = token('macro', '%%' * (nonspace - '(')^1 * - ('(' * (lexer.nonnewline - ')')^0 * ')')^-1) + lexer.range('(', ')', true)^-1) -- Verbatim. -local verbatim_line = lexer.starts_line('```') * (lexer.space - lexer.newline) * - lexer.nonnewline^0 -local verbatim_block = lexer.starts_line('```') * - (lexer.space - lexer.newline)^0 * lexer.newline * - (lexer.any - '```')^0 * lexer.starts_line('```')^-1 +local verbatim_line = lexer.to_eol(lexer.starts_line('```') * S(' \t')) +local verbatim_block = lexer.range(lexer.starts_line('```')) local verbatim_area = token('verbatim_area', verbatim_block + verbatim_line) -- Raw. -local raw_line = lexer.starts_line('"""') * (lexer.space - lexer.newline) * - lexer.nonnewline^0 -local raw_block = lexer.starts_line('"""') * (lexer.space - lexer.newline)^0 * - lexer.newline * (lexer.any - '"""')^0 * - lexer.starts_line('"""')^-1 +local raw_line = lexer.to_eol(lexer.starts_line('"""') * S(' \t')) +local raw_block = lexer.range(lexer.starts_line('"""')) local raw_area = token('raw_area', raw_block + raw_line) -- Tagged. -local tagged_line = lexer.starts_line('\'\'\'') * - (lexer.space - lexer.newline) * lexer.nonnewline^0 -local tagged_block = lexer.starts_line('\'\'\'') * - (lexer.space - lexer.newline)^0 * lexer.newline * - (lexer.any - '\'\'\'')^0 * lexer.starts_line('\'\'\'')^-1 +local tagged_line = lexer.to_eol(lexer.starts_line('\'\'\'') * S(' \t')) +local tagged_block = lexer.range(lexer.starts_line('\'\'\'')) local tagged_area = token('tagged_area', tagged_block + tagged_line) -- Table. local table_sep = token('table_sep', '|') local cell_content = inline + link + image + macro + - token('cell_content', lexer.nonnewline - ' |') -local header_cell_content = token('header_cell_content', - lexer.nonnewline - ' |') + token('cell_content', lexer.nonnewline - ' |') +local header_cell_content = token('header_cell_content', lexer.nonnewline - + ' |') local field_sep = ' ' * table_sep^1 * ' ' local table_row_end = P(' ')^0 * table_sep^0 local table_row = lexer.starts_line(P(' ')^0 * table_sep) * cell_content^0 * - (field_sep * cell_content^0)^0 * table_row_end + (field_sep * cell_content^0)^0 * table_row_end local table_row_header = lexer.starts_line(P(' ')^0 * table_sep * table_sep) * - header_cell_content^0 * - (field_sep * header_cell_content^0)^0 * table_row_end + header_cell_content^0 * (field_sep * header_cell_content^0)^0 * table_row_end local table = table_row_header + table_row lex:add_rule('table', table) @@ -162,15 +121,15 @@ lex:add_rule('raw_area', raw_area) lex:add_rule('tagged_area', tagged_area) local font_size = lexer.property_int['fontsize'] > 0 and - lexer.property_int['fontsize'] or 10 + lexer.property_int['fontsize'] or 10 local hstyle = 'fore:$(color.red)' lex:add_style('line', 'bold') -lex:add_style('h5', hstyle..',size:'..(font_size + 1)) -lex:add_style('h4', hstyle..',size:'..(font_size + 2)) -lex:add_style('h3', hstyle..',size:'..(font_size + 3)) -lex:add_style('h2', hstyle..',size:'..(font_size + 4)) -lex:add_style('h1', hstyle..',size:'..(font_size + 5)) +lex:add_style('h5', hstyle .. ',size:' .. (font_size + 1)) +lex:add_style('h4', hstyle .. ',size:' .. (font_size + 2)) +lex:add_style('h3', hstyle .. ',size:' .. (font_size + 3)) +lex:add_style('h2', hstyle .. ',size:' .. (font_size + 4)) +lex:add_style('h1', hstyle .. ',size:' .. (font_size + 5)) lex:add_style('header_label', lexer.STYLE_LABEL) lex:add_style('email', 'underlined') lex:add_style('host', 'underlined') diff --git a/lexlua/vala.lua b/lexlua/vala.lua index 456841187..3a2b16fbf 100644 --- a/lexlua/vala.lua +++ b/lexlua/vala.lua @@ -34,20 +34,19 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -local sq_str = lexer.delimited_range("'", true) -local dq_str = lexer.delimited_range('"', true) -local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 -local ml_str = '@' * lexer.delimited_range('"', false, true) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +local tq_str = lexer.range('"""') +local ml_str = '@' * lexer.range('"', false, false) lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + ml_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('uUlLfFdDmM')^-1)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('uUlLfFdDmM')^-1)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) diff --git a/lexlua/vb.lua b/lexlua/vb.lua index 128cff63e..f85f8f875 100644 --- a/lexlua/vb.lua +++ b/lexlua/vb.lua @@ -33,19 +33,16 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[ -- Comments. lex:add_rule('comment', token(lexer.COMMENT, - (P("'") + word_match([[rem]], true)) * - lexer.nonnewline^0)) + lexer.to_eol("'" + word_match([[rem]], true)))) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range('"', true, true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('LlUuFf')^-2)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=><+-*^&:.,_()'))) diff --git a/lexlua/vbscript.lua b/lexlua/vbscript.lua index c0d5ba221..0bf2c0e12 100644 --- a/lexlua/vbscript.lua +++ b/lexlua/vbscript.lua @@ -33,19 +33,16 @@ lex:add_rule('type', token(lexer.TYPE, word_match([[ -- Comments. lex:add_rule('comment', token(lexer.COMMENT, - (P("'") + word_match([[rem]], true)) * - lexer.nonnewline^0)) + lexer.to_eol("'" + word_match([[rem]], true)))) -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, - lexer.delimited_range('"', true, true))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"', true, false))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * - S('LlUuFf')^-2)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number * S('LlUuFf')^-2)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=><+-*^&:.,_()'))) diff --git a/lexlua/vcard.lua b/lexlua/vcard.lua index 52ca4035a..0ea39b284 100644 --- a/lexlua/vcard.lua +++ b/lexlua/vcard.lua @@ -44,35 +44,30 @@ local identifier = lexer.alpha^1 * lexer.digit^0 * (P('-') * lexer.alnum^1)^0 -- Extension. local extension = token(lexer.TYPE, lexer.starts_line(S('xX') * P('-') * - identifier * #S(':;'))) + identifier * #S(':;'))) -- Parameter. local parameter = token(lexer.IDENTIFIER, - lexer.starts_line(identifier * #S(':='))) + - token(lexer.STRING, identifier) * #S(':=') + lexer.starts_line(identifier * #S(':='))) + token(lexer.STRING, identifier) * + #S(':=') -- Operators. local operator = token(lexer.OPERATOR, S('.:;=')) -- Group and property. local group_sequence = token(lexer.CONSTANT, lexer.starts_line(identifier)) * - token(lexer.OPERATOR, P('.')) * - (required_property + supported_property + - lexer.token(lexer.TYPE, S('xX') * P('-') * identifier) * - #S(':;')) + token(lexer.OPERATOR, P('.')) * (required_property + supported_property + + lexer.token(lexer.TYPE, S('xX') * P('-') * identifier) * #S(':;')) -- Begin vCard, end vCard. local begin_sequence = token(lexer.KEYWORD, P('BEGIN')) * - token(lexer.OPERATOR, P(':')) * - token(lexer.COMMENT, P('VCARD')) + token(lexer.OPERATOR, P(':')) * token(lexer.COMMENT, P('VCARD')) local end_sequence = token(lexer.KEYWORD, P('END')) * - token(lexer.OPERATOR, P(':')) * - token(lexer.COMMENT, P('VCARD')) + token(lexer.OPERATOR, P(':')) * token(lexer.COMMENT, P('VCARD')) -- vCard version (in v3.0 and v4.0 must appear immediately after BEGIN:VCARD). local version_sequence = token(lexer.KEYWORD, P('VERSION')) * - token(lexer.OPERATOR, P(':')) * - token(lexer.CONSTANT, lexer.digit^1 * - (P('.') * lexer.digit^1)^-1) + token(lexer.OPERATOR, P(':')) * + token(lexer.CONSTANT, lexer.digit^1 * (P('.') * lexer.digit^1)^-1) -- Data. local data = token(lexer.IDENTIFIER, lexer.any) diff --git a/lexlua/verilog.lua b/lexlua/verilog.lua index efae1ebbd..e3b5bf454 100644 --- a/lexlua/verilog.lua +++ b/lexlua/verilog.lua @@ -46,11 +46,11 @@ lex:add_rule('type', token(lexer.TYPE, word_match[[ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) +lex:add_rule('string', token(lexer.STRING, lexer.range('"'))) -- Comments. -local line_comment = '//' * lexer.nonnewline^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//') +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. @@ -58,9 +58,8 @@ local bin_suffix = S('bB') * S('01_xXzZ')^1 local oct_suffix = S('oO') * S('01234567_xXzZ')^1 local dec_suffix = S('dD') * S('0123456789_xXzZ')^1 local hex_suffix = S('hH') * S('0123456789abcdefABCDEF_xXzZ')^1 -lex:add_rule('number', token(lexer.NUMBER, (lexer.digit + '_')^1 + - "'" * (bin_suffix + oct_suffix + - dec_suffix + hex_suffix))) +lex:add_rule('number', token(lexer.NUMBER, (lexer.digit + '_')^1 + "'" * + (bin_suffix + oct_suffix + dec_suffix + hex_suffix))) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=~+-/*<>%&|^~,:;()[]{}'))) diff --git a/lexlua/vhdl.lua b/lexlua/vhdl.lua index 938f738cb..7570de7f5 100644 --- a/lexlua/vhdl.lua +++ b/lexlua/vhdl.lua @@ -50,18 +50,18 @@ lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ -- Identifiers. lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + "'") * - (lexer.alnum + S("_'"))^1)) + (lexer.alnum + S("_'"))^1)) -- Strings. -local sq_str = lexer.delimited_range("'", true, true) -local dq_str = lexer.delimited_range('"', true) +local sq_str = lexer.range("'", true, false) +local dq_str = lexer.range('"', true) lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('--'))) -- Numbers. -lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +lex:add_rule('number', token(lexer.NUMBER, lexer.number)) -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('=/!:;<>+-/*%&|^~()'))) diff --git a/lexlua/wsf.lua b/lexlua/wsf.lua index 2d64356cc..dfa14b1eb 100644 --- a/lexlua/wsf.lua +++ b/lexlua/wsf.lua @@ -13,12 +13,11 @@ local ws = token(lexer.WHITESPACE, lexer.space^1) lex:add_rule('whitespace', ws) -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * - P('-->')^-1)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->'))) local alpha = R('az', 'AZ', '\127\255') -local word_char = lexer.alnum + S('_-:.??') -local identifier = (alpha + S('_-:.??')) * word_char^0 +local word_char = lexer.alnum + S('_-:.?') +local identifier = (alpha + S('_-:.?')) * word_char^0 -- Elements. local element = token('element', '<' * P('/')^-1 * identifier) @@ -47,14 +46,15 @@ local equals = token(lexer.OPERATOR, '=') * in_tag lex:add_rule('equals', equals) -- Strings. +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"', false, false) local string = #S('\'"') * lexer.last_char_includes('=') * - token(lexer.STRING, lexer.delimited_range("'", false, true) + - lexer.delimited_range('"', false, true)) + token(lexer.STRING, sq_str + dq_str) lex:add_rule('string', string) -- Numbers. lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * - token(lexer.NUMBER, lexer.digit^1 * P('%')^-1) * in_tag) + token(lexer.NUMBER, lexer.digit^1 * P('%')^-1) * in_tag) -- Entities. lex:add_rule('entity', token('entity', '&' * word_match[[ @@ -74,8 +74,7 @@ lex:add_fold_point(lexer.COMMENT, '<!--', '-->') -- Tags that start embedded languages. local embed_start_tag = element * - (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * - ws^0 * tag_close + (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * ws^0 * tag_close local embed_end_tag = element * tag_close -- Embedded JavaScript. diff --git a/lexlua/xml.lua b/lexlua/xml.lua index 3d6b59b63..3acee7d22 100644 --- a/lexlua/xml.lua +++ b/lexlua/xml.lua @@ -12,31 +12,29 @@ local ws = token(lexer.WHITESPACE, lexer.space^1) lex:add_rule('whitespace', ws) -- Comments and CDATA. -lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * - P('-->')^-1)) -lex:add_rule('cdata', token('cdata', '<![CDATA[' * (lexer.any - ']]>')^0 * - P(']]>')^-1)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->'))) +lex:add_rule('cdata', token('cdata', lexer.range('<![CDATA[', ']]>'))) lex:add_style('cdata', lexer.STYLE_COMMENT) local alpha = R('az', 'AZ', '\127\255') local word_char = lexer.alnum + S('_-:.??') -local identifier = (alpha + S('_-:.??')) * word_char^0 +local identifier = (alpha + S('_-:.?')) * word_char^0 -- Doctypes and other markup tags. lex:add_rule('doctype', token('doctype', P('<!DOCTYPE')) * ws * - token('doctype', identifier) * (ws * identifier)^-1 * - (1 - P('>'))^0 * token('doctype', '>')) + token('doctype', identifier) * (ws * identifier)^-1 * (1 - P('>'))^0 * + token('doctype', '>')) lex:add_style('doctype', lexer.STYLE_COMMENT) -- Processing instructions. lex:add_rule('proc_insn', token('proc_insn', P('<?') * (1 - P('?>'))^0 * - P('?>')^-1)) + P('?>')^-1)) lex:add_style('proc_insn', lexer.STYLE_COMMENT) -- Elements. local namespace = token(lexer.OPERATOR, ':') * token('namespace', identifier) lex:add_rule('element', token('element', '<' * P('/')^-1 * identifier) * - namespace^-1) + namespace^-1) lex:add_style('element', lexer.STYLE_KEYWORD) lex:add_style('namespace', lexer.STYLE_CLASS) @@ -45,7 +43,7 @@ lex:add_rule('close_tag', token('element', P('/')^-1 * '>')) -- Attributes. lex:add_rule('attribute', token('attribute', identifier) * namespace^-1 * - #(lexer.space^0 * '=')) + #(lexer.space^0 * '=')) lex:add_style('attribute', lexer.STYLE_TYPE) -- TODO: performance is terrible on large files. @@ -61,14 +59,14 @@ end) --lex:add_rule('equal', token(lexer.OPERATOR, '=')) -- * in_tag -- Strings. +local sq_str = lexer.range("'", false, false) +local dq_str = lexer.range('"', false, false) lex:add_rule('string', #S('\'"') * lexer.last_char_includes('=') * - token(lexer.STRING, - lexer.delimited_range("'", false, true) + - lexer.delimited_range('"', false, true))) + token(lexer.STRING, sq_str + dq_str)) -- Numbers. lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * - token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag) + token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag) -- Entities. lex:add_rule('entity', token('entity', '&' * word_match[[ diff --git a/lexlua/xtend.lua b/lexlua/xtend.lua index d8efbb574..c54bc137d 100644 --- a/lexlua/xtend.lua +++ b/lexlua/xtend.lua @@ -13,7 +13,7 @@ lex:add_rule('whitespace', ws) -- Classes. lex:add_rule('class', token(lexer.KEYWORD, P('class')) * ws^1 * - token(lexer.CLASS, lexer.word)) + token(lexer.CLASS, lexer.word)) -- Keywords. lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ @@ -41,17 +41,17 @@ lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('(')) lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) -- Templates. -lex:add_rule('template', token('template', "'''" * (lexer.any - P("'''"))^0 * - P("'''")^-1)) +lex:add_rule('template', token('template', lexer.range("'''"))) lex:add_style('template', lexer.STYLE_EMBEDDED) -- Strings. -lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + - lexer.delimited_range('"', true))) +local sq_str = lexer.range("'", true) +local dq_str = lexer.range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) -- Comments. -local line_comment = '//' * lexer.nonnewline_esc^0 -local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local line_comment = lexer.to_eol('//', true) +local block_comment = lexer.range('/*', '*/') lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) -- Numbers. diff --git a/lexlua/yaml.lua b/lexlua/yaml.lua index 2cd54d210..fd70182fc 100644 --- a/lexlua/yaml.lua +++ b/lexlua/yaml.lua @@ -10,15 +10,16 @@ local M = {_NAME = 'yaml'} -- Whitespace. local indent = #lexer.starts_line(S(' \t')) * - (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1 + (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1 local ws = token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1) -- Comments. -local comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0) +local comment = token(lexer.COMMENT, lexer.to_eol('#')) -- Strings. -local string = token(lexer.STRING, lexer.delimited_range("'") + - lexer.delimited_range('"')) +local sq_str = lexer.range("'") +local dq_str = lexer.range('"') +local string = token(lexer.STRING, sq_str + dq_str) -- Numbers. local integer = lexer.dec_num + lexer.hex_num + '0' * S('oO') * R('07')^1 @@ -26,22 +27,23 @@ local special_num = '.' * word_match({'inf', 'nan'}, nil, true) local number = token(lexer.NUMBER, special_num + lexer.float + integer) -- Timestamps. -local ts = token('timestamp', - lexer.digit * lexer.digit * lexer.digit * lexer.digit * -- year - '-' * lexer.digit * lexer.digit^-1 * -- month - '-' * lexer.digit * lexer.digit^-1 * -- day - ((S(' \t')^1 + S('tT'))^-1 * -- separator - lexer.digit * lexer.digit^-1 * -- hour - ':' * lexer.digit * lexer.digit * -- minute - ':' * lexer.digit * lexer.digit * -- second - ('.' * lexer.digit^0)^-1 * -- fraction - ('Z' + -- timezone - S(' \t')^0 * S('-+') * lexer.digit * lexer.digit^-1 * - (':' * lexer.digit * lexer.digit)^-1)^-1)^-1) +local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit +local month = lexer.digit * lexer.digit^-1 +local day = lexer.digit * lexer.digit^-1 +local date = year * '-' * month * '-' * day +local hours = lexer.digit * lexer.digit^-1 +local minutes = lexer.digit * lexer.digit +local seconds = lexer.digit * lexer.digit +local fraction = '.' * lexer.digit^0 +local time = hours * ':' * minutes * ':' * seconds * fraction^-1 +local T = S(' \t')^1 + S('tT') +local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1 +local ts = token('timestamp', date * (T * time * zone^-1)) -- Constants. -local constant = token(lexer.CONSTANT, - word_match({'null', 'true', 'false'}, nil, true)) +local constant = token(lexer.CONSTANT, word_match({ + 'null', 'true', 'false' +}, nil, true)) -- Types. local type = token(lexer.TYPE, '!!' * word_match({ @@ -50,38 +52,36 @@ local type = token(lexer.TYPE, '!!' * word_match({ -- Scalar types. 'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp', 'value', 'yaml' -}, nil, true) + '!' * lexer.delimited_range('<>')) +}, nil, true) + '!' * lexer.range('<', '>', true)) -- Document boundaries. local doc_bounds = token('document', lexer.starts_line(P('---') + '...')) -- Directives local directive = token('directive', lexer.starts_line('%') * - lexer.nonnewline^1) + lexer.nonnewline^1) local word = (lexer.alpha + '-' * -lexer.space) * (lexer.alnum + '-')^0 -- Keys and literals. local colon = S(' \t')^0 * ':' * (lexer.space + -1) -local key = token(lexer.KEYWORD, - #word * (lexer.nonnewline - colon)^1 * #colon * - P(function(input, index) - local line = input:sub(1, index - 1):match('[^\r\n]+$') - return not line:find('[%w-]+:') and index - end)) +local key = token(lexer.KEYWORD, #word * (lexer.nonnewline - colon)^1 * #colon * + P(function(input, index) + local line = input:sub(1, index - 1):match('[^\r\n]+$') + return not line:find('[%w-]+:') and index + end)) local value = #word * (lexer.nonnewline - lexer.space^0 * S(',]}'))^1 local block = S('|>') * S('+-')^-1 * (lexer.newline + -1) * - function(input, index) - local rest = input:sub(index) - local level = #rest:match('^( *)') - for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do - if indent - pos < level and line ~= ' ' or - level == 0 and pos > 1 then - return index + pos - 1 - end - end - return #input + 1 - end + function(input, index) + local rest = input:sub(index) + local level = #rest:match('^( *)') + for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do + if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then + return index + pos - 1 + end + end + return #input + 1 + end local literal = token('literal', value + block) -- Indicators. |