author    | mitchell <unknown> | 2020-04-25 16:26:31 -0400
committer | mitchell <unknown> | 2020-04-25 16:26:31 -0400
commit    | fad15f79b1230b3076be515d6894c8919562809b (patch)
tree      | 72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/perl.lua
parent    | 1fd02a367dec125c0b49dd9246a0928433866b96 (diff)
download  | scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz
Reformatted Lua LPeg lexers and added new convenience functions and pattern.
`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`.
`lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs.
`lexer.number` replaces `lexer.float + lexer.integer`.
Also added unit tests for lexer functions.
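
As a rough aid to reading the diff below, the following sketch restates the old-to-new mapping from the message above in isolation. It uses only calls that appear in this commit; the `require('lexer')` line and the interpretation of the extra arguments (`true` for `lexer.to_eol()`, the trailing flags for `lexer.range()`) are assumptions inferred from how the Perl lexer uses them, not a statement of the documented API.

```lua
-- Sketch of the renamed helpers; argument meanings are inferred from this
-- commit's usage in lexlua/perl.lua, not asserted from the docs.
local lexer = require('lexer')  -- assumes Scintillua's lexer module is on the path

-- Old: lexer.delimited_range("'")        -> New:
local sq_str = lexer.range("'")

-- Old: '#' * lexer.nonnewline_esc^0      -> New (the second argument appears to
-- let the comment continue across escaped newlines):
local line_comment = lexer.to_eol('#', true)

-- Old: lexer.float + lexer.integer       -> New:
local number = lexer.number
```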
Diffstat (limited to 'lexlua/perl.lua')
-rw-r--r-- | lexlua/perl.lua | 63 |
1 file changed, 30 insertions, 33 deletions
diff --git a/lexlua/perl.lua b/lexlua/perl.lua
index 6686dcaf0..819b2a1a3 100644
--- a/lexlua/perl.lua
+++ b/lexlua/perl.lua
@@ -21,7 +21,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
 
 -- Markers.
 lex:add_rule('marker', token(lexer.COMMENT, word_match[[__DATA__ __END__]] *
-                                            lexer.any^0))
+  lexer.any^0))
 
 -- Functions.
 lex:add_rule('function', token(lexer.FUNCTION, word_match[[
@@ -46,22 +46,22 @@
 ]]))
 
 local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
-local literal_delimitted = P(function(input, index) -- for single delimiter sets
+local literal_delimited = P(function(input, index) -- for single delimiter sets
   local delimiter = input:sub(index, index)
   if not delimiter:find('%w') then -- only non alpha-numerics
     local match_pos, patt
     if delimiter_matches[delimiter] then
       -- Handle nested delimiter/matches in strings.
       local s, e = delimiter, delimiter_matches[delimiter]
-      patt = lexer.delimited_range(s..e, false, false, true)
+      patt = lexer.range(s, e, false, true, true)
     else
-      patt = lexer.delimited_range(delimiter)
+      patt = lexer.range(delimiter)
     end
     match_pos = lpeg.match(patt, input, index)
     return match_pos or #input + 1
   end
 end)
-local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
+local literal_delimited2 = P(function(input, index) -- for 2 delimiter sets
   local delimiter = input:sub(index, index)
   -- Only consider non-alpha-numerics and non-spaces as delimiters. The
   -- non-spaces are used to ignore operators like "-s".
@@ -70,9 +70,9 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
     if delimiter_matches[delimiter] then
       -- Handle nested delimiter/matches in strings.
       local s, e = delimiter, delimiter_matches[delimiter]
-      patt = lexer.delimited_range(s..e, false, false, true)
+      patt = lexer.range(s, e, false, true, true)
     else
-      patt = lexer.delimited_range(delimiter)
+      patt = lexer.range(delimiter)
     end
     first_match_pos = lpeg.match(patt, input, index)
     final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
@@ -84,50 +84,47 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
 end)
 
 -- Strings.
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local cmd_str = lexer.delimited_range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local cmd_str = lexer.range('`')
 local heredoc = '<<' * P(function(input, index)
   local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
   if s == index and delimiter then
     local end_heredoc = '[\n\r\f]+'
-    local _, e = input:find(end_heredoc..delimiter, e)
+    local _, e = input:find(end_heredoc .. delimiter, e)
     return e and e + 1 or #input + 1
   end
 end)
-local lit_str = 'q' * P('q')^-1 * literal_delimitted
-local lit_array = 'qw' * literal_delimitted
-local lit_cmd = 'qx' * literal_delimitted
-local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
+local lit_str = 'q' * P('q')^-1 * literal_delimited
+local lit_array = 'qw' * literal_delimited
+local lit_cmd = 'qx' * literal_delimited
+local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
+local string = token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc +
+  lit_str + lit_array + lit_cmd + lit_tr)
 local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
-                  lexer.delimited_range('/', true) * S('imosx')^0
-local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
-local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
-local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
-lex:add_rule('string',
-             token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
-                   lit_array + lit_cmd + lit_tr) +
-             token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub))
+  lexer.range('/', true) * S('imosx')^0
+local lit_regex = 'qr' * literal_delimited * S('imosx')^0
+local lit_match = 'm' * literal_delimited * S('cgimosx')^0
+local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0
+local regex = token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub)
+lex:add_rule('string', string + regex)
 
 -- Identifiers.
 lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
 
 -- Comments.
-local line_comment = '#' * lexer.nonnewline_esc^0
-local block_comment = lexer.starts_line('=') * lexer.alpha *
-                      (lexer.any - lexer.newline * '=cut')^0 *
-                      (lexer.newline * '=cut')^-1
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range(lexer.starts_line('=' * lexer.alpha),
+  lexer.starts_line('=cut'))
 lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
 
 -- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
 
 -- Variables.
-local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
-                           S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
-                           ':' * (lexer.any - ':') +
-                           P('$') * -lexer.word +
-                           lexer.digit^1)
+local special_var = '$' * (
+  '^' * S('ADEFHILMOPSTWX')^-1 + S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
+  ':' * (lexer.any - ':') + P('$') * -lexer.word + lexer.digit^1)
 local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
 lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var))
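
As a rough standalone illustration of the balanced-delimiter branch in `literal_delimited` above: `delimiter_matches` maps '{' to '}', so the pattern is built with the trailing flags that replace the old `delimited_range(..., true)` balanced form. The test string and the direct `lpeg.match` call below are illustrative assumptions; in the lexer itself this pattern is driven from the match-time function shown in the diff.

```lua
-- Illustrative only: exercises lexer.range('{', '}', false, true, true) the way
-- literal_delimited builds it for a q{...}-style literal containing nested braces.
local lpeg = require('lpeg')
local lexer = require('lexer')  -- assumes Scintillua's lexer module is available

local patt = lexer.range('{', '}', false, true, true)
-- Expected: the nested '{inner}' is consumed as part of the same literal, so
-- the match ends after the final '}' rather than at the first one.
print(lpeg.match(patt, '{outer {inner} tail}'))
```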