about | summary | refs | log | tree | commit | diff | homepage
path: root/lexlua/perl.lua
diff options
context:
space:
mode:
author: mitchell <unknown> 2020-04-25 16:26:31 -0400
committer: mitchell <unknown> 2020-04-25 16:26:31 -0400
commit: fad15f79b1230b3076be515d6894c8919562809b (patch)
tree: 72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/perl.lua
parent: 1fd02a367dec125c0b49dd9246a0928433866b96 (diff)
download: scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz
Reformatted Lua LPeg lexers and added new convenience functions and pattern.
`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`. `lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs. `lexer.number` replaces `lexer.float + lexer.integer`. Also added unit tests for lexer functions.
Diffstat (limited to 'lexlua/perl.lua')
-rw-r--r-- lexlua/perl.lua 63
1 file changed, 30 insertions, 33 deletions
diff --git a/lexlua/perl.lua b/lexlua/perl.lua
index 6686dcaf0..819b2a1a3 100644
--- a/lexlua/perl.lua
+++ b/lexlua/perl.lua
@@ -21,7 +21,7 @@ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
-- Markers.
lex:add_rule('marker', token(lexer.COMMENT, word_match[[__DATA__ __END__]] *
- lexer.any^0))
+ lexer.any^0))
-- Functions.
lex:add_rule('function', token(lexer.FUNCTION, word_match[[
@@ -46,22 +46,22 @@ lex:add_rule('function', token(lexer.FUNCTION, word_match[[
]]))
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
-local literal_delimitted = P(function(input, index) -- for single delimiter sets
+local literal_delimited = P(function(input, index) -- for single delimiter sets
local delimiter = input:sub(index, index)
if not delimiter:find('%w') then -- only non alpha-numerics
local match_pos, patt
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = lexer.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = lexer.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
match_pos = lpeg.match(patt, input, index)
return match_pos or #input + 1
end
end)
-local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
+local literal_delimited2 = P(function(input, index) -- for 2 delimiter sets
local delimiter = input:sub(index, index)
-- Only consider non-alpha-numerics and non-spaces as delimiters. The
-- non-spaces are used to ignore operators like "-s".
@@ -70,9 +70,9 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
if delimiter_matches[delimiter] then
-- Handle nested delimiter/matches in strings.
local s, e = delimiter, delimiter_matches[delimiter]
- patt = lexer.delimited_range(s..e, false, false, true)
+ patt = lexer.range(s, e, false, true, true)
else
- patt = lexer.delimited_range(delimiter)
+ patt = lexer.range(delimiter)
end
first_match_pos = lpeg.match(patt, input, index)
final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
@@ -84,50 +84,47 @@ local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
end)
-- Strings.
-local sq_str = lexer.delimited_range("'")
-local dq_str = lexer.delimited_range('"')
-local cmd_str = lexer.delimited_range('`')
+local sq_str = lexer.range("'")
+local dq_str = lexer.range('"')
+local cmd_str = lexer.range('`')
local heredoc = '<<' * P(function(input, index)
local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
if s == index and delimiter then
local end_heredoc = '[\n\r\f]+'
- local _, e = input:find(end_heredoc..delimiter, e)
+ local _, e = input:find(end_heredoc .. delimiter, e)
return e and e + 1 or #input + 1
end
end)
-local lit_str = 'q' * P('q')^-1 * literal_delimitted
-local lit_array = 'qw' * literal_delimitted
-local lit_cmd = 'qx' * literal_delimitted
-local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
+local lit_str = 'q' * P('q')^-1 * literal_delimited
+local lit_array = 'qw' * literal_delimited
+local lit_cmd = 'qx' * literal_delimited
+local lit_tr = (P('tr') + 'y') * literal_delimited2 * S('cds')^0
+local string = token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc +
+ lit_str + lit_array + lit_cmd + lit_tr)
local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
- lexer.delimited_range('/', true) * S('imosx')^0
-local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
-local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
-local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
-lex:add_rule('string',
- token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
- lit_array + lit_cmd + lit_tr) +
- token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub))
+ lexer.range('/', true) * S('imosx')^0
+local lit_regex = 'qr' * literal_delimited * S('imosx')^0
+local lit_match = 'm' * literal_delimited * S('cgimosx')^0
+local lit_sub = 's' * literal_delimited2 * S('ecgimosx')^0
+local regex = token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub)
+lex:add_rule('string', string + regex)
-- Identifiers.
lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
-- Comments.
-local line_comment = '#' * lexer.nonnewline_esc^0
-local block_comment = lexer.starts_line('=') * lexer.alpha *
- (lexer.any - lexer.newline * '=cut')^0 *
- (lexer.newline * '=cut')^-1
+local line_comment = lexer.to_eol('#', true)
+local block_comment = lexer.range(lexer.starts_line('=' * lexer.alpha),
+ lexer.starts_line('=cut'))
lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
-- Numbers.
-lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+lex:add_rule('number', token(lexer.NUMBER, lexer.number))
-- Variables.
-local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
- S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
- ':' * (lexer.any - ':') +
- P('$') * -lexer.word +
- lexer.digit^1)
+local special_var = '$' * (
+ '^' * S('ADEFHILMOPSTWX')^-1 + S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
+ ':' * (lexer.any - ':') + P('$') * -lexer.word + lexer.digit^1)
local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var))