From fad15f79b1230b3076be515d6894c8919562809b Mon Sep 17 00:00:00 2001 From: mitchell Date: Sat, 25 Apr 2020 16:26:31 -0400 Subject: Reformatted Lua LPeg lexers and added new convenience functions and pattern. `lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`. `lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs. `lexer.number` replaces `lexer.float + lexer.integer`. Also added unit tests for lexer functions. --- lexlua/mediawiki.lua | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) (limited to 'lexlua/mediawiki.lua') diff --git a/lexlua/mediawiki.lua b/lexlua/mediawiki.lua index 6a8a3a704..27a7409d8 100644 --- a/lexlua/mediawiki.lua +++ b/lexlua/mediawiki.lua @@ -9,18 +9,15 @@ local P, R, S, B = lpeg.P, lpeg.R, lpeg.S, lpeg.B local lex = lexer.new('mediawiki') -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * - P('-->')^-1)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->'))) -- HTML-like tags local tag_start = token('tag_start', '<' * P('/')^-1 * lexer.alnum^1 * - lexer.space^0) + lexer.space^0) local tag_attr = token('tag_attr', lexer.alpha^1 * lexer.space^0 * - ('=' * lexer.space^0 * - ('"' * ((lexer.any - S('>"\\')) + - ('\\' * lexer.any))^0 * '"' + - (lexer.any - lexer.space - '>')^0)^-1)^0 * - lexer.space^0) + ('=' * lexer.space^0 * + ('"' * ((lexer.any - S('>"\\')) + ('\\' * lexer.any))^0 * '"' + + (lexer.any - lexer.space - '>')^0)^-1)^0 * lexer.space^0) local tag_end = token('tag_end', P('/')^-1 * '>') lex:add_rule('tag', tag_start * tag_attr^0 * tag_end) lex:add_style('tag_start', lexer.STYLE_KEYWORD) @@ -30,18 +27,17 @@ lex:add_style('tag_end', lexer.STYLE_KEYWORD) -- Link lex:add_rule('link', token(lexer.STRING, S('[]'))) lex:add_rule('internal_link', B('[[') * - token('link_article', (lexer.any - '|' - ']]')^1)) -lex:add_style('link_article', lexer.STYLE_STRING..',underlined') + token('link_article', (lexer.any - '|' - ']]')^1)) 
+lex:add_style('link_article', lexer.STYLE_STRING .. ',underlined') -- Templates and parser functions. lex:add_rule('template', token(lexer.OPERATOR, S('{}'))) lex:add_rule('parser_func', B('{{') * - token('parser_func', P('#') * lexer.alpha^1 + - lexer.upper^1 * ':')) + token('parser_func', P('#') * lexer.alpha^1 + lexer.upper^1 * ':')) lex:add_rule('template_name', B('{{') * - token('template_name', (lexer.any - S('{}|'))^1)) + token('template_name', (lexer.any - S('{}|'))^1)) lex:add_style('parser_func', lexer.STYLE_FUNCTION) -lex:add_style('template_name', lexer.STYLE_OPERATOR..',underlined') +lex:add_style('template_name', lexer.STYLE_OPERATOR .. ',underlined') -- Operators. lex:add_rule('operator', token(lexer.OPERATOR, S('-=|#~!'))) @@ -49,10 +45,9 @@ lex:add_rule('operator', token(lexer.OPERATOR, S('-=|#~!'))) -- Behavior switches local start_pat = P(function(_, pos) return pos == 1 end) lex:add_rule('behavior_switch', (B(lexer.space) + start_pat) * - token('behavior_switch', - '__' * (P('TOC') + 'FORCETOC' + 'NOTOC' + - 'NOEDITSECTION' + 'NOCC' + - 'NOINDEX') * '__') * #lexer.space) + token('behavior_switch', '__' * + (P('TOC') + 'FORCETOC' + 'NOTOC' + 'NOEDITSECTION' + 'NOCC' + 'NOINDEX') * + '__') * #lexer.space) lex:add_style('behavior_switch', lexer.STYLE_KEYWORD) return lex -- cgit v1.2.3