about | summary | refs | log | tree | commit | diff | homepage
path: root/lexlua/html.lua
diff options
context:
space:
mode:
author: mitchell <unknown> 2020-04-25 16:26:31 -0400
committer: mitchell <unknown> 2020-04-25 16:26:31 -0400
commit: fad15f79b1230b3076be515d6894c8919562809b (patch)
tree: 72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/html.lua
parent: 1fd02a367dec125c0b49dd9246a0928433866b96 (diff)
download: scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz
Reformatted Lua LPeg lexers and added new convenience functions and pattern.
`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`. `lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs. `lexer.number` replaces `lexer.float + lexer.integer`. Also added unit tests for lexer functions.
Diffstat (limited to 'lexlua/html.lua')
-rw-r--r-- lexlua/html.lua 33
1 file changed, 14 insertions, 19 deletions
diff --git a/lexlua/html.lua b/lexlua/html.lua
index b77d8453c..465a828b6 100644
--- a/lexlua/html.lua
+++ b/lexlua/html.lua
@@ -12,12 +12,11 @@ local ws = token(lexer.WHITESPACE, lexer.space^1)
lex:add_rule('whitespace', ws)
-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 *
- P('-->')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
-- Doctype.
-lex:add_rule('doctype', token('doctype', '<!' * word_match([[doctype]], true) *
- (lexer.any - '>')^1 * '>'))
+lex:add_rule('doctype', token('doctype',
+ lexer.range('<!' * word_match([[doctype]], true), '>')))
lex:add_style('doctype', lexer.STYLE_COMMENT)
-- Elements.
@@ -37,12 +36,12 @@ local paired_element = token('element', '<' * P('/')^-1 * word_match([[
]], true))
local known_element = single_element + paired_element
local unknown_element = token('unknown_element', '<' * P('/')^-1 *
- (lexer.alnum + '-')^1)
+ (lexer.alnum + '-')^1)
local element = known_element + unknown_element
lex:add_rule('element', element)
lex:add_style('single_element', lexer.STYLE_KEYWORD)
lex:add_style('element', lexer.STYLE_KEYWORD)
-lex:add_style('unknown_element', lexer.STYLE_KEYWORD..',italics')
+lex:add_style('unknown_element', lexer.STYLE_KEYWORD .. ',italics')
-- Closing tags.
local tag_close = token('element', P('/')^-1 * '>')
@@ -66,7 +65,7 @@ local unknown_attribute = token('unknown_attribute', (lexer.alnum + '-')^1)
local attribute = (known_attribute + unknown_attribute) * #(lexer.space^0 * '=')
lex:add_rule('attribute', attribute)
lex:add_style('attribute', lexer.STYLE_TYPE)
-lex:add_style('unknown_attribute', lexer.STYLE_TYPE..',italics')
+lex:add_style('unknown_attribute', lexer.STYLE_TYPE .. ',italics')
-- TODO: performance is terrible on large files.
local in_tag = P(function(input, index)
@@ -83,17 +82,16 @@ local equals = token(lexer.OPERATOR, '=') --* in_tag
-- Strings.
local string = #S('\'"') * lexer.last_char_includes('=') *
- token(lexer.STRING, lexer.delimited_range("'") +
- lexer.delimited_range('"'))
+ token(lexer.STRING, lexer.range("'") + lexer.range('"'))
lex:add_rule('string', string)
-- Numbers.
lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') *
- token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
+ token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
-- Entities.
lex:add_rule('entity', token('entity', '&' * (lexer.any - lexer.space - ';')^1 *
- ';'))
+ ';'))
lex:add_style('entity', lexer.STYLE_COMMENT)
-- Fold points.
@@ -113,15 +111,13 @@ lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
-- Tags that start embedded languages.
-- Export these patterns for proxy lexers (e.g. ASP) that need them.
lex.embed_start_tag = element *
- (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 *
- ws^-1 * tag_close
+ (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * ws^-1 * tag_close
lex.embed_end_tag = element * tag_close
-- Embedded CSS (<style type="text/css"> ... </style>).
local css = lexer.load('css')
local style_element = word_match([[style]], true)
-local css_start_rule = #(P('<') * style_element *
- ('>' + P(function(input, index)
+local css_start_rule = #('<' * style_element * ('>' + P(function(input, index)
if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then
return index
end
@@ -132,8 +128,7 @@ lex:embed(css, css_start_rule, css_end_rule)
-- Embedded JavaScript (<script type="text/javascript"> ... </script>).
local js = lexer.load('javascript')
local script_element = word_match([[script]], true)
-local js_start_rule = #(P('<') * script_element *
- ('>' + P(function(input, index)
+local js_start_rule = #('<' * script_element * ('>' + P(function(input, index)
if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then
return index
end
@@ -142,13 +137,13 @@ local js_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag
local js_line_comment = '//' * (lexer.nonnewline_esc - js_end_rule)^0
local js_block_comment = '/*' * (lexer.any - '*/' - js_end_rule)^0 * P('*/')^-1
js:modify_rule('comment', token(lexer.COMMENT, js_line_comment +
- js_block_comment))
+ js_block_comment))
lex:embed(js, js_start_rule, js_end_rule)
-- Embedded CoffeeScript (<script type="text/coffeescript"> ... </script>).
local cs = lexer.load('coffeescript')
local script_element = word_match([[script]], true)
-local cs_start_rule = #(P('<') * script_element * P(function(input, index)
+local cs_start_rule = #('<' * script_element * P(function(input, index)
if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then
return index
end