diff options
| author | mitchell <unknown> | 2020-04-25 16:26:31 -0400 | 
|---|---|---|
| committer | mitchell <unknown> | 2020-04-25 16:26:31 -0400 | 
| commit | fad15f79b1230b3076be515d6894c8919562809b (patch) | |
| tree | 72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/html.lua | |
| parent | 1fd02a367dec125c0b49dd9246a0928433866b96 (diff) | |
| download | scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz | |
Reformatted Lua LPeg lexers and added new convenience functions and a new pattern.
`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`.
`lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs.
`lexer.number` replaces `lexer.float + lexer.integer`.
Also added unit tests for lexer functions.
Diffstat (limited to 'lexlua/html.lua')
| -rw-r--r-- | lexlua/html.lua | 33 | 
1 file changed, 14 insertions, 19 deletions
| diff --git a/lexlua/html.lua b/lexlua/html.lua index b77d8453c..465a828b6 100644 --- a/lexlua/html.lua +++ b/lexlua/html.lua @@ -12,12 +12,11 @@ local ws = token(lexer.WHITESPACE, lexer.space^1)  lex:add_rule('whitespace', ws)  -- Comments. -lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * -                                             P('-->')^-1)) +lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))  -- Doctype. -lex:add_rule('doctype', token('doctype', '<!' * word_match([[doctype]], true) * -                                         (lexer.any - '>')^1 * '>')) +lex:add_rule('doctype', token('doctype', +  lexer.range('<!' * word_match([[doctype]], true), '>')))  lex:add_style('doctype', lexer.STYLE_COMMENT)  -- Elements. @@ -37,12 +36,12 @@ local paired_element = token('element', '<' * P('/')^-1 * word_match([[  ]], true))  local known_element = single_element + paired_element  local unknown_element = token('unknown_element', '<' * P('/')^-1 * -                                                 (lexer.alnum + '-')^1) +  (lexer.alnum + '-')^1)  local element = known_element + unknown_element  lex:add_rule('element', element)  lex:add_style('single_element', lexer.STYLE_KEYWORD)  lex:add_style('element', lexer.STYLE_KEYWORD) -lex:add_style('unknown_element', lexer.STYLE_KEYWORD..',italics') +lex:add_style('unknown_element', lexer.STYLE_KEYWORD .. ',italics')  -- Closing tags.  local tag_close = token('element', P('/')^-1 * '>') @@ -66,7 +65,7 @@ local unknown_attribute = token('unknown_attribute', (lexer.alnum + '-')^1)  local attribute = (known_attribute + unknown_attribute) * #(lexer.space^0 * '=')  lex:add_rule('attribute', attribute)  lex:add_style('attribute', lexer.STYLE_TYPE) -lex:add_style('unknown_attribute', lexer.STYLE_TYPE..',italics') +lex:add_style('unknown_attribute', lexer.STYLE_TYPE .. ',italics')  -- TODO: performance is terrible on large files.  
local in_tag = P(function(input, index) @@ -83,17 +82,16 @@ local equals = token(lexer.OPERATOR, '=') --* in_tag  -- Strings.  local string = #S('\'"') * lexer.last_char_includes('=') * -               token(lexer.STRING, lexer.delimited_range("'") + -                                   lexer.delimited_range('"')) +  token(lexer.STRING, lexer.range("'") + lexer.range('"'))  lex:add_rule('string', string)  -- Numbers.  lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * -                       token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag) +  token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)  -- Entities.  lex:add_rule('entity', token('entity', '&' * (lexer.any - lexer.space - ';')^1 * -                                       ';')) +  ';'))  lex:add_style('entity', lexer.STYLE_COMMENT)  -- Fold points. @@ -113,15 +111,13 @@ lex:add_fold_point(lexer.COMMENT, '<!--', '-->')  -- Tags that start embedded languages.  -- Export these patterns for proxy lexers (e.g. ASP) that need them.  lex.embed_start_tag = element * -                      (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * -                      ws^-1 * tag_close +  (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * ws^-1 * tag_close  lex.embed_end_tag = element * tag_close  -- Embedded CSS (<style type="text/css"> ... </style>).  local css = lexer.load('css')  local style_element = word_match([[style]], true) -local css_start_rule = #(P('<') * style_element * -                         ('>' + P(function(input, index) +local css_start_rule = #('<' * style_element * ('>' + P(function(input, index)    if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then      return index    end @@ -132,8 +128,7 @@ lex:embed(css, css_start_rule, css_end_rule)  -- Embedded JavaScript (<script type="text/javascript"> ... </script>).  
local js = lexer.load('javascript')  local script_element = word_match([[script]], true) -local js_start_rule = #(P('<') * script_element * -                        ('>' + P(function(input, index) +local js_start_rule = #('<' * script_element * ('>' + P(function(input, index)    if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then      return index    end @@ -142,13 +137,13 @@ local js_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag  local js_line_comment = '//' * (lexer.nonnewline_esc - js_end_rule)^0  local js_block_comment = '/*' * (lexer.any - '*/' - js_end_rule)^0 * P('*/')^-1  js:modify_rule('comment', token(lexer.COMMENT, js_line_comment + -                                               js_block_comment)) +  js_block_comment))  lex:embed(js, js_start_rule, js_end_rule)  -- Embedded CoffeeScript (<script type="text/coffeescript"> ... </script>).  local cs = lexer.load('coffeescript')  local script_element = word_match([[script]], true) -local cs_start_rule = #(P('<') * script_element * P(function(input, index) +local cs_start_rule = #('<' * script_element * P(function(input, index)    if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then      return index    end | 
