diff options
| author | mitchell <unknown> | 2020-04-25 16:26:31 -0400 |
|---|---|---|
| committer | mitchell <unknown> | 2020-04-25 16:26:31 -0400 |
| commit | fad15f79b1230b3076be515d6894c8919562809b (patch) | |
| tree | 72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/xml.lua | |
| parent | 1fd02a367dec125c0b49dd9246a0928433866b96 (diff) | |
| download | scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz | |
Reformatted Lua LPeg lexers and added new convenience functions and pattern.
`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`.
`lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs.
`lexer.number` replaces `lexer.float + lexer.integer`.
Also added unit tests for lexer functions.
Diffstat (limited to 'lexlua/xml.lua')
| -rw-r--r-- | lexlua/xml.lua | 26 |
1 files changed, 12 insertions, 14 deletions
diff --git a/lexlua/xml.lua b/lexlua/xml.lua
index 3d6b59b63..3acee7d22 100644
--- a/lexlua/xml.lua
+++ b/lexlua/xml.lua
@@ -12,31 +12,29 @@ local ws = token(lexer.WHITESPACE, lexer.space^1)
 lex:add_rule('whitespace', ws)

 -- Comments and CDATA.
-lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 *
- P('-->')^-1))
-lex:add_rule('cdata', token('cdata', '<![CDATA[' * (lexer.any - ']]>')^0 *
- P(']]>')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
+lex:add_rule('cdata', token('cdata', lexer.range('<![CDATA[', ']]>')))
 lex:add_style('cdata', lexer.STYLE_COMMENT)

 local alpha = R('az', 'AZ', '\127\255')
 local word_char = lexer.alnum + S('_-:.??')
-local identifier = (alpha + S('_-:.??')) * word_char^0
+local identifier = (alpha + S('_-:.?')) * word_char^0

 -- Doctypes and other markup tags.
 lex:add_rule('doctype', token('doctype', P('<!DOCTYPE')) * ws *
- token('doctype', identifier) * (ws * identifier)^-1 *
- (1 - P('>'))^0 * token('doctype', '>'))
+ token('doctype', identifier) * (ws * identifier)^-1 * (1 - P('>'))^0 *
+ token('doctype', '>'))
 lex:add_style('doctype', lexer.STYLE_COMMENT)

 -- Processing instructions.
 lex:add_rule('proc_insn', token('proc_insn', P('<?') * (1 - P('?>'))^0 *
- P('?>')^-1))
+ P('?>')^-1))
 lex:add_style('proc_insn', lexer.STYLE_COMMENT)

 -- Elements.
 local namespace = token(lexer.OPERATOR, ':') * token('namespace', identifier)
 lex:add_rule('element', token('element', '<' * P('/')^-1 * identifier) *
- namespace^-1)
+ namespace^-1)
 lex:add_style('element', lexer.STYLE_KEYWORD)
 lex:add_style('namespace', lexer.STYLE_CLASS)

@@ -45,7 +43,7 @@ lex:add_rule('close_tag', token('element', P('/')^-1 * '>'))

 -- Attributes.
 lex:add_rule('attribute', token('attribute', identifier) * namespace^-1 *
- #(lexer.space^0 * '='))
+ #(lexer.space^0 * '='))
 lex:add_style('attribute', lexer.STYLE_TYPE)

 -- TODO: performance is terrible on large files.
@@ -61,14 +59,14 @@ end)
 --lex:add_rule('equal', token(lexer.OPERATOR, '=')) -- * in_tag

 -- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
 lex:add_rule('string', #S('\'"') * lexer.last_char_includes('=') *
- token(lexer.STRING,
- lexer.delimited_range("'", false, true) +
- lexer.delimited_range('"', false, true)))
+ token(lexer.STRING, sq_str + dq_str))

 -- Numbers.
 lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') *
- token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
+ token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)

 -- Entities.
 lex:add_rule('entity', token('entity', '&' * word_match[[ |
