Reformatted Lua LPeg lexers and added new convenience functions and a new pattern.

`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`. `lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs. `lexer.number` replaces `lexer.float + lexer.integer`. Also added unit tests for lexer functions.
author: mitchell <unknown> 2020-04-25 16:26:31 -0400
committer: mitchell <unknown> 2020-04-25 16:26:31 -0400
commit: fad15f79b1230b3076be515d6894c8919562809b (patch)
tree: 72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/xml.lua
parent: 1fd02a367dec125c0b49dd9246a0928433866b96 (diff)
download: scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz
1 files changed, 12 insertions, 14 deletions
diff --git a/lexlua/xml.lua b/lexlua/xml.lua
index 3d6b59b63..3acee7d22 100644
--- a/lexlua/xml.lua
+++ b/lexlua/xml.lua
@@ -12,31 +12,29 @@ local ws = token(lexer.WHITESPACE, lexer.space^1)
 lex:add_rule('whitespace', ws)
 
 -- Comments and CDATA.
-lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 *
-                                             P('-->')^-1))
-lex:add_rule('cdata', token('cdata', '<![CDATA[' * (lexer.any - ']]>')^0 *
-                                     P(']]>')^-1))
+lex:add_rule('comment', token(lexer.COMMENT, lexer.range('<!--', '-->')))
+lex:add_rule('cdata', token('cdata', lexer.range('<![CDATA[', ']]>')))
 lex:add_style('cdata', lexer.STYLE_COMMENT)
 
 local alpha = R('az', 'AZ', '\127\255')
 local word_char = lexer.alnum + S('_-:.??')
-local identifier = (alpha + S('_-:.??')) * word_char^0
+local identifier = (alpha + S('_-:.?')) * word_char^0
 
 -- Doctypes and other markup tags.
 lex:add_rule('doctype', token('doctype', P('<!DOCTYPE')) * ws *
-                        token('doctype', identifier) * (ws * identifier)^-1 *
-                        (1 - P('>'))^0 *  token('doctype', '>'))
+  token('doctype', identifier) * (ws * identifier)^-1 * (1 - P('>'))^0 *
+  token('doctype', '>'))
 lex:add_style('doctype', lexer.STYLE_COMMENT)
 
 -- Processing instructions.
 lex:add_rule('proc_insn', token('proc_insn', P('<?') * (1 - P('?>'))^0 *
-                                             P('?>')^-1))
+  P('?>')^-1))
 lex:add_style('proc_insn', lexer.STYLE_COMMENT)
 
 -- Elements.
 local namespace = token(lexer.OPERATOR, ':') * token('namespace', identifier)
 lex:add_rule('element', token('element', '<' * P('/')^-1 * identifier) *
-                        namespace^-1)
+  namespace^-1)
 lex:add_style('element', lexer.STYLE_KEYWORD)
 lex:add_style('namespace', lexer.STYLE_CLASS)
 
@@ -45,7 +43,7 @@ lex:add_rule('close_tag', token('element', P('/')^-1 * '>'))
 
 -- Attributes.
 lex:add_rule('attribute', token('attribute', identifier) * namespace^-1 *
-                          #(lexer.space^0 * '='))
+  #(lexer.space^0 * '='))
 lex:add_style('attribute', lexer.STYLE_TYPE)
 
 -- TODO: performance is terrible on large files.
@@ -61,14 +59,14 @@ end)
 --lex:add_rule('equal', token(lexer.OPERATOR, '=')) -- * in_tag
 
 -- Strings.
+local sq_str = lexer.range("'", false, false)
+local dq_str = lexer.range('"', false, false)
 lex:add_rule('string', #S('\'"') * lexer.last_char_includes('=') *
-                       token(lexer.STRING,
-                             lexer.delimited_range("'", false, true) +
-                             lexer.delimited_range('"', false, true)))
+  token(lexer.STRING, sq_str + dq_str))
 
 -- Numbers.
 lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') *
-                       token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
+  token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag)
 
 -- Entities.
 lex:add_rule('entity', token('entity', '&' * word_match[[
author	mitchell <unknown>	2020-04-25 16:26:31 -0400
committer	mitchell <unknown>	2020-04-25 16:26:31 -0400
commit	fad15f79b1230b3076be515d6894c8919562809b (patch)
tree	72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/xml.lua
parent	1fd02a367dec125c0b49dd9246a0928433866b96 (diff)
download	scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz