about | summary | refs | log | tree | commit | diff | homepage
path: root/lexlua/sml.lua
diff options
context:
space:
mode:
author mitchell <unknown> 2020-04-25 16:26:31 -0400
committer mitchell <unknown> 2020-04-25 16:26:31 -0400
commit fad15f79b1230b3076be515d6894c8919562809b (patch)
tree 72c848ef02c3331de5ca54eff7adaea3a9a6fb88 /lexlua/sml.lua
parent 1fd02a367dec125c0b49dd9246a0928433866b96 (diff)
download scintilla-mirror-fad15f79b1230b3076be515d6894c8919562809b.tar.gz
Reformatted Lua LPeg lexers and added new convenience functions and pattern.
`lexer.range()` replaces `lexer.delimited_range()` and `lexer.nested_pair()`. `lexer.to_eol()` replaces `patt * lexer.nonnewline^0` constructs. `lexer.number` replaces `lexer.float + lexer.integer`. Also added unit tests for lexer functions.
Diffstat (limited to 'lexlua/sml.lua')
-rw-r--r-- lexlua/sml.lua 34
1 files changed, 13 insertions, 21 deletions
diff --git a/lexlua/sml.lua b/lexlua/sml.lua
index e1d00cfe6..9aa4a6922 100644
--- a/lexlua/sml.lua
+++ b/lexlua/sml.lua
@@ -11,11 +11,11 @@ end
local ws = token(lexer.WHITESPACE, lexer.space^1)
-- single line comments are valid in successor ml
-local cl = '(*)' * lexer.nonnewline^0
-local comment = token(lexer.COMMENT, cl + lexer.nested_pair('(*', '*)'))
+local line_comment = lexer.to_eol('(*)')
+local block_comment = lexer.range('(*', '*)', false, false, true)
+local comment = token(lexer.COMMENT, line_comment + block_comment)
-local string = token(lexer.STRING, lpeg.P('#')^-1 *
- lexer.delimited_range('"', true))
+local string = token(lexer.STRING, lpeg.P('#')^-1 * lexer.range('"', true))
local function num(digit)
return digit * (digit^0 * lpeg.P('_'))^0 * digit^1 + digit
@@ -29,15 +29,10 @@ local real = int * frac^-1 * exp + int * frac * exp^-1
local hex = num(lexer.xdigit)
local bin = num(lpeg.S('01'))
-local number = token(lexer.NUMBER,
- lpeg.P('0w') * int
- + (lpeg.P('0wx') + lpeg.P('0xw')) * hex
- + (lpeg.P('0wb') + lpeg.P('0bw')) * bin
- + minus * lpeg.P('0x') * hex
- + minus * lpeg.P('0b') * bin
- + minus * real
- + minus * int
-)
+local number = token(lexer.NUMBER, lpeg.P('0w') * int +
+ (lpeg.P('0wx') + lpeg.P('0xw')) * hex +
+ (lpeg.P('0wb') + lpeg.P('0bw')) * bin + minus * lpeg.P('0x') * hex +
+ minus * lpeg.P('0b') * bin + minus * real + minus * int)
local keyword = token(lexer.KEYWORD, mlword{
'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end',
@@ -51,7 +46,7 @@ local keyword = token(lexer.KEYWORD, mlword{
-- includes valid symbols for identifiers
local operator = token(lexer.OPERATOR,
- lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\'))
+ lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\'))
local type = token(lexer.TYPE, mlword{
'int', 'real', 'word', 'bool', 'char', 'string', 'unit',
@@ -78,14 +73,11 @@ local c = mlword{'true', 'false', 'nil'}
local const = token(lexer.CONSTANT, lexer.upper * id + c)
local structure = token(lexer.CLASS, aid * lpeg.P('.'))
-local open
- = token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'})
- * ws * token(lexer.CLASS, longid)
+local open = token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'}) * ws *
+ token(lexer.CLASS, longid)
-local struct_dec
- = token(lexer.KEYWORD, lpeg.P('structure')) * ws
- * token(lexer.CLASS, aid) * ws
- * token(lexer.OPERATOR, lpeg.P('=')) * ws
+local struct_dec = token(lexer.KEYWORD, lpeg.P('structure')) * ws *
+ token(lexer.CLASS, aid) * ws * token(lexer.OPERATOR, lpeg.P('=')) * ws
local struct_new = struct_dec * token(lexer.KEYWORD, lpeg.P('struct'))
local struct_alias = struct_dec * token(lexer.CLASS, longid)