diff options
author | mitchell <unknown> | 2018-03-11 23:04:41 -0400 |
---|---|---|
committer | mitchell <unknown> | 2018-03-11 23:04:41 -0400 |
commit | 519b7328b66c4c84f03893a31e4be5ba6b1395f2 (patch) | |
tree | 2055cd79006357e94c185f341d0df17b9a8769eb /lexlua/sml.lua | |
parent | c0373e036e965a70045971e2abc582cb4bf12a4e (diff) | |
download | scintilla-mirror-519b7328b66c4c84f03893a31e4be5ba6b1395f2.tar.gz |
Added optional Lua lexer support.
This support is disabled by default and must be enabled via compile-time option.
Diffstat (limited to 'lexlua/sml.lua')
-rw-r--r-- | lexlua/sml.lua | 113 |
1 files changed, 113 insertions, 0 deletions
-- Copyright 2017-2018 Murray Calavera. See License.txt.
-- Standard ML LPeg lexer.

local lexer = require('lexer')
local token = lexer.token
-- `lpeg` is referenced as a global (this file never requires it), so it is
-- presumably installed by the lexer host before this chunk is loaded.
local P, S = lpeg.P, lpeg.S

--- Builds a word-list pattern for SML names.
-- The apostrophe is passed to `lexer.word_match` as its second argument so
-- that primed names such as `foo'` are not mistaken for the listed word `foo`.
-- Kept local so the helper does not leak into the global environment.
-- @param words List of words to match.
-- @return The pattern returned by `lexer.word_match`.
local function mlword(words)
  return lexer.word_match(words, "'")
end

local ws = token(lexer.WHITESPACE, lexer.space^1)

-- Single line comments, `(*) ...`, are valid in Successor ML. They must be
-- tried before the nested `(* ... *)` form, which shares the `(*` prefix.
local cl = P('(*)') * lexer.nonnewline^0
local comment = token(lexer.COMMENT, cl + lexer.nested_pair('(*', '*)'))

-- String literals, plus character literals written as `#"c"`.
-- Named `str` rather than `string` so the standard `string` library is not
-- shadowed for the remainder of the file.
local str = token(lexer.STRING, P('#')^-1 * lexer.delimited_range('"', true))

--- Builds a numeral pattern over the given digit class.
-- Underscores may separate digits, but the numeral has to start and end with
-- a digit; a lone digit is accepted by the second alternative.
-- @param digit Pattern matching a single digit of the wanted radix.
-- @return Pattern matching a whole numeral.
local function num(digit)
  return digit * (digit^0 * P('_'))^0 * digit^1 + digit
end

local int = num(lexer.digit)
local frac = P('.') * int
local minus = P('~')^-1 -- SML writes numeric negation as `~`
local exp = S('eE') * minus * int
-- A real needs a fraction, an exponent, or both.
local real = int * frac^-1 * exp + int * frac * exp^-1
local hex = num(lexer.xdigit)
local bin = num(S('01'))

-- Alternatives are ordered: word literals (`0w...`) first, then prefixed
-- hexadecimal/binary integers, then reals, and plain integers last so that a
-- shorter alternative never hides a longer literal.
local number = token(lexer.NUMBER,
  P('0w') * int +
  (P('0wx') + P('0xw')) * hex +
  (P('0wb') + P('0bw')) * bin +
  minus * P('0x') * hex +
  minus * P('0b') * bin +
  minus * real +
  minus * int
)

local keyword = token(lexer.KEYWORD, mlword{
  'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end',
  'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let',
  'local', 'nonfix', 'of', 'op', 'orelse', 'raise', 'rec', 'then',
  'type', 'val', 'with', 'withtype', 'while',

  'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
  'struct', 'structure'
})

-- Includes valid symbols for identifiers.
local operator = token(lexer.OPERATOR,
  S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\'))

-- Named `typ` rather than `type` so the built-in `type()` is not shadowed.
local typ = token(lexer.TYPE, mlword{
  'int', 'real', 'word', 'bool', 'char', 'string', 'unit',
  'array', 'exn', 'list', 'option', 'order', 'ref', 'substring', 'vector'
})

-- `real`, `vector` and `substring` are a problem: each is listed both as a
-- type and as a function. The 'type' rule precedes the 'function' rule in
-- `M._rules`, so these three are always tokenized as types.
local func = token(lexer.FUNCTION, mlword{
  'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName',
  'explode', 'floor', 'foldl', 'foldr', 'getOpt', 'hd', 'ignore',
  'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord', 'print',
  'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc',
  'valOf', 'vector',
  'o', 'abs', 'mod', 'div'
})

-- Non-symbolic identifiers only.
local id = (lexer.alnum + "'" + '_')^0
local aid = lexer.alpha * id
local longid = (aid * P('.'))^0 * aid -- dotted path such as `A.B.c`
local identifier = token(lexer.IDENTIFIER, lexer.lower * id)
local typevar = token(lexer.VARIABLE, P("'") * id)
local c = mlword{'true', 'false', 'nil'}
-- Capitalized names are highlighted as constants, together with the
-- lower-case words in `c`.
local const = token(lexer.CONSTANT, lexer.upper * id + c)
-- A qualifier prefix such as `List.`; the trailing dot is part of the token.
local structure = token(lexer.CLASS, aid * P('.'))

-- `open`, `structure` or `functor` followed by a (possibly dotted) name.
local open =
  token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'}) *
  ws * token(lexer.CLASS, longid)

-- Common prefix of a structure declaration: `structure Name = `.
local struct_dec =
  token(lexer.KEYWORD, P('structure')) * ws *
  token(lexer.CLASS, aid) * ws *
  token(lexer.OPERATOR, P('=')) * ws

-- `structure Name = struct`
local struct_new = struct_dec * token(lexer.KEYWORD, P('struct'))
-- `structure Name = Other.Path`
local struct_alias = struct_dec * token(lexer.CLASS, longid)

local M = {_NAME = 'sml'}

-- Rules are listed in the order the original file declared them. The rule
-- names 'string' and 'type' are unchanged; only the Lua locals holding their
-- patterns were renamed.
M._rules = {
  {'whitespace', ws},
  {'comment', comment},
  {'number', number},
  {'struct_new', struct_new},
  {'struct_alias', struct_alias},
  {'structure', structure},
  {'open', open},
  {'type', typ},
  {'keyword', keyword},
  {'function', func},
  {'string', str},
  {'operator', operator},
  {'typevar', typevar},
  {'constant', const},
  {'identifier', identifier},
}

return M