aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexlua/sml.lua
diff options
context:
space:
mode:
author mitchell <unknown> 2018-03-11 23:04:41 -0400
committer mitchell <unknown> 2018-03-11 23:04:41 -0400
commit 519b7328b66c4c84f03893a31e4be5ba6b1395f2 (patch)
tree 2055cd79006357e94c185f341d0df17b9a8769eb /lexlua/sml.lua
parent c0373e036e965a70045971e2abc582cb4bf12a4e (diff)
download scintilla-mirror-519b7328b66c4c84f03893a31e4be5ba6b1395f2.tar.gz
Added optional Lua lexer support.
This support is disabled by default and must be enabled via compile-time option.
Diffstat (limited to 'lexlua/sml.lua')
-rw-r--r-- lexlua/sml.lua 113
1 files changed, 113 insertions, 0 deletions
diff --git a/lexlua/sml.lua b/lexlua/sml.lua
new file mode 100644
index 000000000..4b8faf625
--- /dev/null
+++ b/lexlua/sml.lua
@@ -0,0 +1,113 @@
+-- Copyright 2017-2018 Murray Calavera. See License.txt.
+-- Standard ML LPeg lexer.
+
+local lexer = require('lexer')
+local token = lexer.token
+
+--- Builds a word-list pattern for Standard ML words.
+-- Forwards `words` to `lexer.word_match` together with "'" as the second
+-- argument (presumably so that the prime character, as in `x'`, counts as
+-- part of a word -- confirm against the lexer module's `word_match`).
+-- Declared `local` so that loading this lexer does not leak a global
+-- `mlword` into the shared Lua state; it is only used within this file.
+-- @param words Table of words to match.
+-- @return The pattern returned by `lexer.word_match`.
+local function mlword(words)
+ return lexer.word_match(words, "'")
+end
+
+-- Whitespace: one or more space characters.
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+
+-- Comments.
+-- Successor ML accepts single-line comments introduced by `(*)`. That form is
+-- tried first; otherwise `(*)` would be taken as the opening `(*` of a
+-- (nestable) block comment.
+local line_comment = lpeg.P('(*)') * lexer.nonnewline^0
+local block_comment = lexer.nested_pair('(*', '*)')
+local comment = token(lexer.COMMENT, line_comment + block_comment)
+
+-- String literals ("...") and character literals (#"c"): an optional `#`
+-- followed by a double-quoted range.
+-- NOTE: this local shadows Lua's `string` library for the rest of the file.
+local char_prefix = lpeg.P('#')^-1
+local quoted = lexer.delimited_range('"', true)
+local string = token(lexer.STRING, char_prefix * quoted)
+
+--- Builds a pattern for a numeral whose digits may be separated by
+-- underscores (e.g. `1_000`).
+-- A numeral starts and ends with a digit; one or more underscores may appear
+-- between groups of digits.
+-- LPeg repetitions never give back what they consumed, so the pattern is
+-- written as "digit group, then any number of (underscores, digit group)".
+-- That shape always yields the longest valid prefix: on `12_` it matches
+-- `12`, rather than failing as a whole and falling back to the single digit
+-- `1`.
+-- @param digit Pattern that matches exactly one digit of the desired base.
+-- @return Pattern that matches one numeral in that base.
+local function num(digit)
+ return digit^1 * (lpeg.P('_')^1 * digit^1)^0
+end
+
+-- Building blocks for numeric literals.
+-- Digit sequences per base.
+local int = num(lexer.digit)
+local hex = num(lexer.xdigit)
+local bin = num(lpeg.S('01'))
+-- Optional `~` sign.
+local minus = lpeg.P('~')^-1
+-- Fractional part and exponent of a real literal.
+local frac = lpeg.P('.') * int
+local exp = lpeg.S('eE') * minus * int
+-- A real needs an exponent, a fraction, or both after the integer part.
+local real = int * (frac^-1 * exp + frac * exp^-1)
+
+-- Numeric literals. Alternatives are tried in the order listed.
+local number
+do
+ -- Literals with a `0w` prefix: decimal, hexadecimal and binary.
+ local word_literal = lpeg.P('0w') * int
+  + (lpeg.P('0wx') + '0xw') * hex
+  + (lpeg.P('0wb') + '0bw') * bin
+ -- Literals with an optional `~` sign. `real` is tried before `int` so that
+ -- the integer part of a real is not matched on its own.
+ local signed_literal = minus * (
+  lpeg.P('0x') * hex
+  + lpeg.P('0b') * bin
+  + real
+  + int
+ )
+ number = token(lexer.NUMBER, word_literal + signed_literal)
+end
+
+-- Reserved words.
+local keyword = token(lexer.KEYWORD, mlword({
+ -- First group.
+ 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else',
+ 'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
+ 'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'orelse', 'raise',
+ 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
+ -- Second group (signatures, structures, functors).
+ 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature',
+ 'struct', 'structure',
+}))
+
+-- Operators and punctuation.
+-- The character set also covers the symbols that are valid in identifiers.
+local symbol_chars = '!*/+-^:@=<>()[]{},;._|#%&$?~`\\'
+local operator = token(lexer.OPERATOR, lpeg.S(symbol_chars))
+
+-- Type names.
+-- NOTE: this local shadows Lua's built-in `type` function for the rest of
+-- the file.
+local type = token(lexer.TYPE, mlword({
+ 'int', 'real', 'word', 'bool', 'char', 'string', 'unit', 'array', 'exn',
+ 'list', 'option', 'order', 'ref', 'substring', 'vector',
+}))
+
+-- Function names.
+-- NOTE: `real`, `substring` and `vector` also appear in the type-name list.
+-- M._rules lists 'type' before 'function', so (assuming rules are tried in
+-- order) these three are highlighted as types, never as functions.
+local func = token(lexer.FUNCTION, mlword({
+ 'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName',
+ 'explode', 'floor', 'foldl', 'foldr', 'getOpt', 'hd', 'ignore', 'implode',
+ 'isSome', 'length', 'map', 'not', 'null', 'ord', 'print', 'real', 'rev',
+ 'round', 'size', 'str', 'substring', 'tl', 'trunc', 'valOf', 'vector',
+ -- Short operator-like names.
+ 'o', 'abs', 'mod', 'div',
+}))
+
+-- Alphanumeric (non-symbolic) identifiers only.
+-- `id` is the tail of an identifier: letters, digits, primes, underscores.
+local id = (lexer.alnum + lpeg.P("'") + lpeg.P('_'))^0
+-- `aid` is a full identifier that starts with a letter.
+local aid = lexer.alpha * id
+-- `longid` is a dot-qualified identifier such as `A.B.c`.
+local longid = (aid * '.')^0 * aid
+
+-- Type variables start with a prime.
+local typevar = token(lexer.VARIABLE, lpeg.P("'") * id)
+-- An identifier immediately followed by a dot is styled as a class.
+local structure = token(lexer.CLASS, aid * '.')
+-- Constants: capitalized identifiers plus `true`, `false` and `nil`.
+local c = mlword({'true', 'false', 'nil'})
+local const = token(lexer.CONSTANT, lexer.upper * id + c)
+-- Ordinary identifiers start with a lowercase letter.
+local identifier = token(lexer.IDENTIFIER, lexer.lower * id)
+
+-- `open`, `structure` or `functor` followed by whitespace and a (possibly
+-- qualified) name; the name is styled as a class.
+local open_keyword = token(lexer.KEYWORD,
+ mlword({'open', 'structure', 'functor'}))
+local open = open_keyword * ws * token(lexer.CLASS, longid)
+
+-- Structure declarations. `struct_dec` matches the shared prefix
+-- `structure Name = ` including the whitespace after the equals sign.
+local struct_keyword = token(lexer.KEYWORD, lpeg.P('structure'))
+local struct_name = token(lexer.CLASS, aid)
+local struct_equals = token(lexer.OPERATOR, lpeg.P('='))
+local struct_dec = struct_keyword * ws * struct_name * ws * struct_equals * ws
+
+-- `structure Name = struct`: the body of a new structure follows.
+local struct_new = struct_dec * token(lexer.KEYWORD, lpeg.P('struct'))
+-- `structure Name = Other.Name`: alias of an existing structure.
+local struct_alias = struct_dec * token(lexer.CLASS, longid)
+
+-- Lexer definition: its name and the list of {rule name, pattern} pairs.
+-- The list order is significant wherever patterns overlap (presumably earlier
+-- rules take precedence -- confirm against the lexer module), so it must not
+-- be rearranged casually.
+local M = {
+ _NAME = 'sml',
+ _rules = {
+  {'whitespace', ws},
+  {'comment', comment},
+  {'number', number},
+  {'struct_new', struct_new},
+  {'struct_alias', struct_alias},
+  {'structure', structure},
+  {'open', open},
+  {'type', type},
+  {'keyword', keyword},
+  {'function', func},
+  {'string', string},
+  {'operator', operator},
+  {'typevar', typevar},
+  {'constant', const},
+  {'identifier', identifier},
+ },
+}
+
+return M