aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexlua/yaml.lua
diff options
context:
space:
mode:
Diffstat (limited to 'lexlua/yaml.lua')
-rw-r--r--lexlua/yaml.lua120
1 files changed, 120 insertions, 0 deletions
diff --git a/lexlua/yaml.lua b/lexlua/yaml.lua
new file mode 100644
index 000000000..abfab8a60
--- /dev/null
+++ b/lexlua/yaml.lua
@@ -0,0 +1,120 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- YAML LPeg lexer.
+-- It does not keep track of indentation perfectly.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local M = {_NAME = 'yaml'}
+
+-- Whitespace.
+local indent = #lexer.starts_line(S(' \t')) *
+ (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1
+local ws = token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1)
+
+-- Comments.
+local comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0)
+
+-- Strings.
+local string = token(lexer.STRING, lexer.delimited_range("'") +
+ lexer.delimited_range('"'))
+
+-- Numbers.
+local integer = lexer.dec_num + lexer.hex_num + '0' * S('oO') * R('07')^1
+local special_num = '.' * word_match({'inf', 'nan'}, nil, true)
+local number = token(lexer.NUMBER, special_num + lexer.float + integer)
+
+-- Timestamps.
+local ts = token('timestamp',
+ lexer.digit * lexer.digit * lexer.digit * lexer.digit * -- year
+ '-' * lexer.digit * lexer.digit^-1 * -- month
+ '-' * lexer.digit * lexer.digit^-1 * -- day
+ ((S(' \t')^1 + S('tT'))^-1 * -- separator
+ lexer.digit * lexer.digit^-1 * -- hour
+ ':' * lexer.digit * lexer.digit * -- minute
+ ':' * lexer.digit * lexer.digit * -- second
+ ('.' * lexer.digit^0)^-1 * -- fraction
+ ('Z' + -- timezone
+ S(' \t')^0 * S('-+') * lexer.digit * lexer.digit^-1 *
+ (':' * lexer.digit * lexer.digit)^-1)^-1)^-1)
+
+-- Constants.
+local constant = token(lexer.CONSTANT,
+ word_match({'null', 'true', 'false'}, nil, true))
+
+-- Types.
+local type = token(lexer.TYPE, '!!' * word_match({
+ -- Collection types.
+ 'map', 'omap', 'pairs', 'set', 'seq',
+ -- Scalar types.
+ 'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp',
+ 'value', 'yaml'
+}, nil, true) + '!' * lexer.delimited_range('<>'))
+
+-- Document boundaries.
+local doc_bounds = token('document', lexer.starts_line(P('---') + '...'))
+
+-- Directives
+local directive = token('directive', lexer.starts_line('%') *
+ lexer.nonnewline^1)
+
+local word = (lexer.alpha + '-' * -lexer.space) * (lexer.alnum + '-')^0
+
+-- Keys and literals.
+local colon = S(' \t')^0 * ':' * (lexer.space + -1)
+local key = token(lexer.KEYWORD,
+ #word * (lexer.nonnewline - colon)^1 * #colon *
+ P(function(input, index)
+ local line = input:sub(1, index - 1):match('[^\r\n]+$')
+ return not line:find('[%w-]+:') and index
+ end))
+local value = #word * (lexer.nonnewline - lexer.space^0 * S(',]}'))^1
+local block = S('|>') * S('+-')^-1 * (lexer.newline + -1) *
+ function(input, index)
+ local rest = input:sub(index)
+ local level = #rest:match('^( *)')
+ for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do
+ if indent - pos < level and line ~= ' ' or
+ level == 0 and pos > 1 then
+ return index + pos - 1
+ end
+ end
+ return #input + 1
+ end
+local literal = token('literal', value + block)
+
+-- Indicators.
+local anchor = token(lexer.LABEL, '&' * word)
+local alias = token(lexer.VARIABLE, '*' * word)
+local tag = token('tag', '!' * word * P('!')^-1)
+local reserved = token(lexer.ERROR, S('@`') * word)
+local indicator_chars = token(lexer.OPERATOR, S('-?:,[]{}!'))
+
+M._rules = {
+ {'indent', indent},
+ {'whitespace', ws},
+ {'comment', comment},
+ {'doc_bounds', doc_bounds},
+ {'key', key},
+ {'literal', literal},
+ {'timestamp', ts},
+ {'number', number},
+ {'constant', constant},
+ {'type', type},
+ {'indicator', tag + indicator_chars + alias + anchor + reserved},
+ {'directive', directive},
+}
+
+M._tokenstyles = {
+ indent_error = 'back:%(color.red)',
+ document = lexer.STYLE_CONSTANT,
+ literal = lexer.STYLE_DEFAULT,
+ timestamp = lexer.STYLE_NUMBER,
+ tag = lexer.STYLE_CLASS,
+ directive = lexer.STYLE_PREPROCESSOR,
+}
+
+M._FOLDBYINDENTATION = true
+
+return M