aboutsummaryrefslogtreecommitdiffhomepage
path: root/lexlua/yaml.lua
blob: fd70182fcdcfd5f6fce85590321a516ec473a922 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
-- Copyright 2006-2020 Mitchell mitchell.att.foicica.com. See License.txt.
-- YAML LPeg lexer.
-- It does not keep track of indentation perfectly.

local lexer = require('lexer')
local token, word_match = lexer.token, lexer.word_match
local P, R, S = lpeg.P, lpeg.R, lpeg.S

-- Lexer definition table. It is filled in below (`_rules`, `_tokenstyles`,
-- `_FOLDBYINDENTATION`) and returned at the end of the file; `_NAME` carries
-- the lexer's name, 'yaml'.
local M = {_NAME = 'yaml'}

-- Whitespace.
-- Indentation is lexed one character at a time: each leading space becomes a
-- WHITESPACE token and each leading tab its own 'indent_error' token (styled
-- with a red background in `M._tokenstyles`). The `#lexer.starts_line(...)`
-- lookahead gates the rule without consuming input.
local blank = S(' \t')
local indent_space = token(lexer.WHITESPACE, ' ')
local indent_tab = token('indent_error', '\t')
local indent = #lexer.starts_line(blank) * (indent_space + indent_tab)^1
-- Any other whitespace: a run of blanks or a run of newlines.
local ws = token(lexer.WHITESPACE, blank^1 + lexer.newline^1)

-- Comments.
-- Built with `lexer.to_eol('#')`, i.e. the text introduced by '#'.
local comment_text = lexer.to_eol('#')
local comment = token(lexer.COMMENT, comment_text)

-- Strings.
-- Single- and double-quoted scalars share one STRING token.
-- NOTE: the local `string` shadows Lua's `string` library for the rest of this
-- file; the code below only uses string methods through `:` calls, which do not
-- go through that name.
local sq_str, dq_str = lexer.range("'"), lexer.range('"')
local string = token(lexer.STRING, sq_str + dq_str)

-- Numbers.
-- `+` is LPeg's ordered choice: the first alternative that matches wins and the
-- remaining ones are not tried. The prefixed integer forms ("0x1F", "0o17")
-- must therefore come before the plain decimal pattern. With decimal first, it
-- accepts the leading "0" on its own and the "x1F" / "o17" tail is left for
-- some other rule (assumes `lexer.dec_num` accepts a leading zero, as a plain
-- run of digits does -- confirm against the lexer module).
local octal = '0' * S('oO') * R('07')^1
local integer = lexer.hex_num + octal + lexer.dec_num
-- ".inf" and ".nan". The trailing `true` is word_match's third argument
-- (presumably its case-insensitivity flag in this lexer API -- confirm).
local special_num = '.' * word_match({'inf', 'nan'}, nil, true)
-- Specials and floats are listed ahead of integers so that the integer
-- alternative cannot claim just the leading digits of a longer number.
local number = token(lexer.NUMBER, special_num + lexer.float + integer)

-- Timestamps.
-- Accepted shapes:
--   date only                     2001-12-14
--   date, 't'/'T', time [zone]    2001-12-14t21:59:43.10-05:00
--   date, blanks, time [zone]     2001-12-14 21:59:43.10 -5
local year = lexer.digit * lexer.digit * lexer.digit * lexer.digit
local month = lexer.digit * lexer.digit^-1
local day = lexer.digit * lexer.digit^-1
local date = year * '-' * month * '-' * day
local hours = lexer.digit * lexer.digit^-1
local minutes = lexer.digit * lexer.digit
local seconds = lexer.digit * lexer.digit
-- A '.' followed by any number of digits (including none).
local fraction = '.' * lexer.digit^0
local time = hours * ':' * minutes * ':' * seconds * fraction^-1
-- Separator between the date and the time: blanks or a literal 't'/'T'.
local T = S(' \t')^1 + S('tT')
-- 'Z', or a signed hour offset with optional ':minutes', optionally preceded
-- by blanks.
local zone = 'Z' + S(' \t')^0 * S('-+') * hours * (':' * minutes)^-1
-- The whole time part is optional so that a bare date is still tokenized as a
-- timestamp. If a separator is present but no valid time follows, the optional
-- group backtracks and only the date is consumed.
local ts = token('timestamp', date * (T * time * zone^-1)^-1)

-- Constants.
-- The `true` passed as word_match's third argument is presumably its
-- case-insensitivity flag in this lexer API -- confirm against the lexer module.
local constant_words = {'null', 'true', 'false'}
local constant = token(lexer.CONSTANT, word_match(constant_words, nil, true))

-- Types.
-- Two spellings produce a TYPE token:
--   * '!!' followed by one of the names listed below, e.g. "!!str";
--   * '!' followed by a '<' ... '>' range, e.g. "!<tag:yaml.org,2002:str>".
-- NOTE: the local `type` shadows Lua's builtin `type()` for the rest of this
-- file; nothing below calls the builtin.
local core_type_names = {
  -- Collection types.
  'map', 'omap', 'pairs', 'set', 'seq',
  -- Scalar types.
  'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp',
  'value', 'yaml'
}
local core_type = '!!' * word_match(core_type_names, nil, true)
local verbatim_type = '!' * lexer.range('<', '>', true)
local type = token(lexer.TYPE, core_type + verbatim_type)

-- Document boundaries.
-- The "---" and "..." markers, wrapped in `lexer.starts_line`.
local doc_marker = P('---') + '...'
local doc_bounds = token('document', lexer.starts_line(doc_marker))

-- Directives.
-- A '%' wrapped in `lexer.starts_line`, followed by at least one more
-- non-newline character (e.g. "%YAML 1.2").
local directive_line = lexer.starts_line('%') * lexer.nonnewline^1
local directive = token('directive', directive_line)

-- A word starts with a letter, or with a '-' that is not followed by
-- whitespace, and continues with alphanumerics and '-'.
local word_start = lexer.alpha + '-' * -lexer.space
local word_rest = lexer.alnum + '-'
local word = word_start * word_rest^0

-- Keys and literals.
-- Key/value separator: optional blanks, ':', then whitespace or end of input.
local colon = S(' \t')^0 * ':' * (lexer.space + -1)

-- Match-time check run after a candidate key has been consumed. `index` is the
-- position just past the candidate. The text from the start of the current
-- line up to there is inspected, and the candidate is rejected (returns false)
-- when that text already contains a `[%w-]+:` sequence; otherwise the match
-- continues at `index`. Presumably this keeps only the first key on a line
-- from being highlighted -- confirm the intent.
local function no_earlier_key(input, index)
  local line = input:sub(1, index - 1):match('[^\r\n]+$')
  if line:find('[%w-]+:') then return false end
  return index
end

-- A key starts like a `word` (lookahead only), runs up to but not including
-- the separator, and must be immediately followed by one (lookahead only).
local key_text = (lexer.nonnewline - colon)^1
local key = token(lexer.KEYWORD,
  #word * key_text * #colon * P(no_earlier_key))
-- Plain scalar: starts like a `word` (lookahead only) and extends over
-- non-newline characters, stopping before optional whitespace followed by one
-- of ',', ']' or '}'.
local flow_end = lexer.space^0 * S(',]}')
local value = #word * (lexer.nonnewline - flow_end)^1

-- Match-time scanner that finds where a block scalar's content ends. `index`
-- is the position just after the block header line.
-- The number of leading spaces at `index` sets the block's indentation level.
-- Each non-empty line that follows is visited in turn, and the block ends at
-- the first line that either
--   * has fewer leading spaces than that level and is not just spaces, or
--   * is any line after the first one when the level is 0.
-- If no such line exists the block extends to the end of the input.
local function block_end(input, index)
  local rest = input:sub(index)
  local level = #rest:match('^( *)')
  for pos, text_pos, line in rest:gmatch('() *()([^\r\n]+)') do
    -- `text_pos - pos` is the count of leading spaces on this line.
    local dedented = text_pos - pos < level and line ~= ' '
    local past_first_unindented = level == 0 and pos > 1
    if dedented or past_first_unindented then return index + pos - 1 end
  end
  return #input + 1
end

-- Block scalar header: '|' or '>', an optional '+'/'-' chomping indicator,
-- then a newline or end of input; the content is delimited by `block_end`.
local block_header = S('|>') * S('+-')^-1 * (lexer.newline + -1)
local block = block_header * block_end
local literal = token('literal', value + block)

-- Indicators.
-- Raw patterns first, then the tokens wrapping them.
local anchor_patt = '&' * word           -- &name
local alias_patt = '*' * word            -- *name
local tag_patt = '!' * word * P('!')^-1  -- !name or !name!
local reserved_patt = S('@`') * word     -- '@' or '`' followed by a word
local anchor = token(lexer.LABEL, anchor_patt)
local alias = token(lexer.VARIABLE, alias_patt)
local tag = token('tag', tag_patt)
local reserved = token(lexer.ERROR, reserved_patt)
-- Single-character indicators, styled as operators.
local indicator_chars = token(lexer.OPERATOR, S('-?:,[]{}!'))

-- Combined indicator rule: a full tag is tried before the bare indicator
-- characters, so the '!' of "!name" is not taken as a lone operator.
local indicator = tag + indicator_chars + alias + anchor + reserved

-- Rule list as {name, pattern} pairs. The order is kept exactly as is; in
-- Scintillua's legacy lexer format earlier rules are expected to take
-- precedence (confirm against the lexer module), so e.g. 'key' comes before
-- 'literal' and 'timestamp' before 'number'.
M._rules = {
  {'indent', indent},
  {'whitespace', ws},
  {'comment', comment},
  {'doc_bounds', doc_bounds},
  {'key', key},
  {'string', string},
  {'literal', literal},
  {'timestamp', ts},
  {'number', number},
  {'constant', constant},
  {'type', type},
  {'indicator', indicator},
  {'directive', directive},
}

-- Styles for the custom token names this lexer introduces (the names passed as
-- plain strings to `token` above). Each entry maps a token name to a style
-- string or to one of the lexer module's predefined styles.
M._tokenstyles = {
  -- Tabs found in leading indentation: red background.
  indent_error = 'back:%(color.red)',
  -- "---" / "..." document markers.
  document = lexer.STYLE_CONSTANT,
  -- Plain scalars and block scalars.
  literal = lexer.STYLE_DEFAULT,
  timestamp = lexer.STYLE_NUMBER,
  -- "!name" tags.
  tag = lexer.STYLE_CLASS,
  -- "%..." directive lines.
  directive = lexer.STYLE_PREPROCESSOR,
}

-- Folding flag read by the lexer framework; presumably makes it derive fold
-- levels from line indentation instead of explicit fold points -- confirm
-- against the lexer module.
M._FOLDBYINDENTATION = true

-- Hand the finished lexer definition back to the caller.
return M