 check.mak            |    4 ++-
 test/test_lexlua.lua | 1148 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1151 insertions(+), 1 deletion(-)
diff --git a/check.mak b/check.mak
index 5bcc2f1c2..995321ce7 100644
--- a/check.mak
+++ b/check.mak
@@ -133,7 +133,9 @@ clean:
rm -rf /tmp/scintilla
.PHONY: test
-test: | /tmp/scintilla ; make -C $|/test/unit CXX=g++ clean test
+test: | /tmp/scintilla
+ make -C $|/test/unit CXX=g++ clean test
+ cd $|/test && lua5.1 test_lexlua.lua
releasedir = /tmp/scintilla$(shell grep -o '[0-9]\+' version.txt)
$(releasedir): ; hg archive $@
diff --git a/test/test_lexlua.lua b/test/test_lexlua.lua
new file mode 100644
index 000000000..3c4cdea40
--- /dev/null
+++ b/test/test_lexlua.lua
@@ -0,0 +1,1148 @@
+-- Copyright 2017-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- Unit tests for Lua LPeg lexers, but without using the Scintilla lexer.
+
+package.path = '../lexlua/?.lua;'..package.path
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local lpeg = require('lpeg')
+-- The Scintilla LPeg lexer normally defines these.
+lexer.FOLD_BASE, lexer.FOLD_HEADER, lexer.FOLD_BLANK = 0x400, 0x2000, 0x1000
+
+-- Helper assert functions.
+
+-- Asserts the given lexer contains the default LPeg lexer and Scintilla styles,
+-- and that those styles are correctly numbered. LPeg lexer style numbers start
+-- at 0 while Scintilla styles start at 32.
+-- Note: the style tables used are copied from lexer.lua since they are local to
+-- that file.
+-- @param lex The lexer to style-check.
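+-- @usage assert_default_styles(lexer.new('test'))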
+function assert_default_styles(lex)
+ local default_styles = {
+ 'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword',
+ 'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable',
+ 'function', 'class', 'type', 'label', 'regex', 'embedded'
+ }
+ for i = 1, #default_styles do
+ local style = default_styles[i]
+ assert(lex._TOKENSTYLES[style],
+ string.format("style '%s' does not exist", style))
+ assert(lex._TOKENSTYLES[style] == i - 1, 'default styles out of order')
+ end
+ local predefined_styles = {
+ 'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar',
+ 'indentguide', 'calltip', 'folddisplaytext'
+ }
+ for i = 1, #predefined_styles do
+ local style = predefined_styles[i]
+ assert(lex._TOKENSTYLES[style],
+ string.format("style '%s' does not exist", style))
+ assert(lex._TOKENSTYLES[style] == i + 31, 'predefined styles out of order')
+ end
+end
+
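+-- A quick illustrative sanity check (a sketch, not one of the unit tests
+-- below): per the helper above, the first default style, 'nothing', should be
+-- numbered 0 and the first predefined style, 'default', should be numbered 32.
+do
+  local lex = lexer.new('sanity')
+  assert(lex._TOKENSTYLES['nothing'] == 0)
+  assert(lex._TOKENSTYLES['default'] == 32)
+end
+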
+-- Asserts the given lexer has the given style assigned to the given style name.
+-- @param lex The lexer to style-check.
+-- @param style_name The name of the style to check for.
+-- @param style The style's expected Scintilla style string.
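+-- @usage assert_style(lex, 'custom', lexer.STYLE_KEYWORD)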
+function assert_style(lex, style_name, style)
+ assert(lex._TOKENSTYLES[style_name],
+ string.format("style '%s' does not exist", style_name))
+ assert(lex._EXTRASTYLES[style_name] == style,
+ string.format("'%s' ~= '%s'", lex._EXTRASTYLES[style_name], style))
+end
+
+-- Asserts the given lexer contains the given ordered list of rules.
+-- @param lex The lexer to rule-check.
+-- @param rules The ordered list of rule names the lexer should have.
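+-- @usage assert_rules(lex, {'whitespace', 'keyword', 'string'})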
+function assert_rules(lex, rules)
+ local j = 1
+ for i = 1, #lex._RULEORDER do
+ assert(lex._RULES[rules[j]],
+ string.format("rule '%s' does not exist", rules[j]))
+ assert(lex._RULEORDER[i] == rules[j],
+           string.format("'%s' ~= '%s'", lex._RULEORDER[i], rules[j] or ''))
+ j = j + 1
+ end
+ if #lex._RULEORDER ~= #rules then
+ error(string.format("'%s' rule not found", rules[j]))
+ end
+end
+
+-- Asserts the given lexer contains the given set of extra styles in addition to
+-- its defaults.
+-- @param lex The lexer to style-check.
+-- @param styles The list of extra style names the lexer should have.
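+-- @usage assert_extra_styles(lua, {'library', 'longstring'})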
+function assert_extra_styles(lex, styles)
+ for i = 1, #styles do
+ assert(lex._TOKENSTYLES[styles[i]],
+ string.format("'%s' not found", styles[i]))
+ assert(lex._EXTRASTYLES[styles[i]],
+ string.format("'%s' not found", styles[i]))
+ end
+end
+
+-- Asserts the given lexer contains the given set of child lexer names.
+-- @param lex The lexer to child-check.
+-- @param children The list of child lexer names the lexer should have.
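+-- @usage assert_children(html, {'css', 'javascript', 'coffeescript'})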
+function assert_children(lex, children)
+ local j = 1
+ for i = 1, #lex._CHILDREN do
+ assert(lex._CHILDREN[i]._NAME == children[j],
+ string.format("'%s' ~= '%s'", lex._CHILDREN[i]._NAME,
+ children[j] or ''))
+ j = j + 1
+ end
+ if #lex._CHILDREN ~= #children then
+ error(string.format("child '%s' not found", children[j]))
+ end
+end
+
+-- Asserts the given lexer produces the given tokens after lexing the given
+-- code.
+-- @param lex The lexer to use.
+-- @param code The string code to lex.
+-- @param expected_tokens The list of expected tokens from the lexer. Each
+--   token is a table that contains the token's name followed by the substring
+--   of code matched. Whitespace tokens are ignored for simplicity; do not
+--   include them.
+-- @param initial_style Optional current style. This is used to determine
+--   which language to start lexing in within a multiple-language lexer.
+-- @usage assert_lex(lua, "print('hi')", {{'function', 'print'},
+-- {'operator', '('}, {'string', "'hi'"}, {'operator', ')'}})
+function assert_lex(lex, code, expected_tokens, initial_style)
+ if lex._lexer then lex = lex._lexer end -- note: lexer.load() does this
+ local tokens = lex:lex(code, initial_style or
+ lex._TOKENSTYLES[lex._NAME..'_whitespace'])
+ local j = 1
+ for i = 1, #tokens, 2 do
+ if not tokens[i]:find('whitespace$') then
+ local token = tokens[i]
+      local text = code:sub(tokens[i - 1] or 1, tokens[i + 1] - 1)
+ assert(token == expected_tokens[j][1] and text == expected_tokens[j][2],
+ string.format("('%s', '%s') ~= ('%s', '%s')", token, text,
+ expected_tokens[j][1], expected_tokens[j][2]))
+ j = j + 1
+ end
+ end
+ if j - 1 ~= #expected_tokens then
+ error(string.format("('%s', '%s') not found", expected_tokens[j][1],
+ expected_tokens[j][2]))
+ end
+end
+
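+-- An illustrative sketch (not one of the unit tests below) of the raw
+-- `lex:lex()` output format that assert_lex() consumes: a flat array of
+-- token name/position pairs, where each position is one past the token's last
+-- character, so token k spans code:sub(pos[k - 1] or 1, pos[k] - 1).
+do
+  local lex = lexer.new('sketch')
+  lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+  lex:add_rule('number', token(lexer.NUMBER, lexer.integer))
+  local tokens = lex:lex('42 7')
+  -- Expected shape: {'number', 3, 'whitespace', 4, 'number', 5}.
+  assert(tokens[1] == lexer.NUMBER and tokens[2] == 3)
+end
+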
+-- Asserts the given lexer produces the given fold points after lexing the
+-- given code.
+-- @param lex The lexer to use.
+-- @param code The string code to fold.
+-- @param expected_fold_points The list of expected fold points from the lexer.
+-- Each fold point is just a line number, starting from 1.
+-- @param initial_style Optional current style. This is used to determine
+--   which language to start lexing in within a multiple-language lexer.
+-- @return the computed fold levels for further analysis
+-- @usage assert_fold_points(lua, "if foo then\n bar\nend", {1})
+function assert_fold_points(lex, code, expected_fold_points, initial_style)
+ if lex._lexer then lex = lex._lexer end -- note: lexer.load() does this
+  -- Since `lexer.style_at` is normally provided by Scintilla and is not
+  -- available in tests, simulate it using the data from `lexer.lex()`.
+ local tokens = lex:lex(code, initial_style or
+ lex._TOKENSTYLES[lex._NAME..'_whitespace'])
+ lexer.style_at = setmetatable({}, {__index = function(self, pos)
+ for i = 2, #tokens, 2 do
+ if pos < tokens[i] then return tokens[i - 1] end
+ end
+ end})
+ if not lexer.property then -- Scintilla normally creates this
+ lexer.property, lexer.property_int = {}, setmetatable({}, {
+ __index = function(t, k) return tonumber(lexer.property[k]) or 0 end,
+ __newindex = function() error('read-only property') end
+ })
+ end
+ lexer.property['fold'] = 1
+ local levels = lex:fold(code, 0, 1, lexer.FOLD_BASE)
+ local j = 1
+ for i = 1, #levels do
+ if i == expected_fold_points[j] then
+ assert(levels[i] >= lexer.FOLD_HEADER,
+ string.format("line %i not a fold point", i))
+ j = j + 1
+ else
+ assert(levels[i] <= lexer.FOLD_HEADER,
+ string.format("line %i is a fold point", i))
+ end
+ end
+  assert(j - 1 == #expected_fold_points,
+         string.format("line %i not a fold point", expected_fold_points[j]))
+ return levels
+end
+
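+-- An illustrative helper (hypothetical; not used by the assertions above):
+-- fold levels returned by `lex:fold()` encode a depth in the low bits, offset
+-- by FOLD_BASE, with the FOLD_HEADER and FOLD_BLANK bits OR'ed in as flags.
+-- Since this runs under Lua 5.1 (no bitwise operators), the flags are peeled
+-- off arithmetically.
+local function decode_fold_level(level)
+  local header = math.floor(level / lexer.FOLD_HEADER) % 2 == 1
+  local blank = math.floor(level / lexer.FOLD_BLANK) % 2 == 1
+  local depth = level % lexer.FOLD_BLANK - lexer.FOLD_BASE
+  return depth, header, blank
+end
+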
+-- Unit tests.
+
+-- Tests a basic lexer with a few simple rules and no custom styles.
+function test_basics()
+ local lex = lexer.new('test')
+ assert_default_styles(lex)
+ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[foo bar baz]]))
+ lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"')))
+ lex:add_rule('number', token(lexer.NUMBER, lexer.integer))
+ local code = [[foo bar baz "foo bar baz" 123]]
+ local tokens = {
+ {lexer.KEYWORD, 'foo'},
+ {lexer.KEYWORD, 'bar'},
+ {lexer.KEYWORD, 'baz'},
+ {lexer.STRING, '"foo bar baz"'},
+ {lexer.NUMBER, '123'}
+ }
+ assert_lex(lex, code, tokens)
+end
+
+-- Tests that lexer rules are added in an ordered sequence and that
+-- modifying rules in place works as expected.
+function test_rule_order()
+ local lex = lexer.new('test')
+ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+ lex:add_rule('keyword', token(lexer.KEYWORD, lpeg.P('foo')))
+ local code = [[foo bar]]
+ local tokens = {
+ {lexer.IDENTIFIER, 'foo'},
+ {lexer.IDENTIFIER, 'bar'}
+ }
+ assert_lex(lex, code, tokens)
+
+ -- Modify the identifier rule to not catch keywords.
+ lex:modify_rule('identifier', token(lexer.IDENTIFIER,
+ -lpeg.P('foo') * lexer.word))
+ tokens = {
+ {lexer.KEYWORD, 'foo'},
+ {lexer.IDENTIFIER, 'bar'},
+ }
+ assert_lex(lex, code, tokens)
+end
+
+-- Tests a basic lexer with a couple of simple rules and a custom style.
+function test_add_style()
+ local lex = lexer.new('test')
+ assert_default_styles(lex)
+ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ lex:add_rule('keyword', token('custom', word_match[[foo bar baz]]))
+ lex:add_style('custom', lexer.STYLE_KEYWORD)
+ assert_default_styles(lex)
+ assert_style(lex, 'custom', lexer.STYLE_KEYWORD)
+ local code = [[foo bar baz]]
+ local tokens = {
+ {'custom', 'foo'},
+ {'custom', 'bar'},
+ {'custom', 'baz'}
+ }
+ assert_lex(lex, code, tokens)
+end
+
+-- Tests a simple parent lexer embedding a simple child lexer.
+-- Ensures the child's custom styles are also copied over.
+function test_embed()
+ -- Create the parent lexer.
+ -- Note: lexer.load() sets lexer.WHITESPACE and adds the custom whitespace
+ -- style.
+ local parent = lexer.new('parent')
+ assert_default_styles(parent)
+ lexer.WHITESPACE = parent._NAME..'_whitespace'
+ parent:add_style(lexer.WHITESPACE, lexer.STYLE_WHITESPACE)
+ assert_style(parent, parent._NAME..'_whitespace', lexer.STYLE_WHITESPACE)
+ parent:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ parent:add_rule('identifier', token('parent', lexer.word))
+ parent:add_style('parent', lexer.STYLE_IDENTIFIER)
+ assert_style(parent, 'parent', lexer.STYLE_IDENTIFIER)
+
+ -- Create the child lexer.
+ -- Note: lexer.load() sets lexer.WHITESPACE and adds the custom whitespace
+ -- style.
+ local child = lexer.new('child')
+ assert_default_styles(child)
+ lexer.WHITESPACE = child._NAME..'_whitespace'
+ child:add_style(lexer.WHITESPACE, lexer.STYLE_WHITESPACE)
+ assert_style(child, child._NAME..'_whitespace', lexer.STYLE_WHITESPACE)
+ child:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ child:add_rule('number', token('child', lexer.integer))
+ child:add_style('child', lexer.STYLE_NUMBER)
+ assert_style(child, 'child', lexer.STYLE_NUMBER)
+
+ -- Assert the child's styles are not embedded in the parent yet.
+ assert(not parent._TOKENSTYLES[child._NAME..'_whitespace'])
+ assert(not parent._EXTRASTYLES[child._NAME..'_whitespace'])
+ assert(not parent._TOKENSTYLES['child'])
+ assert(not parent._EXTRASTYLES['child'])
+
+ -- Embed the child into the parent and verify the child's styles were copied
+ -- over.
+ local start_rule = token('transition', lpeg.P('['))
+ local end_rule = token('transition', lpeg.P(']'))
+ parent:embed(child, start_rule, end_rule)
+ parent:add_style('transition', lexer.STYLE_EMBEDDED)
+ assert_default_styles(parent)
+ assert_style(parent, parent._NAME..'_whitespace', lexer.STYLE_WHITESPACE)
+ assert_style(parent, 'parent', lexer.STYLE_IDENTIFIER)
+ assert_style(parent, 'transition', lexer.STYLE_EMBEDDED)
+ assert_style(parent, child._NAME..'_whitespace', lexer.STYLE_WHITESPACE)
+ assert_style(parent, 'child', lexer.STYLE_NUMBER)
+
+ -- Lex some parent -> child -> parent code.
+ local code = [[foo [1, 2, 3] bar]]
+ local tokens = {
+ {'parent', 'foo'},
+ {'transition', '['},
+ {'child', '1'},
+ {lexer.DEFAULT, ','},
+ {'child', '2'},
+ {lexer.DEFAULT, ','},
+ {'child', '3'},
+ {'transition', ']'},
+ {'parent', 'bar'}
+ }
+ assert_lex(parent, code, tokens)
+
+ -- Lex some child -> parent code, starting from within the child.
+ code = [[2, 3] bar]]
+ tokens = {
+ {'child', '2'},
+ {lexer.DEFAULT, ','},
+ {'child', '3'},
+ {'transition', ']'},
+ {'parent', 'bar'}
+ }
+ local initial_style = parent._TOKENSTYLES[child._NAME..'_whitespace']
+ assert_lex(parent, code, tokens, initial_style)
+end
+
+-- Tests a simple child lexer embedding itself within a simple parent lexer.
+-- Ensures the child's custom styles are also copied over.
+function test_embed_into()
+ -- Create the child lexer.
+ -- Note: lexer.load() sets lexer.WHITESPACE and adds the custom whitespace
+ -- style.
+ local child = lexer.new('child')
+ lexer.WHITESPACE = child._NAME..'_whitespace'
+ child:add_style(lexer.WHITESPACE, lexer.STYLE_WHITESPACE)
+ child:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ child:add_rule('number', token('child', lexer.integer))
+ child:add_style('child', lexer.STYLE_NUMBER)
+
+ -- Create the parent lexer.
+ -- Note: lexer.load() sets lexer.WHITESPACE and adds the custom whitespace
+ -- style.
+ local parent = lexer.new('parent')
+ lexer.WHITESPACE = parent._NAME..'_whitespace'
+ parent:add_style(lexer.WHITESPACE, lexer.STYLE_WHITESPACE)
+ parent:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ parent:add_rule('identifier', token('parent', lexer.word))
+ parent:add_style('parent', lexer.STYLE_IDENTIFIER)
+
+ -- Embed the child within the parent and verify the child's custom styles were
+ -- copied over.
+ local start_rule = token('transition', lpeg.P('['))
+ local end_rule = token('transition', lpeg.P(']'))
+ parent:embed(child, start_rule, end_rule)
+ parent:add_style('transition', lexer.STYLE_EMBEDDED)
+ assert_default_styles(parent)
+ assert_style(parent, parent._NAME..'_whitespace', lexer.STYLE_WHITESPACE)
+ assert_style(parent, 'parent', lexer.STYLE_IDENTIFIER)
+ assert_style(parent, 'transition', lexer.STYLE_EMBEDDED)
+ assert_style(parent, child._NAME..'_whitespace', lexer.STYLE_WHITESPACE)
+ assert_style(parent, 'child', lexer.STYLE_NUMBER)
+
+ -- Verify any subsequent style additions to the child are copied to the
+ -- parent.
+ child:add_style('extra_style', lexer.STYLE_COMMENT)
+ assert_style(parent, 'extra_style', lexer.STYLE_COMMENT)
+
+ -- Verify any subsequent fold point additions to the child are copied to the
+ -- parent.
+ child:add_fold_point('transition', '[', ']')
+ assert(parent._FOLDPOINTS['transition']['['] == 1)
+ assert(parent._FOLDPOINTS['transition'][']'] == -1)
+
+ -- Lex some parent -> child -> parent code.
+ local code = [[foo [1, 2, 3] bar]]
+ local tokens = {
+ {'parent', 'foo'},
+ {'transition', '['},
+ {'child', '1'},
+ {lexer.DEFAULT, ','},
+ {'child', '2'},
+ {lexer.DEFAULT, ','},
+ {'child', '3'},
+ {'transition', ']'},
+ {'parent', 'bar'}
+ }
+ assert_lex(child, code, tokens)
+
+ -- Lex some child -> parent code, starting from within the child.
+ code = [[2, 3] bar]]
+ tokens = {
+ {'child', '2'},
+ {lexer.DEFAULT, ','},
+ {'child', '3'},
+ {'transition', ']'},
+ {'parent', 'bar'}
+ }
+ local initial_style = parent._TOKENSTYLES[child._NAME..'_whitespace']
+ assert_lex(child, code, tokens, initial_style)
+
+ -- Fold some code.
+ code = [[
+ foo [
+ 1, 2, 3
+ ] bar
+ baz
+ ]]
+ local folds = {1}
+ local levels = assert_fold_points(child, code, folds)
+ assert(levels[3] > levels[4]) -- verify ']' is fold end point
+end
+
+-- Tests a proxy lexer that inherits from a simple parent lexer and embeds a
+-- simple child lexer.
+-- Ensures both the proxy's and child's custom styles are also copied over.
+function test_proxy()
+ -- Create the parent lexer.
+ -- Note: lexer.load() sets lexer.WHITESPACE and adds the custom whitespace
+ -- style.
+ local parent = lexer.new('parent')
+ lexer.WHITESPACE = parent._NAME..'_whitespace'
+ parent:add_style(lexer.WHITESPACE, lexer.STYLE_WHITESPACE)
+ parent:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ parent:add_rule('identifier', token('parent', lexer.word))
+ parent:add_style('parent', lexer.STYLE_IDENTIFIER)
+
+ -- Create the child lexer.
+ -- Note: lexer.load() sets lexer.WHITESPACE and adds the custom whitespace
+ -- style.
+ local child = lexer.new('child')
+ lexer.WHITESPACE = child._NAME..'_whitespace'
+ child:add_style(lexer.WHITESPACE, lexer.STYLE_WHITESPACE)
+ child:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ child:add_rule('number', token('child', lexer.integer))
+ child:add_style('child', lexer.STYLE_NUMBER)
+
+ -- Create the proxy lexer.
+ local proxy = lexer.new('proxy', {inherit = parent})
+
+  -- Embed the child into the proxy and verify the proxy's custom style is
+  -- copied through to the parent.
+ local start_rule = token('transition', lpeg.P('['))
+ local end_rule = token('transition', lpeg.P(']'))
+ proxy:embed(child, start_rule, end_rule)
+ proxy:add_style('transition', lexer.STYLE_EMBEDDED)
+ assert_style(parent, 'transition', lexer.STYLE_EMBEDDED)
+
+ -- Verify any subsequent style additions to the proxy are copied to the
+ -- parent.
+ proxy:add_style('extra_style', lexer.STYLE_COMMENT)
+ assert_style(parent, 'extra_style', lexer.STYLE_COMMENT)
+
+ -- Lex some parent -> child -> parent code.
+ local code = [[foo [1, 2, 3] bar]]
+ local tokens = {
+ {'parent', 'foo'},
+ {'transition', '['},
+ {'child', '1'},
+ {lexer.DEFAULT, ','},
+ {'child', '2'},
+ {lexer.DEFAULT, ','},
+ {'child', '3'},
+ {'transition', ']'},
+ {'parent', 'bar'}
+ }
+ assert_lex(proxy, code, tokens)
+
+ -- Lex some child -> parent code, starting from within the child.
+ code = [[ 2, 3] bar]]
+ tokens = {
+ {'child', '2'},
+ {lexer.DEFAULT, ','},
+ {'child', '3'},
+ {'transition', ']'},
+ {'parent', 'bar'}
+ }
+ local initial_style = parent._TOKENSTYLES[child._NAME..'_whitespace']
+ assert_lex(proxy, code, tokens, initial_style)
+
+ -- Verify any subsequent fold point additions to the proxy are copied to
+ -- the parent.
+ proxy:add_fold_point('transition', '[', ']')
+ assert(parent._FOLDPOINTS['transition']['['] == 1)
+ assert(parent._FOLDPOINTS['transition'][']'] == -1)
+
+ -- Fold some code.
+ code = [[
+ foo [
+ 1, 2, 3
+ ] bar
+ baz
+ ]]
+ local folds = {1}
+ local levels = assert_fold_points(proxy, code, folds)
+ assert(levels[3] > levels[4]) -- verify ']' is fold end point
+end
+
+-- Tests a lexer that inherits from another one.
+function test_inherits_rules()
+ local lex = lexer.new('test')
+ lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[foo bar baz]]))
+
+ -- Verify inherited rules are used.
+ local sublexer = lexer.new('test2', {inherit = lex})
+ local code = [[foo bar baz]]
+ local tokens = {
+ {lexer.KEYWORD, 'foo'},
+ {lexer.KEYWORD, 'bar'},
+ {lexer.KEYWORD, 'baz'}
+ }
+ assert_lex(sublexer, code, tokens)
+
+ -- Verify subsequently added rules are also used.
+ sublexer:add_rule('keyword2', token(lexer.KEYWORD, lpeg.P('quux')))
+ code = [[foo bar baz quux]]
+ tokens = {
+ {lexer.KEYWORD, 'foo'},
+ {lexer.KEYWORD, 'bar'},
+ {lexer.KEYWORD, 'baz'},
+ {lexer.KEYWORD, 'quux'}
+ }
+ assert_lex(sublexer, code, tokens)
+end
+
+-- Tests that fold words are folded properly, even if fold words are substrings
+-- of others (e.g. "if" and "endif").
+function test_fold_words()
+ local lex = lexer.new('test')
+ lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[if endif]]))
+ lex:add_fold_point(lexer.KEYWORD, 'if', 'endif')
+
+ local code = [[
+ if foo
+ bar
+ endif
+ ifbaz
+ quuxif
+ ]]
+ local folds = {1}
+ local levels = assert_fold_points(lex, code, folds)
+ assert(levels[2] == lexer.FOLD_BASE + 1)
+ assert(levels[3] == lexer.FOLD_BASE + 1)
+ assert(levels[4] == lexer.FOLD_BASE)
+end
+
+-- Tests folding by indentation.
+function test_fold_by_indentation()
+ local lex = lexer.new('test', {fold_by_indentation = true})
+ local code = [[
+ if foo:
+ bar
+ else:
+ baz
+ ]]
+ lexer.fold_level = {[0] = lexer.FOLD_BASE} -- Scintilla normally creates this
+ lexer.indent_amount = {[0] = 0} -- Scintilla normally creates this
+ local folds = {1, 3}
+ assert_fold_points(lex, code, folds)
+end
+
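+-- Tests that a lexer written in the legacy table format (_rules,
+-- _tokenstyles, _foldsymbols) still loads and lexes as expected.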
+function test_legacy()
+ local lex = {_NAME = 'test'}
+ lex._rules = {
+ {'whitespace', token(lexer.WHITESPACE, lexer.space^1)},
+ {'keyword', token(lexer.KEYWORD, word_match{'foo', 'bar', 'baz'})},
+ {'custom', token('custom', lpeg.P('quux'))}
+ }
+ lex._tokenstyles = {custom = lexer.STYLE_CONSTANT}
+ lex._foldsymbols = {
+ _patterns = {'%l+'},
+ [lexer.KEYWORD] = {foo = 1, baz = -1}
+ }
+ -- The following comes from `process_legacy_lexer()`.
+ local default = {
+ 'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword',
+ 'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable',
+ 'function', 'class', 'type', 'label', 'regex', 'embedded'
+ }
+ local predefined = {
+ 'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar',
+ 'indentguide', 'calltip', 'folddisplaytext'
+ }
+ local token_styles = {}
+ for i = 1, #default do token_styles[default[i]] = i - 1 end
+ for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end
+ lex._TOKENSTYLES, lex._numstyles = token_styles, #default
+ lex._EXTRASTYLES = {}
+ assert_default_styles(lex)
+ setmetatable(lex, getmetatable(lexer.new('')))
+ for i = 1, #lex._rules do lex:add_rule(lex._rules[i][1], lex._rules[i][2]) end
+
+ local code = [[
+ foo
+ bar
+ baz
+ quux
+ ]]
+ local tokens = {
+ {'keyword', 'foo'},
+ {'keyword', 'bar'},
+ {'keyword', 'baz'},
+ {'custom', 'quux'}
+ }
+ assert_lex(lex, code, tokens)
+end
+
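+-- For comparison, a minimal sketch (defined but not run as a test) of the
+-- modern equivalent of the legacy table in test_legacy(), using the
+-- lexer.new() API exercised throughout this file:
+local function legacy_modern_equivalent()
+  local lex = lexer.new('test')
+  lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+  lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[foo bar baz]]))
+  lex:add_rule('custom', token('custom', lpeg.P('quux')))
+  lex:add_style('custom', lexer.STYLE_CONSTANT)
+  lex:add_fold_point(lexer.KEYWORD, 'foo', 'baz')
+  return lex
+end
+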
+-- Tests that all lexers load and lex text.
+function test_loads()
+ local p = io.popen('ls -1 ../lexlua/*.lua')
+ local files = p:read('*a')
+ p:close()
+ for file in files:gmatch('[^\n]+') do
+ local lex_name = file:match('^%.%./lexlua/(.+)%.lua$')
+ if lex_name ~= 'lexer' then
+ local lex = lexer.load(lex_name, nil, true)
+ assert_default_styles(lex)
+ local tokens = lex:lex('test')
+ assert(#tokens >= 2)
+ end
+ end
+end
+
+-- Tests the Lua lexer.
+function test_lua()
+ local lua = lexer.load('lua')
+ assert(lua._NAME == 'lua')
+ assert_default_styles(lua)
+ local rules = {
+ 'whitespace', 'keyword', 'function', 'constant', 'library', 'identifier',
+ 'string', 'comment', 'number', 'label', 'operator'
+ }
+ assert_rules(lua, rules)
+ local styles = {
+ 'deprecated_function', 'library', 'deprecated_library', 'longstring',
+ 'lua_whitespace' -- language-specific whitespace for multilang lexers
+ }
+ assert_extra_styles(lua, styles)
+
+ -- Lexing tests.
+ local code = [=[
+ -- Comment.
+ ::begin::
+ local a = 1 + 2.0e3 - 0x40
+ local b = "two"..[[three]]
+ _G.print(a, string.upper(b))
+ ]=]
+ local tokens = {
+ {lexer.COMMENT, '-- Comment.'},
+ {lexer.LABEL, '::begin::'},
+ {lexer.KEYWORD, 'local'},
+ {lexer.IDENTIFIER, 'a'},
+ {lexer.OPERATOR, '='},
+ {lexer.NUMBER, '1'},
+ {lexer.OPERATOR, '+'},
+ {lexer.NUMBER, '2.0e3'},
+ {lexer.OPERATOR, '-'},
+ {lexer.NUMBER, '0x40'},
+ {lexer.KEYWORD, 'local'},
+ {lexer.IDENTIFIER, 'b'},
+ {lexer.OPERATOR, '='},
+ {lexer.STRING, '"two"'},
+ {lexer.OPERATOR, '..'},
+ {'longstring', '[[three]]'},
+ {lexer.CONSTANT, '_G'},
+ {lexer.OPERATOR, '.'},
+ {lexer.FUNCTION, 'print'},
+ {lexer.OPERATOR, '('},
+ {lexer.IDENTIFIER, 'a'},
+ {lexer.OPERATOR, ','},
+ {'library', 'string.upper'},
+ {lexer.OPERATOR, '('},
+ {lexer.IDENTIFIER, 'b'},
+ {lexer.OPERATOR, ')'},
+ {lexer.OPERATOR, ')'}
+ }
+ assert_lex(lua, code, tokens)
+
+ -- Folding tests.
+ code = [=[
+ if foo then
+ bar
+ end
+ for k, v in pairs(foo) do
+ bar
+ end
+ function foo(bar)
+ baz
+ end
+ repeat
+ foo
+ until bar
+ --[[
+ foo
+ ]]
+ (foo,
+ bar,
+ baz)
+ {foo,
+ bar,
+ baz}
+ ]=]
+ local folds = {1, 4, 7, 10, 13, 16, 19}
+ assert_fold_points(lua, code, folds)
+end
+
+-- Tests the C lexer.
+function test_c()
+ local c = lexer.load('ansi_c')
+ assert(c._NAME == 'ansi_c')
+ assert_default_styles(c)
+
+ -- Lexing tests.
+ local code = ([[
+ /* Comment. */
+ #include <stdlib.h>
+ #include "lua.h"
+ int main(int argc, char **argv) {
+ if (NULL);
+ return 0;
+ }
+  ]]):gsub('\n *', '\n') -- strip each line's leading indent, keeping other spaces
+ local tokens = {
+ {lexer.COMMENT, '/* Comment. */'},
+ {lexer.PREPROCESSOR, '#include'},
+ {lexer.STRING, '<stdlib.h>'},
+ {lexer.PREPROCESSOR, '#include'},
+ {lexer.STRING, '"lua.h"'},
+ {lexer.TYPE, 'int'},
+ {lexer.IDENTIFIER, 'main'},
+ {lexer.OPERATOR, '('},
+ {lexer.TYPE, 'int'},
+ {lexer.IDENTIFIER, 'argc'},
+ {lexer.OPERATOR, ','},
+ {lexer.TYPE, 'char'},
+ {lexer.OPERATOR, '*'},
+ {lexer.OPERATOR, '*'},
+ {lexer.IDENTIFIER, 'argv'},
+ {lexer.OPERATOR, ')'},
+ {lexer.OPERATOR, '{'},
+ {lexer.KEYWORD, 'if'},
+ {lexer.OPERATOR, '('},
+ {lexer.CONSTANT, 'NULL'},
+ {lexer.OPERATOR, ')'},
+ {lexer.OPERATOR, ';'},
+ {lexer.KEYWORD, 'return'},
+ {lexer.NUMBER, '0'},
+ {lexer.OPERATOR, ';'},
+ {lexer.OPERATOR, '}'}
+ }
+ assert_lex(c, code, tokens)
+
+ -- Folding tests.
+ code = ([[
+ if (foo) {
+ bar;
+ }
+ /**
+ * foo
+ */
+ #ifdef foo
+ bar;
+ #endif
+  ]]):gsub('\n *', '\n') -- strip each line's leading indent, keeping other spaces
+ local folds = {1, 4, 7}
+ assert_fold_points(c, code, folds)
+end
+
+-- Tests the HTML lexer and its embedded languages.
+function test_html()
+ local html = lexer.load('html')
+ assert(html._NAME == 'html')
+ assert_default_styles(html)
+ local rules = {
+ 'whitespace', 'comment', 'doctype', 'element', 'tag_close', 'attribute',
+ --[['equals',]] 'string', 'number', 'entity'
+ }
+ assert_rules(html, rules)
+ local styles = {
+ 'doctype', 'element', 'unknown_element', 'attribute', 'unknown_attribute',
+ 'entity', 'html_whitespace',
+ 'value', 'color', 'unit', 'at_rule', 'css_whitespace', -- CSS
+ 'javascript_whitespace', -- JS
+ 'coffeescript_whitespace' -- CoffeeScript
+ }
+ assert_extra_styles(html, styles)
+ assert_children(html, {'css', 'javascript', 'coffeescript'})
+
+ -- Lexing tests.
+ local code = [[
+ <!DOCTYPE html>
+ <!-- Comment. -->
+ <html>
+ <head>
+ <style type="text/css">
+ /* Another comment. */
+ h1:hover {
+ color: red;
+ border: 1px solid #0000FF;
+ }
+ </style>
+ <script type="text/javascript">
+ /* A third comment. */
+ var a = 1 + 2.0e3 - 0x40;
+ var b = "two" + `three`;
+ var c = /pattern/i;
+ //</script>
+ </head>
+ <bod/>
+ </html>
+ ]]
+ local tokens = {
+ {'doctype', '<!DOCTYPE html>'},
+ {lexer.COMMENT, '<!-- Comment. -->'},
+ {'element', '<html'},
+ {'element', '>'},
+ {'element', '<head'},
+ {'element', '>'},
+ {'element', '<style'},
+ {'attribute', 'type'},
+ {lexer.OPERATOR, '='},
+ {lexer.STRING, '"text/css"'},
+ {'element', '>'},
+ {lexer.COMMENT, '/* Another comment. */'},
+ {lexer.IDENTIFIER, 'h1'},
+ {'pseudoclass', ':hover'},
+ {lexer.OPERATOR, '{'},
+ {'property', 'color'},
+ {lexer.OPERATOR, ':'},
+ {'value', 'red'},
+ {lexer.OPERATOR, ';'},
+ {'property', 'border'},
+ {lexer.OPERATOR, ':'},
+ {lexer.NUMBER, '1'},
+ {'unit', 'px'},
+ {'value', 'solid'},
+ {'color', '#0000FF'},
+ {lexer.OPERATOR, ';'},
+ {lexer.OPERATOR, '}'},
+ {'element', '</style'},
+ {'element', '>'},
+ {'element', '<script'},
+ {'attribute', 'type'},
+ {lexer.OPERATOR, '='},
+ {lexer.STRING, '"text/javascript"'},
+ {'element', '>'},
+ {lexer.COMMENT, '/* A third comment. */'},
+ {lexer.KEYWORD, 'var'},
+ {lexer.IDENTIFIER, 'a'},
+ {lexer.OPERATOR, '='},
+ {lexer.NUMBER, '1'},
+ {lexer.OPERATOR, '+'},
+ {lexer.NUMBER, '2.0e3'},
+ {lexer.OPERATOR, '-'},
+ {lexer.NUMBER, '0x40'},
+ {lexer.OPERATOR, ';'},
+ {lexer.KEYWORD, 'var'},
+ {lexer.IDENTIFIER, 'b'},
+ {lexer.OPERATOR, '='},
+ {lexer.STRING, '"two"'},
+ {lexer.OPERATOR, '+'},
+ {lexer.STRING, '`three`'},
+ {lexer.OPERATOR, ';'},
+ {lexer.KEYWORD, 'var'},
+ {lexer.IDENTIFIER, 'c'},
+ {lexer.OPERATOR, '='},
+ {lexer.REGEX, '/pattern/i'},
+ {lexer.OPERATOR, ';'},
+ {lexer.COMMENT, '//'},
+ {'element', '</script'},
+ {'element', '>'},
+ {'element', '</head'},
+ {'element', '>'},
+ {'unknown_element', '<bod'},
+ {'element', '/>'},
+ {'element', '</html'},
+ {'element', '>'}
+ }
+ assert_lex(html, code, tokens)
+
+ -- Folding tests.
+ local symbols = {'<', '/>', '<!--', '-->', '{', '}', '/*', '*/', '//'}
+ for i = 1, #symbols do assert(html._FOLDPOINTS._SYMBOLS[symbols[i]]) end
+ assert(html._FOLDPOINTS['element']['<'])
+ assert(html._FOLDPOINTS['element']['/>'])
+ assert(html._FOLDPOINTS['unknown_element']['<'])
+ assert(html._FOLDPOINTS['unknown_element']['/>'])
+ assert(html._FOLDPOINTS[lexer.COMMENT]['<!--'])
+ assert(html._FOLDPOINTS[lexer.COMMENT]['-->'])
+ assert(html._FOLDPOINTS[lexer.OPERATOR]['{'])
+ assert(html._FOLDPOINTS[lexer.OPERATOR]['}'])
+ assert(html._FOLDPOINTS[lexer.COMMENT]['/*'])
+ assert(html._FOLDPOINTS[lexer.COMMENT]['*/'])
+ assert(html._FOLDPOINTS[lexer.COMMENT]['//'])
+ code = [[
+ <html>
+ foo
+ </html>
+ <body/>
+ <style type="text/css">
+ h1 {
+ foo;
+ }
+ </style>
+ <script type="text/javascript">
+ function foo() {
+ bar;
+ }
+ </script>
+ h1 {
+ foo;
+ }
+ function foo() {
+ bar;
+ }
+ ]]
+ local folds = {1, 5, 6, 10, 11}
+ local levels = assert_fold_points(html, code, folds)
+ assert(levels[3] > levels[4]) -- </html> is ending fold point
+end
+
+-- Tests the PHP lexer.
+function test_php()
+ local php = lexer.load('php')
+ assert(php._NAME == 'php')
+ assert_default_styles(php)
+ assert_extra_styles(php, {'php_whitespace', 'php_tag'})
+
+ -- Lexing tests
+ -- Starting in HTML.
+ local code = [[<h1><?php echo "hi"; ?></h1>]]
+ local tokens = {
+ {'element', '<h1'},
+ {'element', '>'},
+ {'php_tag', '<?php '},
+ {lexer.KEYWORD, 'echo'},
+ {lexer.STRING, '"hi"'},
+ {lexer.OPERATOR, ';'},
+ {'php_tag', '?>'},
+ {'element', '</h1'},
+ {'element', '>'}
+ }
+ local initial_style = php._TOKENSTYLES['html_whitespace']
+ assert_lex(php, code, tokens, initial_style)
+ -- Starting in PHP.
+ code = [[echo "hi";]]
+ initial_style = php._TOKENSTYLES['php_whitespace']
+ tokens = {
+ {lexer.KEYWORD, 'echo'},
+ {lexer.STRING, '"hi"'},
+ {lexer.OPERATOR, ';'},
+ }
+ assert_lex(php, code, tokens, initial_style)
+
+ -- Folding tests.
+ local symbols = {'<?', '?>', '/*', '*/', '//', '#', '{', '}', '(', ')'}
+ for i = 1, #symbols do assert(php._FOLDPOINTS._SYMBOLS[symbols[i]]) end
+ assert(php._FOLDPOINTS['php_tag']['<?'])
+ assert(php._FOLDPOINTS['php_tag']['?>'])
+ assert(php._FOLDPOINTS[lexer.COMMENT]['/*'])
+ assert(php._FOLDPOINTS[lexer.COMMENT]['*/'])
+ assert(php._FOLDPOINTS[lexer.COMMENT]['//'])
+ assert(php._FOLDPOINTS[lexer.COMMENT]['#'])
+ assert(php._FOLDPOINTS[lexer.OPERATOR]['{'])
+ assert(php._FOLDPOINTS[lexer.OPERATOR]['}'])
+ assert(php._FOLDPOINTS[lexer.OPERATOR]['('])
+ assert(php._FOLDPOINTS[lexer.OPERATOR][')'])
+end
+
+-- Tests the Ruby lexer.
+function test_ruby()
+ local ruby = lexer.load('ruby')
+
+ -- Lexing tests.
+ local code = [[
+ # Comment.
+ require "foo"
+ $a = 1 + 2.0e3 - 0x40 if true
+ b = "two" + %q[three]
+ puts :c
+ ]]
+ local tokens = {
+ {lexer.COMMENT, '# Comment.'},
+ {lexer.FUNCTION, 'require'},
+ {lexer.STRING, '"foo"'},
+ {lexer.VARIABLE, '$a'},
+ {lexer.OPERATOR, '='},
+ {lexer.NUMBER, '1'},
+ {lexer.OPERATOR, '+'},
+ {lexer.NUMBER, '2.0e3'},
+ {lexer.OPERATOR, '-'},
+ {lexer.NUMBER, '0x40'},
+ {lexer.KEYWORD, 'if'},
+ {lexer.KEYWORD, 'true'},
+ {lexer.IDENTIFIER, 'b'},
+ {lexer.OPERATOR, '='},
+ {lexer.STRING, '"two"'},
+ {lexer.OPERATOR, '+'},
+ {lexer.STRING, '%q[three]'},
+ {lexer.FUNCTION, 'puts'},
+ {'symbol', ':c'}
+ }
+ assert_lex(ruby, code, tokens)
+
+ -- Folding tests.
+ local fold_keywords = {
+ begin = 1, class = 1, def = 1, ['do'] = 1, ['for'] = 1, ['module'] = 1,
+ case = 1, ['if'] = function() end, ['while'] = function() end,
+ ['unless'] = function() end, ['until'] = function() end, ['end'] = -1
+ }
+ for k, v in pairs(fold_keywords) do
+ assert(ruby._FOLDPOINTS._SYMBOLS[k])
+ if type(v) == 'number' then
+ assert(ruby._FOLDPOINTS[lexer.KEYWORD][k] == v)
+ else
+ assert(type(ruby._FOLDPOINTS[lexer.KEYWORD][k]) == 'function')
+ end
+ end
+  local fold_operators = {'(', ')', '[', ']', '{', '}'}
+ for i = 1, #fold_operators do
+ assert(ruby._FOLDPOINTS._SYMBOLS[fold_operators[i]])
+ assert(ruby._FOLDPOINTS[lexer.OPERATOR][fold_operators[i]])
+ end
+ code = [=[
+ class Foo
+ bar
+ end
+ foo.each do |v|
+ bar
+ end
+ def foo(bar)
+ baz
+ end
+ =begin
+ foo
+ =end
+ (foo,
+ bar,
+ baz)
+ [foo,
+ bar,
+ baz]
+ {foo,
+ bar,
+ baz}
+ ]=]
+ local folds = {1, 4, 7, 10, 13, 16, 19}
+ assert_fold_points(ruby, code, folds)
+end
+
+-- Tests the Ruby and Rails lexers, both with and without lexer caching.
+-- The Rails lexer inherits from Ruby and modifies some of its rules. Verify
+-- the Ruby lexer is unaffected.
+function test_ruby_and_rails()
+ local ruby = lexer.load('ruby', nil, true)
+ local rails = lexer.load('rails', nil, true)
+ local code = [[
+ class Foo < ActiveRecord::Base
+ has_one :bar
+ end
+ ]]
+ local ruby_tokens = {
+ {lexer.KEYWORD, 'class'},
+ {lexer.IDENTIFIER, 'Foo'},
+ {lexer.OPERATOR, '<'},
+ {lexer.IDENTIFIER, 'ActiveRecord'},
+ {lexer.OPERATOR, ':'},
+ {lexer.OPERATOR, ':'},
+ {lexer.IDENTIFIER, 'Base'},
+ {lexer.IDENTIFIER, 'has_one'},
+ {'symbol', ':bar'},
+ {lexer.KEYWORD, 'end'}
+ }
+ assert_lex(ruby, code, ruby_tokens)
+
+ local rails_tokens = {
+ {lexer.KEYWORD, 'class'},
+ {lexer.IDENTIFIER, 'Foo'},
+ {lexer.OPERATOR, '<'},
+ {lexer.IDENTIFIER, 'ActiveRecord'},
+ {lexer.OPERATOR, ':'},
+ {lexer.OPERATOR, ':'},
+ {lexer.IDENTIFIER, 'Base'},
+ {lexer.FUNCTION, 'has_one'},
+ {'symbol', ':bar'},
+ {lexer.KEYWORD, 'end'}
+ }
+ assert_lex(rails, code, rails_tokens)
+
+ -- Load from the cache.
+ local ruby2 = lexer.load('ruby', nil, true)
+ assert_lex(ruby, code, ruby_tokens)
+ assert(ruby == ruby2)
+
+ -- Load without a cache and perform the same validations.
+ ruby = lexer.load('ruby')
+ assert_lex(ruby, code, ruby_tokens)
+ rails = lexer.load('rails')
+ assert_lex(rails, code, rails_tokens)
+ ruby2 = lexer.load('ruby')
+ assert_lex(ruby, code, ruby_tokens)
+ assert(ruby ~= ruby2)
+end
+
+-- Tests the RHTML lexer, which is a proxy for HTML and Rails.
+function test_rhtml()
+ local rhtml = lexer.load('rhtml')
+
+ -- Lexing tests.
+ -- Start in HTML.
+ local code = [[<h1><% puts "hi" %></h1>]]
+ local rhtml_tokens = {
+ {'element', '<h1'},
+ {'element', '>'},
+ {'rhtml_tag', '<%'},
+ {lexer.FUNCTION, 'puts'},
+ {lexer.STRING, '"hi"'},
+ {'rhtml_tag', '%>'},
+ {'element', '</h1'},
+ {'element', '>'}
+ }
+ local initial_style = rhtml._TOKENSTYLES['html_whitespace']
+ assert_lex(rhtml, code, rhtml_tokens, initial_style)
+ -- Start in Ruby.
+ code = [[puts "hi"]]
+ rhtml_tokens = {
+ {lexer.FUNCTION, 'puts'},
+ {lexer.STRING, '"hi"'}
+ }
+ initial_style = rhtml._TOKENSTYLES['rails_whitespace']
+ assert_lex(rhtml, code, rhtml_tokens, initial_style)
+end
+
+-- Run tests.
+print('Starting test suite.')
+local tests = {}
+if #arg == 0 then
+ for k, v in pairs(_G) do
+ if k:find('^test_') and type(v) == 'function' then
+ tests[#tests + 1] = k
+ end
+ end
+else
+ for i = 1, #arg do
+ if type(_G[arg[i]]) == 'function' then tests[#tests + 1] = arg[i] end
+ end
+end
+table.sort(tests)
+local failed = 0
+for i = 1, #tests do
+ print(string.format('Running %s.', tests[i]))
+  xpcall(_G[tests[i]], function(errmsg)
+ print(string.format('Failed! %s', debug.traceback(errmsg, 3)))
+ failed = failed + 1
+ end)
+end
+print(string.format('%d/%d tests passed', #tests - failed, #tests))
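+
+-- Exit with a non-zero status on failure so the `make test` target that runs
+-- this file fails too (a small addition; os.exit() is part of stock Lua).
+if failed > 0 then os.exit(1) end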