author     mitchell <unknown>  2018-03-11 23:04:41 -0400
committer  mitchell <unknown>  2018-03-11 23:04:41 -0400
commit     519b7328b66c4c84f03893a31e4be5ba6b1395f2
tree       2055cd79006357e94c185f341d0df17b9a8769eb /lexlua/perl.lua
parent     c0373e036e965a70045971e2abc582cb4bf12a4e
download   scintilla-mirror-519b7328b66c4c84f03893a31e4be5ba6b1395f2.tar.gz
Added optional Lua lexer support.
This support is disabled by default and must be enabled via a compile-time option.
Diffstat (limited to 'lexlua/perl.lua')
-rw-r--r--  lexlua/perl.lua | 142
1 file changed, 142 insertions, 0 deletions
diff --git a/lexlua/perl.lua b/lexlua/perl.lua
new file mode 100644
index 000000000..2dfc70152
--- /dev/null
+++ b/lexlua/perl.lua
@@ -0,0 +1,142 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- Perl LPeg lexer.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+local lex = lexer.new('perl')
+
+-- Whitespace.
+lex:add_rule('perl', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+  STDIN STDOUT STDERR BEGIN END CHECK INIT
+  require use
+  break continue do each else elsif foreach for if last local my next our
+  package return sub unless until while __FILE__ __LINE__ __PACKAGE__
+  and or not eq ne lt gt le ge
+]]))
+
+-- Markers.
+lex:add_rule('marker', token(lexer.COMMENT, word_match[[__DATA__ __END__]] *
+                                            lexer.any^0))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match[[
+  abs accept alarm atan2 bind binmode bless caller chdir chmod chomp chop chown
+  chr chroot closedir close connect cos crypt dbmclose dbmopen defined delete
+  die dump each endgrent endhostent endnetent endprotoent endpwent endservent
+  eof eval exec exists exit exp fcntl fileno flock fork format formline getc
+  getgrent getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin
+  getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid getpriority
+  getprotobyname getprotobynumber getprotoent getpwent getpwnam getpwuid
+  getservbyname getservbyport getservent getsockname getsockopt glob gmtime goto
+  grep hex import index int ioctl join keys kill lcfirst lc length link listen
+  localtime log lstat map mkdir msgctl msgget msgrcv msgsnd new oct opendir open
+  ord pack pipe pop pos printf print prototype push quotemeta rand readdir read
+  readlink recv redo ref rename reset reverse rewinddir rindex rmdir scalar
+  seekdir seek select semctl semget semop send setgrent sethostent setnetent
+  setpgrp setpriority setprotoent setpwent setservent setsockopt shift shmctl
+  shmget shmread shmwrite shutdown sin sleep socket socketpair sort splice split
+  sprintf sqrt srand stat study substr symlink syscall sysread sysseek system
+  syswrite telldir tell tied tie time times truncate ucfirst uc umask undef
+  unlink unpack unshift untie utime values vec wait waitpid wantarray warn write
+]]))
+
+local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}
+local literal_delimitted = P(function(input, index) -- for single delimiter sets
+  local delimiter = input:sub(index, index)
+  if not delimiter:find('%w') then -- only non alpha-numerics
+    local match_pos, patt
+    if delimiter_matches[delimiter] then
+      -- Handle nested delimiter/matches in strings.
+      local s, e = delimiter, delimiter_matches[delimiter]
+      patt = lexer.delimited_range(s..e, false, false, true)
+    else
+      patt = lexer.delimited_range(delimiter)
+    end
+    match_pos = lpeg.match(patt, input, index)
+    return match_pos or #input + 1
+  end
+end)
+local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets
+  local delimiter = input:sub(index, index)
+  -- Only consider non-alpha-numerics and non-spaces as delimiters. The
+  -- non-spaces are used to ignore operators like "-s".
+  if not delimiter:find('[%w ]') then
+    local match_pos, patt
+    if delimiter_matches[delimiter] then
+      -- Handle nested delimiter/matches in strings.
+      local s, e = delimiter, delimiter_matches[delimiter]
+      patt = lexer.delimited_range(s..e, false, false, true)
+    else
+      patt = lexer.delimited_range(delimiter)
+    end
+    first_match_pos = lpeg.match(patt, input, index)
+    final_match_pos = lpeg.match(patt, input, first_match_pos - 1)
+    if not final_match_pos then -- using (), [], {}, or <> notation
+      final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos)
+    end
+    return final_match_pos or #input + 1
+  end
+end)
+
+-- Strings.
+local sq_str = lexer.delimited_range("'")
+local dq_str = lexer.delimited_range('"')
+local cmd_str = lexer.delimited_range('`')
+local heredoc = '<<' * P(function(input, index)
+  local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index)
+  if s == index and delimiter then
+    local end_heredoc = '[\n\r\f]+'
+    local _, e = input:find(end_heredoc..delimiter, e)
+    return e and e + 1 or #input + 1
+  end
+end)
+local lit_str = 'q' * P('q')^-1 * literal_delimitted
+local lit_array = 'qw' * literal_delimitted
+local lit_cmd = 'qx' * literal_delimitted
+local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0
+local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') *
+                  lexer.delimited_range('/', true) * S('imosx')^0
+local lit_regex = 'qr' * literal_delimitted * S('imosx')^0
+local lit_match = 'm' * literal_delimitted * S('cgimosx')^0
+local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0
+lex:add_rule('string',
+             token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str +
+                                 lit_array + lit_cmd + lit_tr) +
+             token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Comments.
+local line_comment = '#' * lexer.nonnewline_esc^0
+local block_comment = lexer.starts_line('=') * lexer.alpha *
+                      (lexer.any - lexer.newline * '=cut')^0 *
+                      (lexer.newline * '=cut')^-1
+lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+
+-- Variables.
+local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 +
+                           S('\\"[]\'&`+*.,;=%~?@<>(|/!-') +
+                           ':' * (lexer.any - ':') +
+                           P('$') * -lexer.word +
+                           lexer.digit^1)
+local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#'
+lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('-<>+*!~\\=/%&|^.?:;()[]{}')))
+
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '[', ']')
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#'))
+
+return lex
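The most involved part of the new lexer is `literal_delimitted`, an LPeg match-time function pattern: it reads the character at the current position, treats it as the opening delimiter of a Perl quote-like operator (`q`, `qq`, `qw`, `qx`, `m`, `qr`, ...), and consults `delimiter_matches` to decide whether the closer is the same character or its paired counterpart. The snippet below is a simplified, self-contained sketch of that idea using plain LPeg only; it is not part of the commit. The names `simple_delimited` and `q_literal` and the sample inputs are made up for illustration, it assumes stand-alone Lua with the `lpeg` module installed, and it deliberately ignores escapes, nested brackets, and the two-part forms (`s///`, `tr///`) that the real lexer handles via `lexer.delimited_range` and `literal_delimitted2`.

local lpeg = require('lpeg')
local P = lpeg.P

-- Closing delimiter for each paired opening delimiter, as in the lexer above.
local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'}

-- Match-time function pattern: treat the character at `index` as the opening
-- delimiter and return the position just past its closer (or past end of input).
local simple_delimited = P(function(input, index)
  local open = input:sub(index, index)
  if open == '' or open:find('%w') then return nil end -- not a usable delimiter
  local close = delimiter_matches[open] or open
  local stop = input:find(close, index + 1, true) -- plain find: no escapes/nesting
  return stop and stop + 1 or #input + 1
end)

-- Toy rule for the single-delimiter operators q(...) and qq{...}.
local q_literal = P('q') * P('q')^-1 * simple_delimited

print(lpeg.match(q_literal, 'q(hello world) and more')) --> 15
print(lpeg.match(q_literal, 'qq{hello}'))               --> 10
print(lpeg.match(q_literal, 'quit'))                    --> nil ('u' is alphanumeric)

In the committed lexer, `literal_delimitted2` applies the discovered delimiter pattern twice: once for the search part and once for the replacement part, either reusing the shared middle delimiter (as in `s/foo/bar/`) or, when that second match fails, skipping optional whitespace and matching a second bracketed range (as in `s{foo}{bar}`).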