diff options
author | mitchell <unknown> | 2018-03-11 23:04:41 -0400 |
---|---|---|
committer | mitchell <unknown> | 2018-03-11 23:04:41 -0400 |
commit | 519b7328b66c4c84f03893a31e4be5ba6b1395f2 (patch) | |
tree | 2055cd79006357e94c185f341d0df17b9a8769eb /lexlua | |
parent | c0373e036e965a70045971e2abc582cb4bf12a4e (diff) | |
download | scintilla-mirror-519b7328b66c4c84f03893a31e4be5ba6b1395f2.tar.gz |
Added optional Lua lexer support.
This support is disabled by default and must be enabled via a compile-time option.
Diffstat (limited to 'lexlua')
134 files changed, 13607 insertions, 0 deletions
diff --git a/lexlua/actionscript.lua b/lexlua/actionscript.lua new file mode 100644 index 000000000..80d46a53c --- /dev/null +++ b/lexlua/actionscript.lua @@ -0,0 +1,59 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Actionscript LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('actionscript') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + break continue delete do else for function if in new on return this typeof var + void while with NaN Infinity false null true undefined + -- Reserved for future use. + abstract case catch class const debugger default export extends final finally + goto implements import instanceof interface native package private Void + protected public dynamic static super switch synchronized throw throws + transient try volatile +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + Array Boolean Color Date Function Key MovieClip Math Mouse Number Object + Selection Sound String XML XMLNode XMLSocket + -- Reserved for future use. + boolean byte char double enum float int long short +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local sq_str = lexer.delimited_range("'", true) +local dq_str = lexer.delimited_range('"', true) +local ml_str = '<![CDATA[' * (lexer.any - ']]>')^0 * ']]>' +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('LlUuFf')^-2)) + +-- Operators. 
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,;?()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) +lex:add_fold_point(lexer.STRING, '<![CDATA[', ']]>') + +return lex diff --git a/lexlua/ada.lua b/lexlua/ada.lua new file mode 100644 index 000000000..f1db9f1fb --- /dev/null +++ b/lexlua/ada.lua @@ -0,0 +1,57 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Ada LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('ada') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abort abs accept all and begin body case declare delay do else elsif end entry + exception exit for generic goto if in is loop mod new not null or others out + protected raise record rem renames requeue reverse select separate subtype + task terminate then type until when while xor + -- Preprocessor. + package pragma use with + -- Function. + function procedure return + -- Storage class. + abstract access aliased array at constant delta digits interface limited of + private range tagged synchronized + -- Boolean. + true false +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + boolean character count duration float integer long_float long_integer + priority short_float short_integer string +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range('"', true, true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0)) + +-- Numbers. 
+local hex_num = '0' * S('xX') * (lexer.xdigit + '_')^1 +local integer = lexer.digit^1 * ('_' * lexer.digit^1)^0 +local float = integer^1 * ('.' * integer^0)^-1 * (S('eE') * S('+-')^-1 * integer)^-1 +lex:add_rule('number', token(lexer.NUMBER, hex_num + + S('+-')^-1 * (float + integer) * + S('LlUuFf')^-3)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':;=<>&+-*/.()'))) + +return lex diff --git a/lexlua/ansi_c.lua b/lexlua/ansi_c.lua new file mode 100644 index 000000000..4f961e67b --- /dev/null +++ b/lexlua/ansi_c.lua @@ -0,0 +1,90 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- C LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('ansi_c') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + auto break case const continue default do else extern for goto if inline + register restrict return sizeof static switch typedef volatile while + -- C11. + _Alignas _Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + char double enum float int long short signed struct union unsigned void + _Bool _Complex _Imaginary + -- Stdlib types. + ptrdiff_t size_t max_align_t wchar_t intptr_t uintptr_t intmax_t uintmax_t +]] + P('u')^-1 * 'int' * (P('_least') + '_fast')^-1 * R('09')^1 * '_t')) + +-- Constants. +lex:add_rule('constants', token(lexer.CONSTANT, word_match[[ + NULL + -- Preprocessor. + __DATE__ __FILE__ __LINE__ __TIME__ __func__ + -- errno.h. 
+ E2BIG EACCES EADDRINUSE EADDRNOTAVAIL EAFNOSUPPORT EAGAIN EALREADY EBADF + EBADMSG EBUSY ECANCELED ECHILD ECONNABORTED ECONNREFUSED ECONNRESET EDEADLK + EDESTADDRREQ EDOM EDQUOT EEXIST EFAULT EFBIG EHOSTUNREACH EIDRM EILSEQ + EINPROGRESS EINTR EINVAL EIO EISCONN EISDIR ELOOP EMFILE EMLINK EMSGSIZE + EMULTIHOP ENAMETOOLONG ENETDOWN ENETRESET ENETUNREACH ENFILE ENOBUFS ENODATA + ENODEV ENOENT ENOEXEC ENOLCK ENOLINK ENOMEM ENOMSG ENOPROTOOPT ENOSPC ENOSR + ENOSTR ENOSYS ENOTCONN ENOTDIR ENOTEMPTY ENOTRECOVERABLE ENOTSOCK ENOTSUP + ENOTTY ENXIO EOPNOTSUPP EOVERFLOW EOWNERDEAD EPERM EPIPE EPROTO + EPROTONOSUPPORT EPROTOTYPE ERANGE EROFS ESPIPE ESRCH ESTALE ETIME ETIMEDOUT + ETXTBSY EWOULDBLOCK EXDEV +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local sq_str = P('L')^-1 * lexer.delimited_range("'", true) +local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 + + lexer.starts_line('#if') * S(' \t')^0 * '0' * + lexer.space * + (lexer.any - lexer.starts_line('#endif'))^0 * + (lexer.starts_line('#endif'))^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Preprocessor. +local preproc_word = word_match[[ + define elif else endif if ifdef ifndef line pragma undef +]] +lex:add_rule('preprocessor', + #lexer.starts_line('#') * + (token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + + token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (token(lexer.WHITESPACE, S('\t ')^1) * + token(lexer.STRING, + lexer.delimited_range('<>', true, true)))^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~:;,.()[]{}'))) + +-- Fold points. 
+lex:add_fold_point(lexer.PREPROCESSOR, '#if', '#endif') +lex:add_fold_point(lexer.PREPROCESSOR, '#ifdef', '#endif') +lex:add_fold_point(lexer.PREPROCESSOR, '#ifndef', '#endif') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/antlr.lua b/lexlua/antlr.lua new file mode 100644 index 000000000..7d1fa3b50 --- /dev/null +++ b/lexlua/antlr.lua @@ -0,0 +1,57 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- ANTLR LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('antlr') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abstract break case catch continue default do else extends final finally for + if implements instanceof native new private protected public return static + switch synchronized throw throws transient try volatile + while package import header options tokens strictfp + false null super this true +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + boolean byte char class double float int interface long short void +]])) + +-- Functions. +lex:add_rule('func', token(lexer.FUNCTION, 'assert')) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Actions. +lex:add_rule('action', token(lexer.OPERATOR, P('{')) * + token('action', (1 - P('}'))^0) * + token(lexer.OPERATOR, P('}'))^-1) +lex:add_style('action', lexer.STYLE_NOTHING) + +-- Strings. 
+lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('$@:;|.=+*?~!^>-()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, ':', ';') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/apdl.lua b/lexlua/apdl.lua new file mode 100644 index 000000000..5a1b2e164 --- /dev/null +++ b/lexlua/apdl.lua @@ -0,0 +1,74 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- APDL LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('apdl', {case_insensitive_fold_points = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + *abbr *abb *afun *afu *ask *cfclos *cfc *cfopen *cfo *cfwrite *cfw *create + *cre *cycle *cyc *del *dim *do *elseif *else *enddo *endif *end *eval *eva + *exit *exi *get *go *if *list *lis *mfouri *mfo *mfun *mfu *mooney *moo *moper + *mop *msg *repeat *rep *set *status *sta *tread *tre *ulib *uli *use *vabs + *vab *vcol *vco *vcum *vcu *vedit *ved *vfact *vfa *vfill *vfi *vfun *vfu + *vget *vge *vitrp *vit *vlen *vle *vmask *vma *voper *vop *vplot *vpl *vput + *vpu *vread *vre *vscfun *vsc *vstat *vst *vwrite *vwr + /anfile /anf /angle /ang /annot /ann /anum /anu /assign /ass /auto /aut /aux15 + /aux2 /aux /axlab /axl /batch /bat /clabel /cla /clear /cle /clog /clo /cmap + /cma /color /col /com /config /contour /con /copy /cop /cplane /cpl /ctype + /cty /cval /cva /delete /del /devdisp /device /dev /dist /dis /dscale /dsc + /dv3d /dv3 /edge /edg /efacet /efa /eof /erase /era /eshape /esh /exit /exi + /expand /exp /facet /fac /fdele /fde /filname /fil /focus /foc /format /for + /ftype /fty /gcmd /gcm /gcolumn /gco /gfile /gfi /gformat /gfo /gline /gli + /gmarker /gma /golist /gol /gopr /gop /go /graphics /gra /gresume /gre /grid + /gri /gropt /gro /grtyp /grt /gsave /gsa /gst /gthk /gth /gtype /gty /header + /hea /input /inp /larc /lar /light /lig /line /lin /lspec /lsp /lsymbol /lsy + /menu /men /mplib /mpl /mrep /mre /mstart /mst /nerr /ner /noerase /noe + /nolist /nol /nopr /nop /normal /nor /number /num /opt /output /out /page /pag + /pbc /pbf /pcircle /pci /pcopy /pco /plopts /plo /pmacro /pma /pmeth /pme + /pmore /pmo /pnum /pnu /polygon /pol /post26 /post1 /pos /prep7 /pre /psearch + /pse /psf /pspec /psp /pstatus /pst /psymb /psy /pwedge /pwe /quit /qui /ratio + /rat /rename /ren /replot /rep /reset /res /rgb /runst /run /seclib /sec /seg + /shade /sha /showdisp /show /sho /shrink /shr /solu /sol /sscale /ssc /status + /sta /stitle /sti /syp /sys /title /tit /tlabel /tla /triad /tri /trlcy /trl + 
/tspec /tsp /type /typ /ucmd /ucm /uis /ui /units /uni /user /use /vcone /vco + /view /vie /vscale /vsc /vup /wait /wai /window /win /xrange /xra /yrange /yra + /zoom /zoo +]], true)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range("'", true, true))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, + lexer.delimited_range('%', true, true))) + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, lexer.starts_line(':') * lexer.word)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '!' * lexer.nonnewline^0)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/$=,;()'))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, '*if', '*endif') +lex:add_fold_point(lexer.KEYWORD, '*do', '*enddo') +lex:add_fold_point(lexer.KEYWORD, '*dowhile', '*enddo') +lex:add_fold_point(lexer.COMMENT, '!', lexer.fold_line_comments('!')) + +return lex diff --git a/lexlua/apl.lua b/lexlua/apl.lua new file mode 100644 index 000000000..5275ebffa --- /dev/null +++ b/lexlua/apl.lua @@ -0,0 +1,57 @@ +-- Copyright 2015-2018 David B. Lamkins <david@lamkins.net>. See License.txt. +-- APL LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('apl') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, (P('⍝') + '#') * + lexer.nonnewline^0)) + +-- Strings. +local sq_str = lexer.delimited_range("'", false, true) +local dq_str = lexer.delimited_range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Numbers. 
+local dig = R('09') +local rad = P('.') +local exp = S('eE') +local img = S('jJ') +local sgn = P('¯')^-1 +local float = sgn * (dig^0 * rad * dig^1 + dig^1 * rad * dig^0 + dig^1) * + (exp * sgn *dig^1)^-1 +lex:add_rule('number', token(lexer.NUMBER, float * img * float + float)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, P('⍞') + 'χ' + '⍺' + '⍶' + '⍵' + + '⍹' + '⎕' * R('AZ', 'az')^0)) + +-- Names. +local n1l = R('AZ', 'az') +local n1b = P('_') + '∆' + '⍙' +local n2l = n1l + R('09') +local n2b = n1b + '¯' +local n1 = n1l + n1b +local n2 = n2l + n2b +local name = n1 * n2^0 + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, name * ':')) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, name)) + +-- Special. +lex:add_rule('special', token(lexer.TYPE, S('{}[]();') + '←' + '→' + '◊')) + +-- Nabla. +lex:add_rule('nabla', token(lexer.PREPROCESSOR, P('∇') + '⍫')) + +return lex diff --git a/lexlua/applescript.lua b/lexlua/applescript.lua new file mode 100644 index 000000000..60a67383b --- /dev/null +++ b/lexlua/applescript.lua @@ -0,0 +1,69 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Applescript LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('applescript') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + script property prop end copy to set global local on to of in given with + without return continue tell if then else repeat times while until from exit + try error considering ignoring timeout transaction my get put into is + -- References. + each some every whose where id index first second third fourth fifth sixth + seventh eighth ninth tenth last front back st nd rd th middle named through + thru before after beginning the + -- Commands. 
+ close copy count delete duplicate exists launch make move open print quit + reopen run save saving + -- Operators. + div mod and not or as contains equal equals isn't +]], true)) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + case diacriticals expansion hyphens punctuation + -- Predefined variables. + it me version pi result space tab anything + -- Text styles. + bold condensed expanded hidden italic outline plain shadow strikethrough + subscript superscript underline + -- Save options. + ask no yes + -- Booleans. + false true + -- Date and time. + weekday monday mon tuesday tue wednesday wed thursday thu friday fri saturday + sat sunday sun month january jan february feb march mar april apr may june jun + july jul august aug september sep october oct november nov december dec + minutes hours days weeks +]], true)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * + lexer.alnum^0)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) + +-- Comments. +local line_comment = '--' * lexer.nonnewline^0 +local block_comment = '(*' * (lexer.any - '*)')^0 * P('*)')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=:,(){}'))) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '(*', '*)') + +return lex diff --git a/lexlua/asm.lua b/lexlua/asm.lua new file mode 100644 index 000000000..b2e137146 --- /dev/null +++ b/lexlua/asm.lua @@ -0,0 +1,363 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- NASM Assembly LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('asm') + +-- Whitespace. 
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + -- Preprocessor macros. + struc endstruc istruc at iend align alignb sectalign .nolist + -- Preprocessor Packages. + --altreg smartalign fp ifunc + -- Directives. + absolute bits class common common cpu default export extern float global group + import osabi overlay private public __SECT__ section segment stack use16 use32 + use64 + -- Section Names. + .bss .comment .data .lbss .ldata .lrodata .rdata .rodata .tbss .tdata .text + -- Section Qualifiers. + alloc bss code exec data noalloc nobits noexec nowrite progbits rdata tls + write + -- Operators. + abs rel seg wrt strict + __utf16__ __utf16be__ __utf16le__ __utf32__ __utf32be__ __utf32le__ +]])) + +-- Instructions. +-- awk '{print $1}'|uniq|tr '[:upper:]' '[:lower:]'| +-- lua -e "for l in io.lines() do print(\"'\"..l..\"',\") end"|fmt -w 78 +lex:add_rule('instruction', token('instruction', word_match[[ + -- Special Instructions. + db dd do dq dt dw dy resb resd reso resq rest resw resy + -- Conventional Instructions. 
+ aaa aad aam aas adc add and arpl bb0_reset bb1_reset bound bsf bsr bswap bt + btc btr bts call cbw cdq cdqe clc cld cli clts cmc cmp cmpsb cmpsd cmpsq cmpsw + cmpxchg cmpxchg486 cmpxchg8b cmpxchg16b cpuid cpu_read cpu_write cqo cwd cwde + daa das dec div dmint emms enter equ f2xm1 fabs fadd faddp fbld fbstp fchs + fclex fcmovb fcmovbe fcmove fcmovnb fcmovnbe fcmovne fcmovnu fcmovu fcom fcomi + fcomip fcomp fcompp fcos fdecstp fdisi fdiv fdivp fdivr fdivrp femms feni + ffree ffreep fiadd ficom ficomp fidiv fidivr fild fimul fincstp finit fist + fistp fisttp fisub fisubr fld fld1 fldcw fldenv fldl2e fldl2t fldlg2 fldln2 + fldpi fldz fmul fmulp fnclex fndisi fneni fninit fnop fnsave fnstcw fnstenv + fnstsw fpatan fprem fprem1 fptan frndint frstor fsave fscale fsetpm fsin + fsincos fsqrt fst fstcw fstenv fstp fstsw fsub fsubp fsubr fsubrp ftst fucom + fucomi fucomip fucomp fucompp fxam fxch fxtract fyl2x fyl2xp1 hlt ibts icebp + idiv imul in inc incbin insb insd insw int int01 int1 int03 int3 into invd + invpcid invlpg invlpga iret iretd iretq iretw jcxz jecxz jrcxz jmp jmpe lahf + lar lds lea leave les lfence lfs lgdt lgs lidt lldt lmsw loadall loadall286 + lodsb lodsd lodsq lodsw loop loope loopne loopnz loopz lsl lss ltr mfence + monitor mov movd movq movsb movsd movsq movsw movsx movsxd movsx movzx mul + mwait neg nop not or out outsb outsd outsw packssdw packsswb packuswb paddb + paddd paddsb paddsiw paddsw paddusb paddusw paddw pand pandn pause paveb + pavgusb pcmpeqb pcmpeqd pcmpeqw pcmpgtb pcmpgtd pcmpgtw pdistib pf2id pfacc + pfadd pfcmpeq pfcmpge pfcmpgt pfmax pfmin pfmul pfrcp pfrcpit1 pfrcpit2 + pfrsqit1 pfrsqrt pfsub pfsubr pi2fd pmachriw pmaddwd pmagw pmulhriw pmulhrwa + pmulhrwc pmulhw pmullw pmvgezb pmvlzb pmvnzb pmvzb pop popa popad popaw popf + popfd popfq popfw por prefetch prefetchw pslld psllq psllw psrad psraw psrld + psrlq psrlw psubb psubd psubsb psubsiw psubsw psubusb psubusw psubw punpckhbw + punpckhdq punpckhwd punpcklbw punpckldq punpcklwd 
push pusha pushad pushaw + pushf pushfd pushfq pushfw pxor rcl rcr rdshr rdmsr rdpmc rdtsc rdtscp ret + retf retn rol ror rdm rsdc rsldt rsm rsts sahf sal salc sar sbb scasb scasd + scasq scasw sfence sgdt shl shld shr shrd sidt sldt skinit smi smint smintold + smsw stc std sti stosb stosd stosq stosw str sub svdc svldt svts swapgs + syscall sysenter sysexit sysret test ud0 ud1 ud2b ud2 ud2a umov verr verw + fwait wbinvd wrshr wrmsr xadd xbts xchg xlatb xlat xor cmovcc jcc setcc + -- Katmai Streaming SIMD instructions (SSE -- a.k.a. KNI XMM MMX2). + addps addss andnps andps cmpeqps cmpeqss cmpleps cmpless cmpltps cmpltss + cmpneqps cmpneqss cmpnleps cmpnless cmpnltps cmpnltss cmpordps cmpordss + cmpunordps cmpunordss cmpps cmpss comiss cvtpi2ps cvtps2pi cvtsi2ss cvtss2si + cvttps2pi cvttss2si divps divss ldmxcsr maxps maxss minps minss movaps movhps + movlhps movlps movhlps movmskps movntps movss movups mulps mulss orps rcpps + rcpss rsqrtps rsqrtss shufps sqrtps sqrtss stmxcsr subps subss ucomiss + unpckhps unpcklps xorps + -- Introduced in Deschutes but necessary for SSE support. + fxrstor fxrstor64 fxsave fxsave64 + -- XSAVE group (AVX and extended state). + xgetbv xsetbv xsave xsave64 xsaveopt xsaveopt64 xrstor xrstor64 + -- Generic memory operations. + prefetchnta prefetcht0 prefetcht1 prefetcht2 sfence + -- New MMX instructions introduced in Katmai. + maskmovq movntq pavgb pavgw pextrw pinsrw pmaxsw pmaxub pminsw pminub pmovmskb + pmulhuw psadbw pshufw + -- AMD Enhanced 3DNow! (Athlon) instructions. + pf2iw pfnacc pfpnacc pi2fw pswapd + -- Willamette SSE2 Cacheability Instructions. + maskmovdqu clflush movntdq movnti movntpd lfence mfence + -- Willamette MMX instructions (SSE2 SIMD Integer Instructions). 
+ movd movdqa movdqu movdq2q movq movq2dq packsswb packssdw packuswb paddb paddw + paddd paddq paddsb paddsw paddusb paddusw pand pandn pavgb pavgw pcmpeqb + pcmpeqw pcmpeqd pcmpgtb pcmpgtw pcmpgtd pextrw pinsrw pmaddwd pmaxsw pmaxub + pminsw pminub pmovmskb pmulhuw pmulhw pmullw pmuludq por psadbw pshufd pshufhw + pshuflw pslldq psllw pslld psllq psraw psrad psrldq psrlw psrld psrlq psubb + psubw psubd psubq psubsb psubsw psubusb psubusw punpckhbw punpckhwd punpckhdq + punpckhqdq punpcklbw punpcklwd punpckldq punpcklqdq pxor + -- Willamette Streaming SIMD instructions (SSE2). + addpd addsd andnpd andpd cmpeqpd cmpeqsd cmplepd cmplesd cmpltpd cmpltsd + cmpneqpd cmpneqsd cmpnlepd cmpnlesd cmpnltpd cmpnltsd cmpordpd cmpordsd + cmpunordpd cmpunordsd cmppd cmpsd comisd cvtdq2pd cvtdq2ps cvtpd2dq cvtpd2pi + cvtpd2ps cvtpi2pd cvtps2dq cvtps2pd cvtsd2si cvtsd2ss cvtsi2sd cvtss2sd + cvttpd2pi cvttpd2dq cvttps2dq cvttsd2si divpd divsd maxpd maxsd minpd minsd + movapd movhpd movlpd movmskpd movsd movupd mulpd mulsd orpd shufpd sqrtpd + sqrtsd subpd subsd ucomisd unpckhpd unpcklpd xorpd + -- Prescott New Instructions (SSE3). + addsubpd addsubps haddpd haddps hsubpd hsubps lddqu movddup movshdup movsldup + -- VMX/SVM Instructions. + clgi stgi vmcall vmclear vmfunc vmlaunch vmload vmmcall vmptrld vmptrst vmread + vmresume vmrun vmsave vmwrite vmxoff vmxon + -- Extended Page Tables VMX instructions. + invept invvpid + -- Tejas New Instructions (SSSE3). + pabsb pabsw pabsd palignr phaddw phaddd phaddsw phsubw phsubd phsubsw + pmaddubsw pmulhrsw pshufb psignb psignw psignd + -- AMD SSE4A. + extrq insertq movntsd movntss + -- New instructions in Barcelona. + lzcnt + -- Penryn New Instructions (SSE4.1). 
+ blendpd blendps blendvpd blendvps dppd dpps extractps insertps movntdqa + mpsadbw packusdw pblendvb pblendw pcmpeqq pextrb pextrd pextrq pextrw + phminposuw pinsrb pinsrd pinsrq pmaxsb pmaxsd pmaxud pmaxuw pminsb pminsd + pminud pminuw pmovsxbw pmovsxbd pmovsxbq pmovsxwd pmovsxwq pmovsxdq pmovzxbw + pmovzxbd pmovzxbq pmovzxwd pmovzxwq pmovzxdq pmuldq pmulld ptest roundpd + roundps roundsd roundss + -- Nehalem New Instructions (SSE4.2). + crc32 pcmpestri pcmpestrm pcmpistri pcmpistrm pcmpgtq popcnt + -- Intel SMX. + getsec + -- Geode (Cyrix) 3DNow! additions. + pfrcpv pfrsqrtv + -- Intel new instructions in ???. + movbe + -- Intel AES instructions. + aesenc aesenclast aesdec aesdeclast aesimc aeskeygenassist + -- Intel AVX AES instructions. + vaesenc vaesenclast vaesdec vaesdeclast vaesimc vaeskeygenassist + -- Intel AVX instructions. + vaddpd vaddps vaddsd vaddss vaddsubpd vaddsubps vandpd vandps vandnpd vandnps + vblendpd vblendps vblendvpd vblendvps vbroadcastss vbroadcastsd vbroadcastf128 + vcmpeq_ospd vcmpeqpd vcmplt_ospd vcmpltpd vcmple_ospd vcmplepd vcmpunord_qpd + vcmpunordpd vcmpneq_uqpd vcmpneqpd vcmpnlt_uspd vcmpnltpd vcmpnle_uspd + vcmpnlepd vcmpord_qpd vcmpordpd vcmpeq_uqpd vcmpnge_uspd vcmpngepd + vcmpngt_uspd vcmpngtpd vcmpfalse_oqpd vcmpfalsepd vcmpneq_oqpd vcmpge_ospd + vcmpgepd vcmpgt_ospd vcmpgtpd vcmptrue_uqpd vcmptruepd vcmpeq_ospd vcmplt_oqpd + vcmple_oqpd vcmpunord_spd vcmpneq_uspd vcmpnlt_uqpd vcmpnle_uqpd vcmpord_spd + vcmpeq_uspd vcmpnge_uqpd vcmpngt_uqpd vcmpfalse_ospd vcmpneq_ospd vcmpge_oqpd + vcmpgt_oqpd vcmptrue_uspd vcmppd vcmpeq_osps vcmpeqps vcmplt_osps vcmpltps + vcmple_osps vcmpleps vcmpunord_qps vcmpunordps vcmpneq_uqps vcmpneqps + vcmpnlt_usps vcmpnltps vcmpnle_usps vcmpnleps vcmpord_qps vcmpordps + vcmpeq_uqps vcmpnge_usps vcmpngeps vcmpngt_usps vcmpngtps vcmpfalse_oqps + vcmpfalseps vcmpneq_oqps vcmpge_osps vcmpgeps vcmpgt_osps vcmpgtps + vcmptrue_uqps vcmptrueps vcmpeq_osps vcmplt_oqps vcmple_oqps vcmpunord_sps + 
vcmpneq_usps vcmpnlt_uqps vcmpnle_uqps vcmpord_sps vcmpeq_usps vcmpnge_uqps + vcmpngt_uqps vcmpfalse_osps vcmpneq_osps vcmpge_oqps vcmpgt_oqps vcmptrue_usps + vcmpps vcmpeq_ossd vcmpeqsd vcmplt_ossd vcmpltsd vcmple_ossd vcmplesd + vcmpunord_qsd vcmpunordsd vcmpneq_uqsd vcmpneqsd vcmpnlt_ussd vcmpnltsd + vcmpnle_ussd vcmpnlesd vcmpord_qsd vcmpordsd vcmpeq_uqsd vcmpnge_ussd + vcmpngesd vcmpngt_ussd vcmpngtsd vcmpfalse_oqsd vcmpfalsesd vcmpneq_oqsd + vcmpge_ossd vcmpgesd vcmpgt_ossd vcmpgtsd vcmptrue_uqsd vcmptruesd vcmpeq_ossd + vcmplt_oqsd vcmple_oqsd vcmpunord_ssd vcmpneq_ussd vcmpnlt_uqsd vcmpnle_uqsd + vcmpord_ssd vcmpeq_ussd vcmpnge_uqsd vcmpngt_uqsd vcmpfalse_ossd vcmpneq_ossd + vcmpge_oqsd vcmpgt_oqsd vcmptrue_ussd vcmpsd vcmpeq_osss vcmpeqss vcmplt_osss + vcmpltss vcmple_osss vcmpless vcmpunord_qss vcmpunordss vcmpneq_uqss vcmpneqss + vcmpnlt_usss vcmpnltss vcmpnle_usss vcmpnless vcmpord_qss vcmpordss + vcmpeq_uqss vcmpnge_usss vcmpngess vcmpngt_usss vcmpngtss vcmpfalse_oqss + vcmpfalsess vcmpneq_oqss vcmpge_osss vcmpgess vcmpgt_osss vcmpgtss + vcmptrue_uqss vcmptruess vcmpeq_osss vcmplt_oqss vcmple_oqss vcmpunord_sss + vcmpneq_usss vcmpnlt_uqss vcmpnle_uqss vcmpord_sss vcmpeq_usss vcmpnge_uqss + vcmpngt_uqss vcmpfalse_osss vcmpneq_osss vcmpge_oqss vcmpgt_oqss vcmptrue_usss + vcmpss vcomisd vcomiss vcvtdq2pd vcvtdq2ps vcvtpd2dq vcvtpd2ps vcvtps2dq + vcvtps2pd vcvtsd2si vcvtsd2ss vcvtsi2sd vcvtsi2ss vcvtss2sd vcvtss2si + vcvttpd2dq vcvttps2dq vcvttsd2si vcvttss2si vdivpd vdivps vdivsd vdivss vdppd + vdpps vextractf128 vextractps vhaddpd vhaddps vhsubpd vhsubps vinsertf128 + vinsertps vlddqu vldqqu vlddqu vldmxcsr vmaskmovdqu vmaskmovps vmaskmovpd + vmaxpd vmaxps vmaxsd vmaxss vminpd vminps vminsd vminss vmovapd vmovaps vmovd + vmovq vmovddup vmovdqa vmovqqa vmovdqa vmovdqu vmovqqu vmovdqu vmovhlps + vmovhpd vmovhps vmovlhps vmovlpd vmovlps vmovmskpd vmovmskps vmovntdq vmovntqq + vmovntdq vmovntdqa vmovntpd vmovntps vmovsd vmovshdup vmovsldup vmovss vmovupd + 
vmovups vmpsadbw vmulpd vmulps vmulsd vmulss vorpd vorps vpabsb vpabsw vpabsd + vpacksswb vpackssdw vpackuswb vpackusdw vpaddb vpaddw vpaddd vpaddq vpaddsb + vpaddsw vpaddusb vpaddusw vpalignr vpand vpandn vpavgb vpavgw vpblendvb + vpblendw vpcmpestri vpcmpestrm vpcmpistri vpcmpistrm vpcmpeqb vpcmpeqw + vpcmpeqd vpcmpeqq vpcmpgtb vpcmpgtw vpcmpgtd vpcmpgtq vpermilpd vpermilps + vperm2f128 vpextrb vpextrw vpextrd vpextrq vphaddw vphaddd vphaddsw + vphminposuw vphsubw vphsubd vphsubsw vpinsrb vpinsrw vpinsrd vpinsrq vpmaddwd + vpmaddubsw vpmaxsb vpmaxsw vpmaxsd vpmaxub vpmaxuw vpmaxud vpminsb vpminsw + vpminsd vpminub vpminuw vpminud vpmovmskb vpmovsxbw vpmovsxbd vpmovsxbq + vpmovsxwd vpmovsxwq vpmovsxdq vpmovzxbw vpmovzxbd vpmovzxbq vpmovzxwd + vpmovzxwq vpmovzxdq vpmulhuw vpmulhrsw vpmulhw vpmullw vpmulld vpmuludq + vpmuldq vpor vpsadbw vpshufb vpshufd vpshufhw vpshuflw vpsignb vpsignw vpsignd + vpslldq vpsrldq vpsllw vpslld vpsllq vpsraw vpsrad vpsrlw vpsrld vpsrlq vptest + vpsubb vpsubw vpsubd vpsubq vpsubsb vpsubsw vpsubusb vpsubusw vpunpckhbw + vpunpckhwd vpunpckhdq vpunpckhqdq vpunpcklbw vpunpcklwd vpunpckldq vpunpcklqdq + vpxor vrcpps vrcpss vrsqrtps vrsqrtss vroundpd vroundps vroundsd vroundss + vshufpd vshufps vsqrtpd vsqrtps vsqrtsd vsqrtss vstmxcsr vsubpd vsubps vsubsd + vsubss vtestps vtestpd vucomisd vucomiss vunpckhpd vunpckhps vunpcklpd + vunpcklps vxorpd vxorps vzeroall vzeroupper + -- Intel Carry-Less Multiplication instructions (CLMUL). + pclmullqlqdq pclmulhqlqdq pclmullqhqdq pclmulhqhqdq pclmulqdq + -- Intel AVX Carry-Less Multiplication instructions (CLMUL). + vpclmullqlqdq vpclmulhqlqdq vpclmullqhqdq vpclmulhqhqdq vpclmulqdq + -- Intel Fused Multiply-Add instructions (FMA). 
+ vfmadd132ps vfmadd132pd vfmadd312ps vfmadd312pd vfmadd213ps vfmadd213pd + vfmadd123ps vfmadd123pd vfmadd231ps vfmadd231pd vfmadd321ps vfmadd321pd + vfmaddsub132ps vfmaddsub132pd vfmaddsub312ps vfmaddsub312pd vfmaddsub213ps + vfmaddsub213pd vfmaddsub123ps vfmaddsub123pd vfmaddsub231ps vfmaddsub231pd + vfmaddsub321ps vfmaddsub321pd vfmsub132ps vfmsub132pd vfmsub312ps vfmsub312pd + vfmsub213ps vfmsub213pd vfmsub123ps vfmsub123pd vfmsub231ps vfmsub231pd + vfmsub321ps vfmsub321pd vfmsubadd132ps vfmsubadd132pd vfmsubadd312ps + vfmsubadd312pd vfmsubadd213ps vfmsubadd213pd vfmsubadd123ps vfmsubadd123pd + vfmsubadd231ps vfmsubadd231pd vfmsubadd321ps vfmsubadd321pd vfnmadd132ps + vfnmadd132pd vfnmadd312ps vfnmadd312pd vfnmadd213ps vfnmadd213pd vfnmadd123ps + vfnmadd123pd vfnmadd231ps vfnmadd231pd vfnmadd321ps vfnmadd321pd vfnmsub132ps + vfnmsub132pd vfnmsub312ps vfnmsub312pd vfnmsub213ps vfnmsub213pd vfnmsub123ps + vfnmsub123pd vfnmsub231ps vfnmsub231pd vfnmsub321ps vfnmsub321pd vfmadd132ss + vfmadd132sd vfmadd312ss vfmadd312sd vfmadd213ss vfmadd213sd vfmadd123ss + vfmadd123sd vfmadd231ss vfmadd231sd vfmadd321ss vfmadd321sd vfmsub132ss + vfmsub132sd vfmsub312ss vfmsub312sd vfmsub213ss vfmsub213sd vfmsub123ss + vfmsub123sd vfmsub231ss vfmsub231sd vfmsub321ss vfmsub321sd vfnmadd132ss + vfnmadd132sd vfnmadd312ss vfnmadd312sd vfnmadd213ss vfnmadd213sd vfnmadd123ss + vfnmadd123sd vfnmadd231ss vfnmadd231sd vfnmadd321ss vfnmadd321sd vfnmsub132ss + vfnmsub132sd vfnmsub312ss vfnmsub312sd vfnmsub213ss vfnmsub213sd vfnmsub123ss + vfnmsub123sd vfnmsub231ss vfnmsub231sd vfnmsub321ss vfnmsub321sd + -- Intel post-32 nm processor instructions. + rdfsbase rdgsbase rdrand wrfsbase wrgsbase vcvtph2ps vcvtps2ph adcx adox + rdseed clac stac + -- VIA (Centaur) security instructions. + xstore xcryptecb xcryptcbc xcryptctr xcryptcfb xcryptofb montmul xsha1 xsha256 + -- AMD Lightweight Profiling (LWP) instructions. + llwpcb slwpcb lwpval lwpins + -- AMD XOP and FMA4 instructions (SSE5). 
+ vfmaddpd vfmaddps vfmaddsd vfmaddss vfmaddsubpd vfmaddsubps vfmsubaddpd + vfmsubaddps vfmsubpd vfmsubps vfmsubsd vfmsubss vfnmaddpd vfnmaddps vfnmaddsd + vfnmaddss vfnmsubpd vfnmsubps vfnmsubsd vfnmsubss vfrczpd vfrczps vfrczsd + vfrczss vpcmov vpcomb vpcomd vpcomq vpcomub vpcomud vpcomuq vpcomuw vpcomw + vphaddbd vphaddbq vphaddbw vphadddq vphaddubd vphaddubq vphaddubw vphaddudq + vphadduwd vphadduwq vphaddwd vphaddwq vphsubbw vphsubdq vphsubwd vpmacsdd + vpmacsdqh vpmacsdql vpmacssdd vpmacssdqh vpmacssdql vpmacsswd vpmacssww + vpmacswd vpmacsww vpmadcsswd vpmadcswd vpperm vprotb vprotd vprotq vprotw + vpshab vpshad vpshaq vpshaw vpshlb vpshld vpshlq vpshlw + -- Intel AVX2 instructions. + vmpsadbw vpabsb vpabsw vpabsd vpacksswb vpackssdw vpackusdw vpackuswb vpaddb + vpaddw vpaddd vpaddq vpaddsb vpaddsw vpaddusb vpaddusw vpalignr vpand vpandn + vpavgb vpavgw vpblendvb vpblendw vpcmpeqb vpcmpeqw vpcmpeqd vpcmpeqq vpcmpgtb + vpcmpgtw vpcmpgtd vpcmpgtq vphaddw vphaddd vphaddsw vphsubw vphsubd vphsubsw + vpmaddubsw vpmaddwd vpmaxsb vpmaxsw vpmaxsd vpmaxub vpmaxuw vpmaxud vpminsb + vpminsw vpminsd vpminub vpminuw vpminud vpmovmskb vpmovsxbw vpmovsxbd + vpmovsxbq vpmovsxwd vpmovsxwq vpmovsxdq vpmovzxbw vpmovzxbd vpmovzxbq + vpmovzxwd vpmovzxwq vpmovzxdq vpmuldq vpmulhrsw vpmulhuw vpmulhw vpmullw + vpmulld vpmuludq vpor vpsadbw vpshufb vpshufd vpshufhw vpshuflw vpsignb + vpsignw vpsignd vpslldq vpsllw vpslld vpsllq vpsraw vpsrad vpsrldq vpsrlw + vpsrld vpsrlq vpsubb vpsubw vpsubd vpsubq vpsubsb vpsubsw vpsubusb vpsubusw + vpunpckhbw vpunpckhwd vpunpckhdq vpunpckhqdq vpunpcklbw vpunpcklwd vpunpckldq + vpunpcklqdq vpxor vmovntdqa vbroadcastss vbroadcastsd vbroadcasti128 vpblendd + vpbroadcastb vpbroadcastw vpbroadcastd vpbroadcastq vpermd vpermpd vpermps + vpermq vperm2i128 vextracti128 vinserti128 vpmaskmovd vpmaskmovq vpmaskmovd + vpmaskmovq vpsllvd vpsllvq vpsllvd vpsllvq vpsravd vpsrlvd vpsrlvq vpsrlvd + vpsrlvq vgatherdpd vgatherqpd vgatherdpd vgatherqpd vgatherdps 
vgatherqps + vgatherdps vgatherqps vpgatherdd vpgatherqd vpgatherdd vpgatherqd vpgatherdq + vpgatherqq vpgatherdq vpgatherqq + -- Transactional Synchronization Extensions (TSX). + xabort xbegin xend xtest + -- Intel BMI1 and BMI2 instructions AMD TBM instructions. + andn bextr blci blcic blsi blsic blcfill blsfill blcmsk blsmsk blsr blcs bzhi + mulx pdep pext rorx sarx shlx shrx tzcnt tzmsk t1mskc + -- Systematic names for the hinting nop instructions. + hint_nop0 hint_nop1 hint_nop2 hint_nop3 hint_nop4 hint_nop5 hint_nop6 + hint_nop7 hint_nop8 hint_nop9 hint_nop10 hint_nop11 hint_nop12 hint_nop13 + hint_nop14 hint_nop15 hint_nop16 hint_nop17 hint_nop18 hint_nop19 hint_nop20 + hint_nop21 hint_nop22 hint_nop23 hint_nop24 hint_nop25 hint_nop26 hint_nop27 + hint_nop28 hint_nop29 hint_nop30 hint_nop31 hint_nop32 hint_nop33 hint_nop34 + hint_nop35 hint_nop36 hint_nop37 hint_nop38 hint_nop39 hint_nop40 hint_nop41 + hint_nop42 hint_nop43 hint_nop44 hint_nop45 hint_nop46 hint_nop47 hint_nop48 + hint_nop49 hint_nop50 hint_nop51 hint_nop52 hint_nop53 hint_nop54 hint_nop55 + hint_nop56 hint_nop57 hint_nop58 hint_nop59 hint_nop60 hint_nop61 hint_nop62 + hint_nop63 +]])) +lex:add_style('instruction', lexer.STYLE_FUNCTION) + +-- Registers. +lex:add_rule('register', token('register', word_match[[ + -- 32-bit registers. + ah al ax bh bl bp bx ch cl cx dh di dl dx eax ebx ebx ecx edi edx esi esp fs + mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7 si st0 st1 st2 st3 st4 st5 st6 st7 xmm0 xmm1 + xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7 + -- 64-bit registers. + bpl dil gs r8 r8b r8w r9 r9b r9w r10 r10b r10w r11 r11b r11w r12 r12b r12w r13 + r13b r13w r14 r14b r14w r15 r15b r15w rax rbp rbx rcx rdi rdx rsi rsp sil xmm8 + xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15 ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 + ymm14 ymm15 +]])) +lex:add_style('register', lexer.STYLE_CONSTANT) + +-- Types. 
+local sizes = word_match[[ + byte word dword qword tword oword yword + a16 a32 a64 o16 o32 o64 -- instructions +]] +local wrt_types = '..' * word_match[[ + start gotpc gotoff gottpoff got plt sym tlsie +]] +lex:add_rule('type', token(lexer.TYPE, sizes + wrt_types)) + +local word = (lexer.alpha + S('$._?')) * (lexer.alnum + S('$._?#@~'))^0 + +-- Constants. +local constants = word_match[[ + __float128h__ __float128l__ __float16__ __float32__ __float64__ __float8__ + __float80e__ __float80m__ __Infinity__ __NaN__ __QNaN__ __SNaN__ +]] +lex:add_rule('constant', token(lexer.CONSTANT, constants + + '$' * P('$')^-1 * -word)) + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, word * ':')) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, ';' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + + lexer.integer * S('hqb')^-1)) + +-- Preprocessor. +local preproc_word = word_match[[ + arg assign clear define defstr deftok depend elif elifctx elifdef elifempty + elifenv elifid elifidn elifidni elifmacro elifn elifnctx elifndef elifnempty + elifnenv elifnid elifnidn elifnidni elifnmacro elifnnum elifnstr elifntoken + elifnum elifstr eliftoken else endif endmacro endrep endwhile error exitmacro + exitrep exitwhile fatal final idefine idefstr ideftok if ifctx ifdef ifempty + ifenv ifid ifidn ifidni ifmacro ifn ifnctx ifndef ifnempty ifnenv ifnid ifnidn + ifnidni ifnmacro ifnnum ifnstr ifntoken ifnum ifstr iftoken imacro include + ixdefine line local macro pathsearch pop push rep repl rmacro rotate stacksize + strcat strlen substr undef unmacro use warning while xdefine +]] +local preproc_symbol = '??' 
+ S('!$+?') + '%' * -lexer.space + R('09')^1 +lex:add_rule('preproc', token(lexer.PREPROCESSOR, '%' * (preproc_word + + preproc_symbol))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|~:,()[]'))) + +-- Fold points. +lex:add_fold_point(lexer.PREPROCESSOR, '%if', '%endif') +lex:add_fold_point(lexer.PREPROCESSOR, '%macro', '%endmacro') +lex:add_fold_point(lexer.PREPROCESSOR, '%rep', '%endrep') +lex:add_fold_point(lexer.PREPROCESSOR, '%while', '%endwhile') +lex:add_fold_point(lexer.KEYWORD, 'struc', 'endstruc') +lex:add_fold_point(lexer.COMMENT, ';', lexer.fold_line_comments(';')) + +return lex diff --git a/lexlua/asp.lua b/lexlua/asp.lua new file mode 100644 index 000000000..ef955c262 --- /dev/null +++ b/lexlua/asp.lua @@ -0,0 +1,34 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- ASP LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local html = lexer.load('html') +local lex = lexer.new('asp', {inherit = html}) -- proxy for HTML + +-- Embedded VB. +local vb = lexer.load('vb') +local vb_start_rule = token('asp_tag', '<%' * P('=')^-1) +local vb_end_rule = token('asp_tag', '%>') +lex:embed(vb, vb_start_rule, vb_end_rule) +lex:add_style('asp_tag', lexer.STYLE_EMBEDDED) + +-- Embedded VBScript. +local vbs = lexer.load('vbscript') +local script_element = word_match('script', true) +local vbs_start_rule = #(P('<') * script_element * (P(function(input, index) + if input:find('^%s+language%s*=%s*(["\'])vbscript%1', index) or + input:find('^%s+type%s*=%s*(["\'])text/vbscript%1', index) then + return index + end +end) + '>')) * html.embed_start_tag -- <script language="vbscript"> +local vbs_end_rule = #('</' * script_element * lexer.space^0 * '>') * + html.embed_end_tag -- </script> +lex:embed(vbs, vbs_start_rule, vbs_end_rule) + +-- Fold points. 
+lex:add_fold_point('asp_tag', '<%', '%>') + +return lex diff --git a/lexlua/autoit.lua b/lexlua/autoit.lua new file mode 100644 index 000000000..68121bb69 --- /dev/null +++ b/lexlua/autoit.lua @@ -0,0 +1,132 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- AutoIt LPeg lexer. +-- Contributed by Jeff Stone. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('autoit') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + False True And Or Not ContinueCase ContinueLoop Default Dim Global Local Const + Do Until Enum Exit ExitLoop For To Step Next In Func Return EndFunc If Then + ElseIf Else EndIf Null ReDim Select Case EndSelect Static Switch EndSwitch + Volatile While WEnd With EndWith +]], true))) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match([[ + Abs ACos AdlibRegister AdlibUnRegister Asc AscW ASin Assign ATan + AutoItSetOption AutoItWinGetTitle AutoItWinSetTitle Beep Binary BinaryLen + BinaryMid BinaryToString BitAND BitNOT BitOR BitRotate BitShift BitXOR + BlockInput Break Call CDTray Ceiling Chr ChrW ClipGet ClipPut ConsoleRead + ConsoleWrite ConsoleWriteError ControlClick ControlCommand ControlDisable + ControlEnable ControlFocus ControlGetFocus ControlGetHandle ControlGetPos + ControlGetText ControlHide ControlListView ControlMove ControlSend + ControlSetText ControlShow ControlTreeView Cos Dec DirCopy DirCreate + DirGetSize DirMove DirRemove DllCall DllCallAddress DllCallbackFree + DllCallbackGetPtr DllCallbackRegister DllClose DllOpen DllStructCreate + DllStructGetData DllStructGetPtr DllStructGetSize DllStructSetData + DriveGetDrive DriveGetFileSystem DriveGetLabel DriveGetSerial DriveGetType + DriveMapAdd DriveMapDel DriveMapGet DriveSetLabel DriveSpaceFree + DriveSpaceTotal 
DriveStatus EnvGet EnvSet EnvUpdate Eval Execute Exp + FileChangeDir FileClose FileCopy FileCreateNTFSLink FileCreateShortcut + FileDelete FileExists FileFindFirstFile FileFindNextFile FileFlush + FileGetAttrib FileGetEncoding FileGetLongName FileGetPos FileGetShortcut + FileGetShortName FileGetSize FileGetTime FileGetVersion FileInstall FileMove + FileOpen FileOpenDialog FileRead FileReadLine FileReadToArray FileRecycle + FileRecycleEmpty FileSaveDialog FileSelectFolder FileSetAttrib FileSetEnd + FileSetPos FileSetTime FileWrite FileWriteLine Floor FtpSetProxy FuncName + GUICreate GUICtrlCreateAvi GUICtrlCreateButton GUICtrlCreateCheckbox + GUICtrlCreateCombo GUICtrlCreateContextMenu GUICtrlCreateDate + GUICtrlCreateDummy GUICtrlCreateEdit GUICtrlCreateGraphic GUICtrlCreateGroup + GUICtrlCreateIcon GUICtrlCreateInput GUICtrlCreateLabel GUICtrlCreateList + GUICtrlCreateListView GUICtrlCreateListViewItem GUICtrlCreateMenu + GUICtrlCreateMenuItem GUICtrlCreateMonthCal GUICtrlCreateObj GUICtrlCreatePic + GUICtrlCreateProgress GUICtrlCreateRadio GUICtrlCreateSlider GUICtrlCreateTab + GUICtrlCreateTabItem GUICtrlCreateTreeView GUICtrlCreateTreeViewItem + GUICtrlCreateUpdown GUICtrlDelete GUICtrlGetHandle GUICtrlGetState GUICtrlRead + GUICtrlRecvMsg GUICtrlRegisterListViewSort GUICtrlSendMsg GUICtrlSendToDummy + GUICtrlSetBkColor GUICtrlSetColor GUICtrlSetCursor GUICtrlSetData + GUICtrlSetDefBkColor GUICtrlSetDefColor GUICtrlSetFont GUICtrlSetGraphic + GUICtrlSetImage GUICtrlSetLimit GUICtrlSetOnEvent GUICtrlSetPos + GUICtrlSetResizing GUICtrlSetState GUICtrlSetStyle GUICtrlSetTip GUIDelete + GUIGetCursorInfo GUIGetMsg GUIGetStyle GUIRegisterMsg GUISetAccelerators + GUISetBkColor GUISetCoord GUISetCursor GUISetFont GUISetHelp GUISetIcon + GUISetOnEvent GUISetState GUISetStyle GUIStartGroup GUISwitch Hex HotKeySet + HttpSetProxy HttpSetUserAgent HWnd InetClose InetGet InetGetInfo InetGetSize + InetRead IniDelete IniRead IniReadSection IniReadSectionNames IniRenameSection 
+ IniWrite IniWriteSection InputBox Int IsAdmin IsArray IsBinary IsBool + IsDeclared IsDllStruct IsFloat IsFunc IsHWnd IsInt IsKeyword IsNumber IsObj + IsPtr IsString Log MemGetStats Mod MouseClick MouseClickDrag MouseDown + MouseGetCursor MouseGetPos MouseMove MouseUp MouseWheel MsgBox Number + ObjCreate ObjCreateInterface ObjEvent ObjGet ObjName OnAutoItExitRegister + OnAutoItExitUnRegister Ping PixelChecksum PixelGetColor PixelSearch + ProcessClose ProcessExists ProcessGetStats ProcessList ProcessSetPriority + ProcessWait ProcessWaitClose ProgressOff ProgressOn ProgressSet Ptr Random + RegDelete RegEnumKey RegEnumVal RegRead RegWrite Round Run RunAs RunAsWait + RunWait Send SendKeepActive SetError SetExtended ShellExecute ShellExecuteWait + Shutdown Sin Sleep SoundPlay SoundSetWaveVolume SplashImageOn SplashOff + SplashTextOn Sqrt SRandom StatusbarGetText StderrRead StdinWrite StdioClose + StdoutRead String StringAddCR StringCompare StringFormat StringFromASCIIArray + StringInStr StringIsAlNum StringIsAlpha StringIsASCII StringIsDigit + StringIsFloat StringIsInt StringIsLower StringIsSpace StringIsUpper + StringIsXDigit StringLeft StringLen StringLower StringMid StringRegExp + StringRegExpReplace StringReplace StringReverse StringRight StringSplit + StringStripCR StringStripWS StringToASCIIArray StringToBinary StringTrimLeft + StringTrimRight StringUpper Tan TCPAccept TCPCloseSocket TCPConnect + TCPListen TCPNameToIP TCPRecv TCPSend TCPShutdown TCPStartup TimerDiff + TimerInit ToolTip TrayCreateItem TrayCreateMenu TrayGetMsg TrayItemDelete + TrayItemGetHandle TrayItemGetState TrayItemGetText TrayItemSetOnEvent + TrayItemSetState TrayItemSetText TraySetClick TraySetIcon TraySetOnEvent + TraySetPauseIcon TraySetState TraySetToolTip TrayTip UBound UDPBind + UDPCloseSocket UDPOpen UDPRecv UDPSend UDPShutdown UDPStartup VarGetType + WinActivate WinActive WinClose WinExists WinFlash WinGetCaretPos + WinGetClassList WinGetClientSize WinGetHandle WinGetPos WinGetProcess 
+ WinGetState WinGetText WinGetTitle WinKill WinList WinMenuSelectItem + WinMinimizeAll WinMinimizeAllUndo WinMove WinSetOnTop WinSetState WinSetTitle + WinSetTrans WinWait WinWaitActive WinWaitClose WinWaitNotActive +]], true))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = ';' * lexer.nonnewline_esc^0 +local block_comment1 = '#comments-start' * (lexer.any - '#comments-end')^0 * + P('#comments-end')^-1 +local block_comment2 = '#cs' * (lexer.any - '#ce')^0 * P('#ce')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment1 + + block_comment2)) + +-- Preprocessor. +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, '#' * word_match([[ + include-once include pragma forceref RequireAdmin NoTrayIcon + OnAutoItStartRegister +]], true))) + +-- Strings. +local dq_str = lexer.delimited_range('"', true, true) +local sq_str = lexer.delimited_range("'", true, true) +local inc = lexer.delimited_range('<>', true, true, true) +lex:add_rule('string', token(lexer.STRING, dq_str + sq_str + inc)) + +-- Macros. +lex:add_rule('macro', token('macro', '@' * (lexer.alnum + '_')^1)) +lex:add_style('macro', lexer.STYLE_PREPROCESSOR) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + '_')^1)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-^*/&<>=?:()[]'))) + +return lex diff --git a/lexlua/awk.lua b/lexlua/awk.lua new file mode 100644 index 000000000..a3f69fd83 --- /dev/null +++ b/lexlua/awk.lua @@ -0,0 +1,297 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- AWK LPeg lexer. +-- Modified by Wolfgang Seeberg 2012, 2013. 
+ +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('awk') + +local LEFTBRACKET = '[' +local RIGHTBRACKET = ']' +local SLASH = '/' +local BACKSLASH = '\\' +local CARET = '^' +local CR = '\r' +local LF = '\n' +local CRLF = CR .. LF +local DQUOTE = '"' +local DELIMITER_MATCHES = {['('] = ')', ['['] = ']'} +local COMPANION = {['('] = '[', ['['] = '('} +local CC = { + alnum = 1, alpha = 1, blank = 1, cntrl = 1, digit = 1, graph = 1, lower = 1, + print = 1, punct = 1, space = 1, upper = 1, xdigit = 1 +} +local LastRegexEnd = 0 +local BackslashAtCommentEnd = 0 +local KW_BEFORE_RX = { + case = 1, ['do'] = 1, ['else'] = 1, exit = 1, print = 1, printf = 1, + ['return'] = 1 +} + +local function findKeyword(input, e) + local i = e + while i > 0 and input:find("^[%l]", i) do i = i - 1 end + local w = input:sub(i + 1, e) + if i == 0 then + return KW_BEFORE_RX[w] == 1 + elseif input:find("^[%u%d_]", i) then + return false + else + return KW_BEFORE_RX[w] == 1 + end +end + +local function isRegex(input, i) + while i >= 1 and input:find('^[ \t]', i) do i = i - 1 end + if i < 1 then return true end + if input:find("^[-!%%&(*+,:;<=>?[^{|}~\f]", i) or findKeyword(input, i) then + return true + elseif input:sub(i, i) == SLASH then + return i ~= LastRegexEnd -- deals with /xx/ / /yy/. 
+ elseif input:find('^[]%w)."]', i) then + return false + elseif input:sub(i, i) == LF then + if i == 1 then return true end + i = i - 1 + if input:sub(i, i) == CR then + if i == 1 then return true end + i = i - 1 + end + elseif input:sub(i, i) == CR then + if i == 1 then return true end + i = i - 1 + else + return false + end + if input:sub(i, i) == BACKSLASH and i ~= BackslashAtCommentEnd then + return isRegex(input, i - 1) + else + return true + end +end + +local function eatCharacterClass(input, s, e) + local i = s + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i + 1) == ':]' then + local str = input:sub(s, i - 1) + return CC[str] == 1 and i + 1 + end + i = i + 1 + end + return false +end + +local function eatBrackets(input, i, e) + if input:sub(i, i) == CARET then i = i + 1 end + if input:sub(i, i) == RIGHTBRACKET then i = i + 1 end + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i) == RIGHTBRACKET then + return i + elseif input:sub(i, i + 1) == '[:' then + i = eatCharacterClass(input, i + 2, e) + if not i then return false end + elseif input:sub(i, i) == BACKSLASH then + i = i + 1 + if input:sub(i, i + 1) == CRLF then i = i + 1 end + end + i = i + 1 + end + return false +end + +local function eatRegex(input, i) + local e = #input + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i) == SLASH then + LastRegexEnd = i + return i + elseif input:sub(i, i) == LEFTBRACKET then + i = eatBrackets(input, i + 1, e) + if not i then return false end + elseif input:sub(i, i) == BACKSLASH then + i = i + 1 + if input:sub(i, i + 1) == CRLF then i = i + 1 end + end + i = i + 1 + end + return false +end + +local ScanRegexResult +local function scanGawkRegex(input, index) + if isRegex(input, index - 2) then + local i = eatRegex(input, index) + if not i then + ScanRegexResult = false + return false + end + local rx = input:sub(index - 1, i) + for bs in 
rx:gmatch("[^\\](\\+)[BSsWwy<>`']") do + -- /\S/ is special, but /\\S/ is not. + if #bs % 2 == 1 then return i + 1 end + end + ScanRegexResult = i + 1 + else + ScanRegexResult = false + end + return false +end +-- Is only called immediately after scanGawkRegex(). +local function scanRegex() + return ScanRegexResult +end + +local function scanString(input, index) + local i = index + local e = #input + while i <= e do + if input:find('^[\r\n]', i) then + return false + elseif input:sub(i, i) == DQUOTE then + return i + 1 + elseif input:sub(i, i) == BACKSLASH then + i = i + 1 + -- lexer.delimited_range() doesn't handle CRLF. + if input:sub(i, i + 1) == CRLF then i = i + 1 end + end + i = i + 1 + end + return false +end + +-- purpose: prevent isRegex() from entering a comment line that ends with a +-- backslash. +local function scanComment(input, index) + local _, i = input:find('[^\r\n]*', index) + if input:sub(i, i) == BACKSLASH then BackslashAtCommentEnd = i end + return i + 1 +end + +local function scanFieldDelimiters(input, index) + local i = index + local e = #input + local left = input:sub(i - 1, i - 1) + local count = 1 + local right = DELIMITER_MATCHES[left] + local left2 = COMPANION[left] + local count2 = 0 + local right2 = DELIMITER_MATCHES[left2] + while i <= e do + if input:find('^[#\r\n]', i) then + return false + elseif input:sub(i, i) == right then + count = count - 1 + if count == 0 then return count2 == 0 and i + 1 end + elseif input:sub(i, i) == left then + count = count + 1 + elseif input:sub(i, i) == right2 then + count2 = count2 - 1 + if count2 < 0 then return false end + elseif input:sub(i, i) == left2 then + count2 = count2 + 1 + elseif input:sub(i, i) == DQUOTE then + i = scanString(input, i + 1) + if not i then return false end + i = i - 1 + elseif input:sub(i, i) == SLASH then + if isRegex(input, i - 1) then + i = eatRegex(input, i + 1) + if not i then return false end + end + elseif input:sub(i, i) == BACKSLASH then + if input:sub(i + 1, i + 
2) == CRLF then + i = i + 2 + elseif input:find('^[\r\n]', i + 1) then + i = i + 1 + end + end + i = i + 1 + end + return false +end + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * P(scanComment))) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, DQUOTE * P(scanString))) + +-- No leading sign because it might be binary. +local float = ((lexer.digit^1 * ('.' * lexer.digit^0)^-1) + + ('.' * lexer.digit^1)) * + (S('eE') * S('+-')^-1 * lexer.digit^1)^-1 + +-- Fields. E.g. $1, $a, $(x), $a(x), $a[x], $"1", $$a, etc. +lex:add_rule('field', + token('field', P('$') * S('$+-')^0 * + (float + + lexer.word^0 * '(' * P(scanFieldDelimiters) + + lexer.word^1 * ('[' * P(scanFieldDelimiters))^-1 + + '"' * P(scanString) + + '/' * P(eatRegex) * '/'))) +lex:add_style('field', lexer.STYLE_LABEL) + +-- Regular expressions. +-- Slash delimited regular expressions are preceded by most operators or +-- the keywords 'print' and 'case', possibly on a preceding line. They +-- can contain unescaped slashes and brackets in brackets. Some escape +-- sequences like '\S', '\s' have special meanings with Gawk. Tokens that +-- contain them are displayed differently. +lex:add_rule('gawkRegex', token('gawkRegex', SLASH * P(scanGawkRegex))) +lex:add_style('gawkRegex', lexer.STYLE_PREPROCESSOR..',underlined') +lex:add_rule('regex', token(lexer.REGEX, SLASH * P(scanRegex))) + +-- Operators. +lex:add_rule('gawkOperator', token('gawkOperator', P("|&") + "@" + "**=" + + "**")) +lex:add_style('gawkOperator', lexer.STYLE_OPERATOR..',underlined') +lex:add_rule('operator', token(lexer.OPERATOR, S('!%&()*+,-/:;<=>?[\\]^{|}~'))) + +-- Numbers. +lex:add_rule('gawkNumber', token('gawkNumber', lexer.hex_num + lexer.oct_num)) +lex:add_style('gawkNumber', lexer.STYLE_NUMBER..',underlined') +lex:add_rule('number', token(lexer.NUMBER, float)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + BEGIN END atan2 break close continue cos delete do else exit exp fflush for + function getline gsub if in index int length log match next nextfile print + printf rand return sin split sprintf sqrt srand sub substr system tolower + toupper while +]])) + +lex:add_rule('builtInVariable', token('builtInVariable', word_match[[ + ARGC ARGV CONVFMT ENVIRON FILENAME FNR FS NF NR OFMT OFS ORS RLENGTH RS RSTART + SUBSEP +]])) +lex:add_style('builtInVariable', lexer.STYLE_CONSTANT) + +lex:add_rule('gawkBuiltInVariable', token('gawkBuiltInVariable', word_match[[ + ARGIND BINMODE ERRNO FIELDWIDTHS FPAT FUNCTAB IGNORECASE LINT PREC PROCINFO + ROUNDMODE RT SYMTAB TEXTDOMAIN +]])) +lex:add_style('gawkBuiltInVariable', lexer.STYLE_CONSTANT..',underlined') + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, lexer.word * #P('('))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/bash.lua b/lexlua/bash.lua new file mode 100644 index 000000000..bd738e47b --- /dev/null +++ b/lexlua/bash.lua @@ -0,0 +1,60 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Shell LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('bash') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + if then elif else fi case in esac while for do done continue local return + select + -- Operators. + -a -b -c -d -e -f -g -h -k -p -r -s -t -u -w -x -O -G -L -S -N -nt -ot -ef -o + -z -n -eq -ne -lt -le -gt -ge +]])) + +-- Identifiers. 
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local sq_str = lexer.delimited_range("'", false, true) +local dq_str = lexer.delimited_range('"') +local ex_str = lexer.delimited_range('`') +local heredoc = '<<' * P(function(input, index) + local s, e, _, delimiter = + input:find('%-?(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index) + if s == index and delimiter then + local _, e = input:find('[\n\r\f]+'..delimiter, e) + return e and e + 1 or #input + 1 + end +end) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, + '$' * (S('!#?*@$') + lexer.digit^1 + lexer.word + + lexer.delimited_range('{}', true, true)))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'if', 'fi') +lex:add_fold_point(lexer.KEYWORD, 'case', 'esac') +lex:add_fold_point(lexer.KEYWORD, 'do', 'done') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/batch.lua b/lexlua/batch.lua new file mode 100644 index 000000000..c81ec08d1 --- /dev/null +++ b/lexlua/batch.lua @@ -0,0 +1,52 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Batch LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('batch', {case_insensitive_fold_points = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + cd chdir md mkdir cls for if echo echo. 
move copy ren del set call exit + setlocal shift endlocal pause defined exist errorlevel else in do NUL AUX PRN + not goto pushd popd +]], true))) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match([[ + APPEND ATTRIB CHKDSK CHOICE DEBUG DEFRAG DELTREE DISKCOMP DISKCOPY DOSKEY + DRVSPACE EMM386 EXPAND FASTOPEN FC FDISK FIND FORMAT GRAPHICS KEYB LABEL + LOADFIX MEM MODE MORE MOVE MSCDEX NLSFUNC POWER PRINT RD REPLACE RESTORE + SETVER SHARE SORT SUBST SYS TREE UNDELETE UNFORMAT VSAFE XCOPY +]], true))) + +-- Comments. +local rem = (P('REM') + 'rem') * lexer.space +lex:add_rule('comment', token(lexer.COMMENT, (rem + '::') * lexer.nonnewline^0)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, + '%' * (lexer.digit + '%' * lexer.alpha) + + lexer.delimited_range('%', true, true))) + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, ':' * lexer.word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+|&!<>='))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'setlocal', 'endlocal') + +return lex diff --git a/lexlua/bibtex.lua b/lexlua/bibtex.lua new file mode 100644 index 000000000..bdca1a807 --- /dev/null +++ b/lexlua/bibtex.lua @@ -0,0 +1,45 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Bibtex LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('bibtex') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) + +-- Fields. 
+lex:add_rule('field', token('field', word_match[[ + author title journal year volume number pages month note key publisher editor + series address edition howpublished booktitle organization chapter school + institution type isbn issn affiliation issue keyword url +]])) +lex:add_style('field', lexer.STYLE_CONSTANT) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range('"') + + lexer.delimited_range('{}', false, true, true))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(',='))) + +-- Embedded in Latex. +local latex = lexer.load('latex') + +-- Embedded Bibtex. +local entry = token('entry', P('@') * word_match([[ + book article booklet conference inbook incollection inproceedings manual + mastersthesis lambda misc phdthesis proceedings techreport unpublished +]], true)) +lex:add_style('entry', lexer.STYLE_PREPROCESSOR) +local bibtex_start_rule = entry * ws^0 * token(lexer.OPERATOR, P('{')) +local bibtex_end_rule = token(lexer.OPERATOR, P('}')) +latex:embed(lex, bibtex_start_rule, bibtex_end_rule) + +return lex diff --git a/lexlua/boo.lua b/lexlua/boo.lua new file mode 100644 index 000000000..d1b1d6849 --- /dev/null +++ b/lexlua/boo.lua @@ -0,0 +1,64 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Boo LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('boo') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + and break cast continue elif else ensure except for given goto if in isa is + not or otherwise pass raise ref try unless when while + -- Definitions. 
+ abstract callable class constructor def destructor do enum event final get + interface internal of override partial private protected public return set + static struct transient virtual yield + -- Namespaces. + as from import namespace + -- Other. + self super null true false +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + bool byte char date decimal double duck float int long object operator regex + sbyte short single string timespan uint ulong ushort +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + array assert checked enumerate __eval__ filter getter len lock map matrix max + min normalArrayIndexing print property range rawArrayIndexing required + __switch__ typeof unchecked using yieldAll zip +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local sq_str = lexer.delimited_range("'", true) +local dq_str = lexer.delimited_range('"', true) +local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * + lexer.delimited_range('/', true) +lex:add_rule('string', token(lexer.STRING, triple_dq_str + sq_str + dq_str) + + token(lexer.REGEX, regex_str)) + +-- Comments. +local line_comment = '#' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + (S('msdhsfFlL') + 'ms')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))) + +return lex diff --git a/lexlua/caml.lua b/lexlua/caml.lua new file mode 100644 index 000000000..10e308af0 --- /dev/null +++ b/lexlua/caml.lua @@ -0,0 +1,62 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- OCaml LPeg lexer. 
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local lex = lexer.new('caml')
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+  and as asr begin class closed constraint do done downto else end exception
+  external failwith false flush for fun function functor if in include incr
+  inherit land let load lor lsl lsr lxor match method mod module mutable new not
+  of open option or parser private raise rec ref regexp sig stderr stdin stdout
+  struct then to true try type val virtual when while with
+]]))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match[[
+  bool char float int string unit
+]]))
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match[[
+  abs abs_float acos asin atan atan2 at_exit bool_of_string ceil char_of_int
+  classify_float close_in close_in_noerr close_out close_out_noerr compare cos
+  cosh decr epsilon_float exit exp failwith float float_of_int float_of_string
+  floor flush flush_all format_of_string frexp fst ignore in_channel_length incr
+  infinity input input_binary_int input_byte input_char input_line input_value
+  int_of_char int_of_float int_of_string invalid_arg ldexp log log10 max
+  max_float max_int min min_float min_int mod modf mod_float nan open_in
+  open_in_bin open_in_gen open_out open_out_bin open_out_gen out_channel_length
+  output output_binary_int output_byte output_char output_string output_value
+  pos_in pos_out pred prerr_char prerr_endline prerr_float prerr_int
+  prerr_newline prerr_string print_char print_endline print_float print_int
+  print_newline print_string raise read_float read_int read_line really_input
+  seek_in seek_out set_binary_mode_in set_binary_mode_out sin sinh snd sqrt
+  stderr stdin stdout string_of_bool string_of_float string_of_format
+  string_of_int succ tan tanh truncate
+]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.nested_pair('(*', '*)'))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+-*/.,:;~!#%^&|?[](){}'))) + +return lex diff --git a/lexlua/chuck.lua b/lexlua/chuck.lua new file mode 100644 index 000000000..988732ab4 --- /dev/null +++ b/lexlua/chuck.lua @@ -0,0 +1,72 @@ +-- Copyright 2010-2018 Martin Morawetz. See License.txt. +-- ChucK LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('chuck') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + -- Control structures. + break continue else for if repeat return switch until while + -- Other chuck keywords. + function fun spork const new +]])) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + -- Special values. + false maybe me null NULL pi true +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + float int time dur void same +]])) + +-- Classes. +lex:add_rule('class', token(lexer.CLASS, word_match[[ + -- Class keywords. + class extends implements interface private protected public pure static super + this +]])) + +-- Global ugens. +lex:add_rule('ugen', token('ugen', word_match[[dac adc blackhole]])) +lex:add_style('ugen', lexer.STYLE_CONSTANT) + +-- Times. +lex:add_rule('time', token('time', word_match[[ + samp ms second minute hour day week +]])) +lex:add_style('time', lexer.STYLE_NUMBER) + +-- Special special value. 
+lex:add_rule('now', token('now', P('now'))) +lex:add_style('now', lexer.STYLE_CONSTANT..',bold') + +-- Strings. +local sq_str = P('L')^-1 * lexer.delimited_range("'", true) +local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}@'))) + +return lex diff --git a/lexlua/cmake.lua b/lexlua/cmake.lua new file mode 100644 index 000000000..0d3b3c5d4 --- /dev/null +++ b/lexlua/cmake.lua @@ -0,0 +1,140 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- CMake LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('cmake') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + IF ENDIF FOREACH ENDFOREACH WHILE ENDWHILE ELSE ELSEIF +]], true))) + +-- Commands. 
+lex:add_rule('command', token(lexer.FUNCTION, word_match([[ + ADD_CUSTOM_COMMAND ADD_CUSTOM_TARGET ADD_DEFINITIONS ADD_DEPENDENCIES + ADD_EXECUTABLE ADD_LIBRARY ADD_SUBDIRECTORY ADD_TEST AUX_SOURCE_DIRECTORY + BUILD_COMMAND BUILD_NAME CMAKE_MINIMUM_REQUIRED CONFIGURE_FILE + CREATE_TEST_SOURCELIST ENABLE_LANGUAGE ENABLE_TESTING ENDMACRO EXEC_PROGRAM + EXECUTE_PROCESS EXPORT_LIBRARY_DEPENDENCIES FILE FIND_FILE FIND_LIBRARY + FIND_PACKAGE FIND_PATH FIND_PROGRAM FLTK_WRAP_UI GET_CMAKE_PROPERTY + GET_DIRECTORY_PROPERTY GET_FILENAME_COMPONENT GET_SOURCE_FILE_PROPERTY + GET_TARGET_PROPERTY GET_TEST_PROPERTY INCLUDE INCLUDE_DIRECTORIES + INCLUDE_EXTERNAL_MSPROJECT INCLUDE_REGULAR_EXPRESSION INSTALL INSTALL_FILES + INSTALL_PROGRAMS INSTALL_TARGETS LINK_DIRECTORIES LINK_LIBRARIES LIST + LOAD_CACHE LOAD_COMMAND MACRO MAKE_DIRECTORY MARK_AS_ADVANCED MATH MESSAGE + OPTION OUTPUT_REQUIRED_FILES PROJECT QT_WRAP_CPP QT_WRAP_UI REMOVE + REMOVE_DEFINITIONS SEPARATE_ARGUMENTS SET SET_DIRECTORY_PROPERTIES + SET_SOURCE_FILES_PROPERTIES SET_TARGET_PROPERTIES SET_TESTS_PROPERTIES + SITE_NAME SOURCE_GROUP STRING SUBDIR_DEPENDS SUBDIRS TARGET_LINK_LIBRARIES + TRY_COMPILE TRY_RUN USE_MANGLED_MESA UTILITY_SOURCE VARIABLE_REQUIRES + VTK_MAKE_INSTANTIATOR VTK_WRAP_JAVA VTK_WRAP_PYTHON VTK_WRAP_TCL WRITE_FILE +]], true))) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match([[ + BOOL CACHE FALSE N NO ON OFF NOTFOUND TRUE +]], true))) + +-- Variables. 
+lex:add_rule('variable', token(lexer.VARIABLE, word_match[[ + APPLE ARGS BORLAND CMAKE_AR CMAKE_BACKWARDS_COMPATIBILITY CMAKE_BASE_NAME + CMAKE_BINARY_DIR CMAKE_BUILD_TOOL CMAKE_BUILD_TYPE CMAKE_CACHEFILE_DIR + CMAKE_CACHE_MAJOR_VERSION CMAKE_CACHE_MINOR_VERSION + CMAKE_CACHE_RELEASE_VERSION CMAKE_C_COMPILE_OBJECT CMAKE_C_COMPILER + CMAKE_C_COMPILER_ARG1 CMAKE_C_COMPILER_ENV_VAR CMAKE_C_COMPILER_FULLPATH + CMAKE_C_COMPILER_LOADED CMAKE_C_COMPILER_WORKS CMAKE_C_CREATE_SHARED_LIBRARY + CMAKE_C_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS CMAKE_C_CREATE_SHARED_MODULE + CMAKE_C_CREATE_STATIC_LIBRARY CMAKE_CFG_INTDIR CMAKE_C_FLAGS + CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_DEBUG_INIT CMAKE_C_FLAGS_INIT + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_MINSIZEREL_INIT CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_RELEASE_INIT CMAKE_C_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS_RELWITHDEBINFO_INIT CMAKE_C_IGNORE_EXTENSIONS + CMAKE_C_INFORMATION_LOADED CMAKE_C_LINKER_PREFERENCE CMAKE_C_LINK_EXECUTABLE + CMAKE_C_LINK_FLAGS CMAKE_COLOR_MAKEFILE CMAKE_COMMAND CMAKE_COMPILER_IS_GNUCC + CMAKE_COMPILER_IS_GNUCC_RUN CMAKE_COMPILER_IS_GNUCXX + CMAKE_COMPILER_IS_GNUCXX_RUN CMAKE_C_OUTPUT_EXTENSION + CMAKE_C_SOURCE_FILE_EXTENSIONS CMAKE_CTEST_COMMAND CMAKE_CURRENT_BINARY_DIR + CMAKE_CURRENT_SOURCE_DIR CMAKE_CXX_COMPILE_OBJECT CMAKE_CXX_COMPILER + CMAKE_CXX_COMPILER_ARG1 CMAKE_CXX_COMPILER_ENV_VAR CMAKE_CXX_COMPILER_FULLPATH + CMAKE_CXX_COMPILER_LOADED CMAKE_CXX_COMPILER_WORKS + CMAKE_CXX_CREATE_SHARED_LIBRARY + CMAKE_CXX_CREATE_SHARED_LIBRARY_FORBIDDEN_FLAGS CMAKE_CXX_CREATE_SHARED_MODULE + CMAKE_CXX_CREATE_STATIC_LIBRARY CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_DEBUG_INIT CMAKE_CXX_FLAGS_INIT CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_MINSIZEREL_INIT CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_RELEASE_INIT CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_RELWITHDEBINFO_INIT CMAKE_CXX_IGNORE_EXTENSIONS + CMAKE_CXX_INFORMATION_LOADED CMAKE_CXX_LINKER_PREFERENCE + CMAKE_CXX_LINK_EXECUTABLE 
CMAKE_CXX_LINK_FLAGS CMAKE_CXX_OUTPUT_EXTENSION + CMAKE_CXX_SOURCE_FILE_EXTENSIONS CMAKE_DL_LIBS CMAKE_EDIT_COMMAND + CMAKE_EXECUTABLE_SUFFIX CMAKE_EXE_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS_DEBUG + CMAKE_EXE_LINKER_FLAGS_MINSIZEREL CMAKE_EXE_LINKER_FLAGS_RELEASE + CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO CMAKE_FILES_DIRECTORY + CMAKE_FIND_APPBUNDLE CMAKE_FIND_FRAMEWORK CMAKE_FIND_LIBRARY_PREFIXES + CMAKE_FIND_LIBRARY_SUFFIXES CMAKE_GENERATOR CMAKE_HOME_DIRECTORY + CMAKE_INCLUDE_FLAG_C CMAKE_INCLUDE_FLAG_C_SEP CMAKE_INCLUDE_FLAG_CXX + CMAKE_INIT_VALUE CMAKE_INSTALL_PREFIX CMAKE_LIBRARY_PATH_FLAG + CMAKE_LINK_LIBRARY_FLAG CMAKE_LINK_LIBRARY_SUFFIX + CMAKE_MacOSX_Content_COMPILE_OBJECT CMAKE_MAJOR_VERSION CMAKE_MAKE_PROGRAM + CMAKE_MINOR_VERSION CMAKE_MODULE_EXISTS CMAKE_MODULE_LINKER_FLAGS + CMAKE_MODULE_LINKER_FLAGS_DEBUG CMAKE_MODULE_LINKER_FLAGS_MINSIZEREL + CMAKE_MODULE_LINKER_FLAGS_RELEASE CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_NUMBER_OF_LOCAL_GENERATORS CMAKE_OSX_ARCHITECTURES _CMAKE_OSX_MACHINE + CMAKE_OSX_SYSROOT CMAKE_PARENT_LIST_FILE CMAKE_PATCH_VERSION + CMAKE_PLATFORM_HAS_INSTALLNAME CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES + CMAKE_PLATFORM_ROOT_BIN CMAKE_PROJECT_NAME CMAKE_RANLIB CMAKE_ROOT + CMAKE_SHARED_LIBRARY_C_FLAGS CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS + CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS CMAKE_SHARED_LIBRARY_CXX_FLAGS + CMAKE_SHARED_LIBRARY_LINK_C_FLAGS CMAKE_SHARED_LIBRARY_PREFIX + CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG CMAKE_SHARED_LIBRARY_RUNTIME_C_FLAG_SEP + CMAKE_SHARED_LIBRARY_SONAME_C_FLAG CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG + CMAKE_SHARED_LIBRARY_SUFFIX CMAKE_SHARED_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS_DEBUG CMAKE_SHARED_LINKER_FLAGS_MINSIZEREL + CMAKE_SHARED_LINKER_FLAGS_RELEASE CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO + CMAKE_SHARED_MODULE_CREATE_C_FLAGS CMAKE_SHARED_MODULE_CREATE_CXX_FLAGS + CMAKE_SHARED_MODULE_PREFIX CMAKE_SHARED_MODULE_SUFFIX CMAKE_SIZEOF_VOID_P + CMAKE_SKIP_RPATH CMAKE_SOURCE_DIR 
CMAKE_STATIC_LIBRARY_PREFIX + CMAKE_STATIC_LIBRARY_SUFFIX CMAKE_SYSTEM CMAKE_SYSTEM_AND_C_COMPILER_INFO_FILE + CMAKE_SYSTEM_AND_CXX_COMPILER_INFO_FILE CMAKE_SYSTEM_APPBUNDLE_PATH + CMAKE_SYSTEM_FRAMEWORK_PATH CMAKE_SYSTEM_INCLUDE_PATH CMAKE_SYSTEM_INFO_FILE + CMAKE_SYSTEM_LIBRARY_PATH CMAKE_SYSTEM_LOADED CMAKE_SYSTEM_NAME + CMAKE_SYSTEM_PROCESSOR CMAKE_SYSTEM_PROGRAM_PATH + CMAKE_SYSTEM_SPECIFIC_INFORMATION_LOADED CMAKE_SYSTEM_VERSION CMAKE_UNAME + CMAKE_USE_RELATIVE_PATHS CMAKE_VERBOSE_MAKEFILE CYGWIN EXECUTABLE_OUTPUT_PATH + FORCE HAVE_CMAKE_SIZEOF_VOID_P LIBRARY_OUTPUT_PATH LOCATION MACOSX_BUNDLE + MINGW MSVC MSVC60 MSVC70 MSVC71 MSVC80 MSVC_IDE POST_BUILD PRE_BUILD + PROJECT_BINARY_DIR PROJECT_NAME PROJECT_SOURCE_DIR RUN_CONFIGURE TARGET + UNIX WIN32 +]] + P('$') * lexer.delimited_range('{}', false, true))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, word_match[[ + AND COMMAND DEFINED DOC EQUAL EXISTS GREATER INTERNAL LESS MATCHES NAME NAMES + NAME_WE NOT OR PATH PATHS PROGRAM STREQUAL STRGREATER STRINGS STRLESS +]] + S('=(){}'))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'IF', 'ENDIF') +lex:add_fold_point(lexer.KEYWORD, 'FOREACH', 'ENDFOREACH') +lex:add_fold_point(lexer.KEYWORD, 'WHILE', 'ENDWHILE') +lex:add_fold_point(lexer.FUNCTION, 'MACRO', 'ENDMACRO') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/coffeescript.lua b/lexlua/coffeescript.lua new file mode 100644 index 000000000..55c4154e2 --- /dev/null +++ b/lexlua/coffeescript.lua @@ -0,0 +1,46 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. 
+-- CoffeeScript LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S = lpeg.P, lpeg.S + +local lex = lexer.new('coffeescript', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + all and bind break by case catch class const continue default delete do each + else enum export extends false finally for function if import in instanceof is + isnt let loop native new no not of off on or return super switch then this + throw true try typeof unless until var void when while with yes +]])) + +-- Fields: object properties and methods. +lex:add_rule('field', token(lexer.FUNCTION, '.' * (S('_$') + lexer.alpha) * + (S('_$') + lexer.alnum)^0)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local regex_str = #P('/') * lexer.last_char_includes('+-*%<>!=^&|?~:;,([{') * + lexer.delimited_range('/', true) * S('igm')^0 +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"')) + + token(lexer.REGEX, regex_str)) + +-- Comments. +local block_comment = '###' * (lexer.any - '###')^0 * P('###')^-1 +local line_comment = '#' * lexer.nonnewline_esc^0 +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))) + +return lex diff --git a/lexlua/container.lua b/lexlua/container.lua new file mode 100644 index 000000000..212748ec5 --- /dev/null +++ b/lexlua/container.lua @@ -0,0 +1,5 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Container LPeg lexer. +-- This is SciTE's plain text lexer. 
+ +return require('lexer').new('container') diff --git a/lexlua/context.lua b/lexlua/context.lua new file mode 100644 index 000000000..5b3510671 --- /dev/null +++ b/lexlua/context.lua @@ -0,0 +1,47 @@ +-- Copyright 2006-2018 Robert Gieseke. See License.txt. +-- ConTeXt LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('context') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) + +-- ConTeXt environments. +local environment = token('environment', '\\' * (P('start') + 'stop') * + lexer.word) +lex:add_rule('environment', environment) +lex:add_style('environment', lexer.STYLE_KEYWORD) + +-- Sections. +lex:add_rule('section', token('section', '\\' * word_match[[ + chapter part section subject subsection subsubject subsubsection subsubsubject + title +]])) +lex:add_style('section', lexer.STYLE_CLASS) + +-- Commands. +lex:add_rule('command', token(lexer.KEYWORD, '\\' * + (lexer.alpha^1 + S('#$&~_^%{}')))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('$&#{}[]'))) + +-- Fold points. +lex:add_fold_point('environment', '\\start', '\\stop') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '%', lexer.fold_line_comments('%')) + +-- Embedded Lua. +local luatex = lexer.load('lua') +local luatex_start_rule = #P('\\startluacode') * environment +local luatex_end_rule = #P('\\stopluacode') * environment +lex:embed(luatex, luatex_start_rule, luatex_end_rule) + +return lex diff --git a/lexlua/cpp.lua b/lexlua/cpp.lua new file mode 100644 index 000000000..277b820e8 --- /dev/null +++ b/lexlua/cpp.lua @@ -0,0 +1,75 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- C++ LPeg lexer. 
+ +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('cpp') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + asm auto break case catch class const const_cast continue default delete do + dynamic_cast else explicit export extern false for friend goto if inline + mutable namespace new operator private protected public register + reinterpret_cast return sizeof static static_cast switch template this throw + true try typedef typeid typename using virtual volatile while + -- Operators. + and and_eq bitand bitor compl not not_eq or or_eq xor xor_eq + -- C++11. + alignas alignof constexpr decltype final noexcept override static_assert + thread_local +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + bool char double enum float int long short signed struct union unsigned void + wchar_t + -- C++11. + char16_t char32_t nullptr +]])) + +-- Strings. +local sq_str = P('L')^-1 * lexer.delimited_range("'", true) +local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Preprocessor. 
+local preproc_word = word_match[[ + define elif else endif error if ifdef ifndef import line pragma undef using + warning +]] +lex:add_rule('preprocessor', + #lexer.starts_line('#') * + (token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * preproc_word) + + token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * 'include') * + (token(lexer.WHITESPACE, S('\t ')^1) * + token(lexer.STRING, + lexer.delimited_range('<>', true, true)))^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;,.()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/crystal.lua b/lexlua/crystal.lua new file mode 100644 index 000000000..75f9437a6 --- /dev/null +++ b/lexlua/crystal.lua @@ -0,0 +1,122 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Copyright 2017 Michel Martens. +-- Crystal LPeg lexer (based on Ruby). + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('crystal') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + alias begin break case class def defined? do else elsif end ensure false for + if in module next nil not redo rescue retry return self super then true undef + unless until when while yield __FILE__ __LINE__ +]])) + +-- Functions. 
+lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abort at_exit caller delay exit fork future get_stack_top gets lazy loop main + p print printf puts raise rand read_line require sleep spawn sprintf system + with_color + -- Macros. + assert_responds_to debugger parallel pp record redefine_main +]]) * -S('.:|')) + +-- Identifiers. +local word_char = lexer.alnum + S('_!?') +local word = (lexer.alpha + '_') * word_char^0 +local identifier = token(lexer.IDENTIFIER, word) + +local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'} +local literal_delimitted = P(function(input, index) + local delimiter = input:sub(index, index) + if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics + local match_pos, patt + if delimiter_matches[delimiter] then + -- Handle nested delimiter/matches in strings. + local s, e = delimiter, delimiter_matches[delimiter] + patt = lexer.delimited_range(s..e, false, false, true) + else + patt = lexer.delimited_range(delimiter) + end + match_pos = lpeg.match(patt, input, index) + return match_pos or #input + 1 + end +end) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) + +-- Strings. +local cmd_str = lexer.delimited_range('`') +local sq_str = lexer.delimited_range("'") +local dq_str = lexer.delimited_range('"') +local heredoc = '<<' * P(function(input, index) + local s, e, indented, _, delimiter = + input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) + if s == index and delimiter then + local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') + local _, e = input:find(end_heredoc..delimiter, e) + return e and e + 1 or #input + 1 + end +end) +-- TODO: regex_str fails with `obj.method /patt/` syntax. 
+local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * + lexer.delimited_range('/', true, false) * S('iomx')^0 +lex:add_rule('string', token(lexer.STRING, (sq_str + dq_str + heredoc + + cmd_str) * S('f')^-1) + + token(lexer.REGEX, regex_str)) + +-- Numbers. +local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1 +local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0 +local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec) +-- TODO: meta, control, etc. for numeric_literal. +local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char +lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer + + numeric_literal)) + +-- Variables. +local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit + + '-' * S('0FadiIKlpvw')) +local class_var = '@@' * word +local inst_var = '@' * word +lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var + + inst_var)) + +-- Symbols. +lex:add_rule('symbol', token('symbol', ':' * P(function(input, index) + if input:sub(index - 2, index - 2) ~= ':' then return index end +end) * (word_char^1 + sq_str + dq_str))) +lex:add_style('symbol', lexer.STYLE_CONSTANT) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))) + +-- Fold points. 
+local function disambiguate(text, pos, line, s) + return line:sub(1, s - 1):match('^%s*$') and + not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 +end +lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') +lex:add_fold_point(lexer.KEYWORD, 'case', 'end') +lex:add_fold_point(lexer.KEYWORD, 'class', 'end') +lex:add_fold_point(lexer.KEYWORD, 'def', 'end') +lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +lex:add_fold_point(lexer.KEYWORD, 'for', 'end') +lex:add_fold_point(lexer.KEYWORD, 'module', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'while', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'unless', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'until', disambiguate) +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.OPERATOR, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/csharp.lua b/lexlua/csharp.lua new file mode 100644 index 000000000..4263c6672 --- /dev/null +++ b/lexlua/csharp.lua @@ -0,0 +1,68 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- C# LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('csharp') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + class delegate enum event interface namespace struct using abstract const + explicit extern fixed implicit internal lock out override params partial + private protected public ref sealed static readonly unsafe virtual volatile + add as assembly base break case catch checked continue default do else finally + for foreach get goto if in is new remove return set sizeof stackalloc super + switch this throw try typeof unchecked value void while yield + null true false +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + bool byte char decimal double float int long object operator sbyte short + string uint ulong ushort +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Strings. +local sq_str = lexer.delimited_range("'", true) +local dq_str = lexer.delimited_range('"', true) +local ml_str = P('@')^-1 * lexer.delimited_range('"', false, true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ml_str)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('lLdDfFMm')^-1)) + +-- Preprocessor. +local preproc_word = word_match[[ + define elif else endif error if line undef warning region endregion +]] +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') * + S('\t ')^0 * + preproc_word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('~!.,:;+-*/<>=\\^|&%?()[]{}'))) + +-- Fold points. 
+lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'region', 'endregion') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/css.lua b/lexlua/css.lua new file mode 100644 index 000000000..7b3230287 --- /dev/null +++ b/lexlua/css.lua @@ -0,0 +1,165 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- CSS LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lex = lexer.new('css') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Properties. +lex:add_rule('property', token('property', word_match[[ + -- CSS 1. + color background-color background-image background-repeat + background-attachment background-position background font-family font-style + font-variant font-weight font-size font word-spacing letter-spacing + text-decoration vertical-align text-transform text-align text-indent + line-height margin-top margin-right margin-bottom margin-left margin + padding-top padding-right padding-bottom padding-left padding border-top-width + border-right-width border-bottom-width border-left-width border-width + border-top border-right border-bottom border-left border border-color + border-style width height float clear display white-space list-style-type + list-style-image list-style-position list-style + -- CSS 2. 
+ border-top-color border-right-color border-bottom-color border-left-color + border-color border-top-style border-right-style border-bottom-style + border-left-style border-style top right bottom left position z-index + direction unicode-bidi min-width max-width min-height max-height overflow clip + visibility content quotes counter-reset counter-increment marker-offset size + marks page-break-before page-break-after page-break-inside page orphans widows + font-stretch font-size-adjust unicode-range units-per-em src panose-1 stemv + stemh slope cap-height x-height ascent descent widths bbox definition-src + baseline centerline mathline topline text-shadow caption-side table-layout + border-collapse border-spacing empty-cells speak-header cursor outline + outline-width outline-style outline-color volume speak pause-before + pause-after pause cue-before cue-after cue play-during azimuth elevation + speech-rate voice-family pitch pitch-range stress richness speak-punctuation + speak-numeral +]])) +lex:add_style('property', lexer.STYLE_KEYWORD) + +-- Values. +lex:add_rule('value', token('value', word_match[[ + -- CSS 1. + auto none normal italic oblique small-caps bold bolder lighter xx-small + x-small small medium large x-large xx-large larger smaller transparent repeat + repeat-x repeat-y no-repeat scroll fixed top bottom left center right justify + both underline overline line-through blink baseline sub super text-top middle + text-bottom capitalize uppercase lowercase thin medium thick dotted dashed + solid double groove ridge inset outset block inline list-item pre no-wrap + inside outside disc circle square decimal lower-roman upper-roman lower-alpha + upper-alpha aqua black blue fuchsia gray green lime maroon navy olive purple + red silver teal white yellow + -- CSS 2. 
+  inherit run-in compact marker table inline-table table-row-group
+  table-header-group table-footer-group table-row table-column-group
+  table-column table-cell table-caption static relative absolute fixed ltr rtl
+  embed bidi-override visible hidden scroll collapse open-quote close-quote
+  no-open-quote no-close-quote decimal-leading-zero lower-greek lower-latin
+  upper-latin hebrew armenian georgian cjk-ideographic hiragana katakana
+  hiragana-iroha katakana-iroha landscape portrait crop cross always avoid wider
+  narrower ultra-condensed extra-condensed condensed semi-condensed
+  semi-expanded expanded extra-expanded ultra-expanded caption icon menu
+  message-box small-caption status-bar separate show hide once crosshair default
+  pointer move text wait help e-resize ne-resize nw-resize n-resize se-resize
+  sw-resize s-resize w-resize ActiveBorder ActiveCaption AppWorkspace Background
+  ButtonFace ButtonHighlight ButtonShadow InactiveCaptionText ButtonText
+  CaptionText GrayText Highlight HighlightText InactiveBorder InactiveCaption
+  InfoBackground InfoText Menu MenuText Scrollbar ThreeDDarkShadow ThreeDFace
+  ThreeDHighlight ThreeDLightShadow ThreeDShadow Window WindowFrame WindowText
+  silent x-soft soft medium loud x-loud spell-out mix left-side far-left
+  center-left center-right far-right right-side behind leftwards rightwards
+  below level above higher lower x-slow slow medium fast x-fast faster slower
+  male female child x-low low high x-high code digits continuous
+]]))
+lex:add_style('value', lexer.STYLE_CONSTANT)
+
+-- Functions.
+lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + attr blackness blend blenda blur brightness calc circle color-mod contrast + counter cubic-bezier device-cmyk drop-shadow ellipse gray grayscale hsl hsla + hue hue-rotate hwb image inset invert lightness linear-gradient matrix + matrix3d opacity perspective polygon radial-gradient rect + repeating-linear-gradient repeating-radial-gradient rgb rgba rotate rotate3d + rotateX rotateY rotateZ saturate saturation scale scale3d scaleX scaleY scaleZ + sepia shade skewX skewY steps tint toggle translate translate3d translateX + translateY translateZ url whiteness +]])) + +-- Colors. +local xdigit = lexer.xdigit +lex:add_rule('color', token('color', word_match[[ + aliceblue antiquewhite aqua aquamarine azure beige bisque black + blanchedalmond blue blueviolet brown burlywood cadetblue chartreuse chocolate + coral cornflowerblue cornsilk crimson cyan darkblue darkcyan darkgoldenrod + darkgray darkgreen darkgrey darkkhaki darkmagenta darkolivegreen darkorange + darkorchid darkred darksalmon darkseagreen darkslateblue darkslategray + darkslategrey darkturquoise darkviolet deeppink deepskyblue dimgray dimgrey + dodgerblue firebrick floralwhite forestgreen fuchsia gainsboro ghostwhite gold + goldenrod gray green greenyellow grey honeydew hotpink indianred indigo ivory + khaki lavender lavenderblush lawngreen lemonchiffon lightblue lightcoral + lightcyan lightgoldenrodyellow lightgray lightgreen lightgrey lightpink + lightsalmon lightseagreen lightskyblue lightslategray lightslategrey + lightsteelblue lightyellow lime limegreen linen magenta maroon + mediumaquamarine mediumblue mediumorchid mediumpurple mediumseagreen + mediumslateblue mediumspringgreen mediumturquoise mediumvioletred + midnightblue mintcream mistyrose moccasin navajowhite navy oldlace olive + olivedrab orange orangered orchid palegoldenrod palegreen paleturquoise + palevioletred papayawhip peachpuff peru pink plum powderblue purple + rebeccapurple 
red rosybrown royalblue saddlebrown salmon sandybrown seagreen + seashell sienna silver skyblue slateblue slategray slategrey snow springgreen + steelblue tan teal thistle tomato transparent turquoise violet wheat white + whitesmoke yellow yellowgreen +]] + '#' * xdigit * xdigit * xdigit * (xdigit * xdigit * xdigit)^-1)) +lex:add_style('color', lexer.STYLE_NUMBER) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * + (lexer.alnum + S('_-'))^0)) + +-- Pseudo classes and pseudo elements. +lex:add_rule('pseudoclass', ':' * token('pseudoclass', word_match[[ + active checked disabled empty enabled first-child first-of-type focus hover + in-range invalid lang last-child last-of-type link not nth-child + nth-last-child nth-last-of-type nth-of-type only-of-type only-child optional + out-of-range read-only read-write required root target valid visited +]])) +lex:add_style('pseudoclass', lexer.STYLE_CONSTANT) +lex:add_rule('pseudoelement', '::' * token('pseudoelement', word_match[[ + after before first-letter first-line selection +]])) +lex:add_style('pseudoelement', lexer.STYLE_CONSTANT) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '/*' * (lexer.any - '*/')^0 * + P('*/')^-1)) + +-- Numbers. +local unit = token('unit', word_match[[ + ch cm deg dpcm dpi dppx em ex grad Hz in kHz mm ms pc pt px q rad rem s turn + vh vmax vmin vw +]]) +lex:add_style('unit', lexer.STYLE_NUMBER) +lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1) * unit^-1) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('~!#*>+=|.,:;()[]{}'))) + +-- At rule. +lex:add_rule('at_rule', token('at_rule', P('@') * word_match[[ + charset font-face media page import namespace +]])) +lex:add_style('at_rule', lexer.STYLE_PREPROCESSOR) + +-- Fold points. 
+lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') + +return lex diff --git a/lexlua/cuda.lua b/lexlua/cuda.lua new file mode 100644 index 000000000..950392057 --- /dev/null +++ b/lexlua/cuda.lua @@ -0,0 +1,71 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- CUDA LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('cuda', {inherit = lexer.load('cpp')}) + +-- Whitespace +lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:modify_rule('keyword', token(lexer.KEYWORD, word_match[[ + __global__ __host__ __device__ __constant__ __shared__ +]]) + lex:get_rule('keyword')) + +-- Types. +lex:modify_rule('type', token(lexer.TYPE, word_match[[ + uint int1 uint1 int2 uint2 int3 uint3 int4 uint4 float1 float2 float3 float4 + char1 char2 char3 char4 uchar1 uchar2 uchar3 uchar4 short1 short2 short3 + short4 dim1 dim2 dim3 dim4 +]]) + lex:get_rule('type') + + +-- Functions. +token(lexer.FUNCTION, word_match[[ + -- Atom. + atomicAdd atomicAnd atomicCAS atomicDec atomicExch atomicInc atomicMax + atomicMin atomicOr atomicSub atomicXor + -- Dev. + tex1D tex1Dfetch tex2D __float_as_int __int_as_float __float2int_rn + __float2int_rz __float2int_ru __float2int_rd __float2uint_rn __float2uint_rz + __float2uint_ru __float2uint_rd __int2float_rn __int2float_rz __int2float_ru + __int2float_rd __uint2float_rn __uint2float_rz __uint2float_ru __uint2float_rd + __fadd_rz __fmul_rz __fdividef __mul24 __umul24 __mulhi __umulhi __mul64hi + __umul64hi min umin fminf fmin max umax fmaxf fmax abs fabsf fabs sqrtf sqrt + sinf __sinf sin cosf __cosf cos sincosf __sincosf expf __expf exp logf __logf + log + -- Runtime. 
+ cudaBindTexture cudaBindTextureToArray cudaChooseDevice cudaConfigureCall + cudaCreateChannelDesc cudaD3D10GetDevice cudaD3D10MapResources + cudaD3D10RegisterResource cudaD3D10ResourceGetMappedArray + cudaD3D10ResourceGetMappedPitch cudaD3D10ResourceGetMappedPointer + cudaD3D10ResourceGetMappedSize cudaD3D10ResourceGetSurfaceDimensions + cudaD3D10ResourceSetMapFlags cudaD3D10SetDirect3DDevice + cudaD3D10UnmapResources cudaD3D10UnregisterResource cudaD3D9GetDevice + cudaD3D9GetDirect3DDevice cudaD3D9MapResources cudaD3D9RegisterResource + cudaD3D9ResourceGetMappedArray cudaD3D9ResourceGetMappedPitch + cudaD3D9ResourceGetMappedPointer cudaD3D9ResourceGetMappedSize + cudaD3D9ResourceGetSurfaceDimensions cudaD3D9ResourceSetMapFlags + cudaD3D9SetDirect3DDevice cudaD3D9UnmapResources cudaD3D9UnregisterResource + cudaEventCreate cudaEventDestroy cudaEventElapsedTime cudaEventQuery + cudaEventRecord cudaEventSynchronize cudaFree cudaFreeArray cudaFreeHost + cudaGetChannelDesc cudaGetDevice cudaGetDeviceCount cudaGetDeviceProperties + cudaGetErrorString cudaGetLastError cudaGetSymbolAddress cudaGetSymbolSize + cudaGetTextureAlignmentOffset cudaGetTextureReference cudaGLMapBufferObject + cudaGLRegisterBufferObject cudaGLSetGLDevice cudaGLUnmapBufferObject + cudaGLUnregisterBufferObject cudaLaunch cudaMalloc cudaMalloc3D + cudaMalloc3DArray cudaMallocArray cudaMallocHost cudaMallocPitch cudaMemcpy + cudaMemcpy2D cudaMemcpy2DArrayToArray cudaMemcpy2DFromArray + cudaMemcpy2DToArray cudaMemcpy3D cudaMemcpyArrayToArray cudaMemcpyFromArray + cudaMemcpyFromSymbol cudaMemcpyToArray cudaMemcpyToSymbol cudaMemset + cudaMemset2D cudaMemset3D cudaSetDevice cudaSetupArgument cudaStreamCreate + cudaStreamDestroy cudaStreamQuery cudaStreamSynchronize cudaThreadExit + cudaThreadSynchronize cudaUnbindTexture +]]) + + +-- Variables. 
+token(lexer.VARIABLE, word_match[[gridDim blockIdx blockDim threadIdx]])) + +return lex diff --git a/lexlua/dart.lua b/lexlua/dart.lua new file mode 100644 index 000000000..4a2c43b57 --- /dev/null +++ b/lexlua/dart.lua @@ -0,0 +1,57 @@ +-- Copyright 2013-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Dart LPeg lexer. +-- Written by Brian Schott (@Hackerpilot on Github). + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('dart') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + assert break case catch class const continue default do else enum extends + false final finally for if in is new null rethrow return super switch this + throw true try var void while with +]])) + +-- Built-ins. +lex:add_rule('builtin', token(lexer.CONSTANT, word_match[[ + abstract as dynamic export external factory get implements import library + operator part set static typedef +]])) + +-- Strings. +local sq_str = S('r')^-1 * lexer.delimited_range("'", true) +local dq_str = S('r')^-1 * lexer.delimited_range('"', true) +local sq_str_multiline = S('r')^-1 * "'''" * (lexer.any - "'''")^0 * P("'''")^-1 +local dq_str_multiline = S('r')^-1 * '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +lex:add_rule('string', token(lexer.STRING, sq_str_multiline + dq_str_multiline + + sq_str + dq_str)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0 + + lexer.nested_pair('/*', '*/'))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.hex_num)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('#?=!<>+-*$/%&|^~.,;()[]{}'))) + +-- Annotations. 
+lex:add_rule('annotation', token('annotation', '@' * lexer.word^1)) +lex:add_style('annotation', lexer.STYLE_PREPROCESSOR) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/desktop.lua b/lexlua/desktop.lua new file mode 100644 index 000000000..1c78e3278 --- /dev/null +++ b/lexlua/desktop.lua @@ -0,0 +1,56 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Desktop Entry LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('desktop') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keys. +lex:add_rule('key', token('key', word_match[[ + Type Version Name GenericName NoDisplay Comment Icon Hidden OnlyShowIn + NotShowIn TryExec Exec Exec Path Terminal MimeType Categories StartupNotify + StartupWMClass URL +]])) +lex:add_style('key', lexer.STYLE_KEYWORD) + +-- Values. +lex:add_rule('value', token('value', word_match[[true false]])) +lex:add_style('value', lexer.STYLE_CONSTANT) + +-- Identifiers. +lex:add_rule('identifier', lexer.token(lexer.IDENTIFIER, + lexer.alpha * (lexer.alnum + S('_-'))^0)) + +-- Group headers. +lex:add_rule('header', + lexer.starts_line(token('header', + lexer.delimited_range('[]', false, true)))) +lex:add_style('header', lexer.STYLE_LABEL) + +-- Locales. +lex:add_rule('locale', token('locale', + lexer.delimited_range('[]', false, true))) +lex:add_style('locale', lexer.STYLE_CLASS) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer))) + +-- Field codes. 
+lex:add_rule('code', lexer.token('code', P('%') * S('fFuUdDnNickvm'))) +lex:add_style('code', lexer.STYLE_VARIABLE) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('='))) + +return lex diff --git a/lexlua/diff.lua b/lexlua/diff.lua new file mode 100644 index 000000000..a0c62d214 --- /dev/null +++ b/lexlua/diff.lua @@ -0,0 +1,32 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Diff LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('diff', {lex_by_line = true}) + +-- Text, separators, and file headers. +lex:add_rule('index', token(lexer.COMMENT, 'Index: ' * lexer.any^0 * -1)) +lex:add_rule('separator', token(lexer.COMMENT, ('---' + P('*')^4 + P('=')^1) * + lexer.space^0 * -1)) +lex:add_rule('header', token('header', (P('*** ') + '--- ' + '+++ ') * + lexer.any^1)) +lex:add_style('header', lexer.STYLE_COMMENT) + +-- Location. +lex:add_rule('location', token(lexer.NUMBER, ('@@' + lexer.digit^1 + '****') * + lexer.any^1)) + +-- Additions, deletions, and changes. +lex:add_rule('addition', token('addition', S('>+') * lexer.any^0)) +lex:add_style('addition', 'fore:$(color.green)') +lex:add_rule('deletion', token('deletion', S('<-') * lexer.any^0)) +lex:add_style('deletion', 'fore:$(color.red)') +lex:add_rule('change', token('change', '!' * lexer.any^0)) +lex:add_style('change', 'fore:$(color.yellow)') + +lex:add_rule('any_line', token(lexer.DEFAULT, lexer.any^1)) + +return lex diff --git a/lexlua/django.lua b/lexlua/django.lua new file mode 100644 index 000000000..88406e1f8 --- /dev/null +++ b/lexlua/django.lua @@ -0,0 +1,55 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Django LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('django') + +-- Whitespace. 
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + as block blocktrans by endblock endblocktrans comment endcomment cycle date + debug else extends filter endfilter firstof for endfor if endif ifchanged + endifchanged ifnotequal endifnotequal in load not now or parsed regroup ssi + trans with widthratio +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + add addslashes capfirst center cut date default dictsort dictsortreversed + divisibleby escape filesizeformat first fix_ampersands floatformat get_digit + join length length_is linebreaks linebreaksbr linenumbers ljust lower + make_list phone2numeric pluralize pprint random removetags rjust slice slugify + stringformat striptags time timesince title truncatewords unordered_list upper + urlencode urlize urlizetrunc wordcount wordwrap yesno +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range('"', false, true))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':,.|'))) + +-- Embed Django in HTML. +local html = lexer.load('html') +local html_comment = '<!--' * (lexer.any - '-->')^0 * P('-->')^-1 +local django_comment = '{#' * (lexer.any - lexer.newline - '#}')^0 * P('#}')^-1 +html:modify_rule('comment', token(lexer.COMMENT, html_comment + django_comment)) +local django_start_rule = token('django_tag', '{' * S('{%')) +local django_end_rule = token('django_tag', S('%}') * '}') +html:embed(lex, django_start_rule, django_end_rule) +lex:add_style('django_tag', lexer.STYLE_EMBEDDED) + +-- Fold points. 
+lex:add_fold_point('django_tag', '{{', '}}') +lex:add_fold_point('django_tag', '{%', '%}') + +return lex diff --git a/lexlua/dmd.lua b/lexlua/dmd.lua new file mode 100644 index 000000000..f294043d7 --- /dev/null +++ b/lexlua/dmd.lua @@ -0,0 +1,178 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- D LPeg lexer. +-- Heavily modified by Brian Schott (@Hackerpilot on Github). + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'dmd'} + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +local nested_comment = lexer.nested_pair('/+', '+/') +local comment = token(lexer.COMMENT, line_comment + block_comment + + nested_comment) + +-- Strings. +local sq_str = lexer.delimited_range("'", true) * S('cwd')^-1 +local dq_str = lexer.delimited_range('"') * S('cwd')^-1 +local lit_str = 'r' * lexer.delimited_range('"', false, true) * S('cwd')^-1 +local bt_str = lexer.delimited_range('`', false, true) * S('cwd')^-1 +local hex_str = 'x' * lexer.delimited_range('"') * S('cwd')^-1 +local other_hex_str = '\\x' * (lexer.xdigit * lexer.xdigit)^1 +local del_str = lexer.nested_pair('q"[', ']"') * S('cwd')^-1 + + lexer.nested_pair('q"(', ')"') * S('cwd')^-1 + + lexer.nested_pair('q"{', '}"') * S('cwd')^-1 + + lexer.nested_pair('q"<', '>"') * S('cwd')^-1 + + P('q') * lexer.nested_pair('{', '}') * S('cwd')^-1 +local string = token(lexer.STRING, del_str + sq_str + dq_str + lit_str + + bt_str + hex_str + other_hex_str) + +-- Numbers. 
+local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 +local hex_num = lexer.hex_num * ('_' * lexer.xdigit^1)^0 +local bin_num = '0' * S('bB') * S('01_')^1 +local oct_num = '0' * S('01234567_')^1 +local integer = S('+-')^-1 * (hex_num + oct_num + bin_num + dec) +local number = token(lexer.NUMBER, (lexer.float + integer) * S('uUlLdDfFi')^-1) + +-- Keywords. +local keyword = token(lexer.KEYWORD, word_match{ + 'abstract', 'align', 'asm', 'assert', 'auto', 'body', 'break', 'case', 'cast', + 'catch', 'const', 'continue', 'debug', 'default', 'delete', + 'deprecated', 'do', 'else', 'extern', 'export', 'false', 'final', 'finally', + 'for', 'foreach', 'foreach_reverse', 'goto', 'if', 'import', 'immutable', + 'in', 'inout', 'invariant', 'is', 'lazy', 'macro', 'mixin', 'new', 'nothrow', + 'null', 'out', 'override', 'pragma', 'private', 'protected', 'public', 'pure', + 'ref', 'return', 'scope', 'shared', 'static', 'super', 'switch', + 'synchronized', 'this', 'throw','true', 'try', 'typeid', 'typeof', 'unittest', + 'version', 'virtual', 'volatile', 'while', 'with', '__gshared', '__thread', + '__traits', '__vector', '__parameters' +}) + +-- Types. +local type = token(lexer.TYPE, word_match{ + 'alias', 'bool', 'byte', 'cdouble', 'cent', 'cfloat', 'char', 'class', + 'creal', 'dchar', 'delegate', 'double', 'enum', 'float', 'function', + 'idouble', 'ifloat', 'int', 'interface', 'ireal', 'long', 'module', 'package', + 'ptrdiff_t', 'real', 'short', 'size_t', 'struct', 'template', 'typedef', + 'ubyte', 'ucent', 'uint', 'ulong', 'union', 'ushort', 'void', 'wchar', + 'string', 'wstring', 'dstring', 'hash_t', 'equals_t' +}) + +-- Constants. +local constant = token(lexer.CONSTANT, word_match{ + '__FILE__', '__LINE__', '__DATE__', '__EOF__', '__TIME__', '__TIMESTAMP__', + '__VENDOR__', '__VERSION__', '__FUNCTION__', '__PRETTY_FUNCTION__', + '__MODULE__', +}) + +local class_sequence = token(lexer.TYPE, P('class') + P('struct')) * ws^1 * + token(lexer.CLASS, lexer.word) + +-- Identifiers. 
+local identifier = token(lexer.IDENTIFIER, lexer.word) + +-- Operators. +local operator = token(lexer.OPERATOR, S('?=!<>+-*$/%&|^~.,;()[]{}')) + +-- Properties. +local properties = (type + identifier + operator) * token(lexer.OPERATOR, '.') * + token(lexer.VARIABLE, word_match{ + 'alignof', 'dig', 'dup', 'epsilon', 'idup', 'im', 'init', 'infinity', + 'keys', 'length', 'mangleof', 'mant_dig', 'max', 'max_10_exp', 'max_exp', + 'min', 'min_normal', 'min_10_exp', 'min_exp', 'nan', 'offsetof', 'ptr', + 're', 'rehash', 'reverse', 'sizeof', 'sort', 'stringof', 'tupleof', + 'values' + }) + +-- Preprocs. +local annotation = token('annotation', '@' * lexer.word^1) +local preproc = token(lexer.PREPROCESSOR, '#' * lexer.nonnewline^0) + +-- Traits. +local traits_list = token('traits', word_match{ + 'allMembers', 'classInstanceSize', 'compiles', 'derivedMembers', + 'getAttributes', 'getMember', 'getOverloads', 'getProtection', 'getUnitTests', + 'getVirtualFunctions', 'getVirtualIndex', 'getVirtualMethods', 'hasMember', + 'identifier', 'isAbstractClass', 'isAbstractFunction', 'isArithmetic', + 'isAssociativeArray', 'isFinalClass', 'isFinalFunction', 'isFloating', + 'isIntegral', 'isLazy', 'isNested', 'isOut', 'isOverrideFunction', 'isPOD', + 'isRef', 'isSame', 'isScalar', 'isStaticArray', 'isStaticFunction', + 'isUnsigned', 'isVirtualFunction', 'isVirtualMethod', 'parent' +}) + +local scopes_list = token('scopes', word_match{'exit', 'success', 'failure'}) + +-- versions +local versions_list = token('versions', word_match{ + 'AArch64', 'AIX', 'all', 'Alpha', 'Alpha_HardFloat', 'Alpha_SoftFloat', + 'Android', 'ARM', 'ARM_HardFloat', 'ARM_SoftFloat', 'ARM_SoftFP', 'ARM_Thumb', + 'assert', 'BigEndian', 'BSD', 'Cygwin', 'D_Coverage', 'D_Ddoc', 'D_HardFloat', + 'DigitalMars', 'D_InlineAsm_X86', 'D_InlineAsm_X86_64', 'D_LP64', + 'D_NoBoundsChecks', 'D_PIC', 'DragonFlyBSD', 'D_SIMD', 'D_SoftFloat', + 'D_Version2', 'D_X32', 'FreeBSD', 'GNU', 'Haiku', 'HPPA', 'HPPA64', 'Hurd', + 'IA64', 
'LDC', 'linux', 'LittleEndian', 'MIPS32', 'MIPS64', 'MIPS_EABI', + 'MIPS_HardFloat', 'MIPS_N32', 'MIPS_N64', 'MIPS_O32', 'MIPS_O64', + 'MIPS_SoftFloat', 'NetBSD', 'none', 'OpenBSD', 'OSX', 'Posix', 'PPC', 'PPC64', + 'PPC_HardFloat', 'PPC_SoftFloat', 'S390', 'S390X', 'SDC', 'SH', 'SH64', + 'SkyOS', 'Solaris', 'SPARC', 'SPARC64', 'SPARC_HardFloat', 'SPARC_SoftFloat', + 'SPARC_V8Plus', 'SysV3', 'SysV4', 'unittest', 'Win32', 'Win64', 'Windows', + 'X86', 'X86_64' +}) + +local versions = token(lexer.KEYWORD, 'version') * lexer.space^0 * + token(lexer.OPERATOR, '(') * lexer.space^0 * versions_list + +local scopes = token(lexer.KEYWORD, 'scope') * lexer.space^0 * + token(lexer.OPERATOR, '(') * lexer.space^0 * scopes_list + +local traits = token(lexer.KEYWORD, '__traits') * lexer.space^0 * + token(lexer.OPERATOR, '(') * lexer.space^0 * traits_list + +local func = token(lexer.FUNCTION, lexer.word) * + #(lexer.space^0 * (P('!') * lexer.word^-1 * lexer.space^-1)^-1 * + P('(')) + +M._rules = { + {'whitespace', ws}, + {'class', class_sequence}, + {'traits', traits}, + {'versions', versions}, + {'scopes', scopes}, + {'keyword', keyword}, + {'variable', properties}, + {'type', type}, + {'function', func}, + {'constant', constant}, + {'string', string}, + {'identifier', identifier}, + {'comment', comment}, + {'number', number}, + {'preproc', preproc}, + {'operator', operator}, + {'annotation', annotation}, +} + +M._tokenstyles = { + annotation = lexer.STYLE_PREPROCESSOR, + traits = 'fore:$(color.yellow)', + versions = lexer.STYLE_CONSTANT, + scopes = lexer.STYLE_CONSTANT +} + +M._foldsymbols = { + _patterns = {'[{}]', '/[*+]', '[*+]/', '//'}, + [lexer.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [lexer.COMMENT] = { + ['/*'] = 1, ['*/'] = -1, ['/+'] = 1, ['+/'] = -1, + ['//'] = lexer.fold_line_comments('//') + } +} + +return M diff --git a/lexlua/dockerfile.lua b/lexlua/dockerfile.lua new file mode 100644 index 000000000..bec2c4341 --- /dev/null +++ b/lexlua/dockerfile.lua @@ -0,0 +1,41 
@@ +-- Copyright 2016-2018 Alejandro Baez (https://keybase.io/baez). See License.txt. +-- Dockerfile LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('dockerfile', {fold_by_indentation = true}) + +-- Whitespace +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + ADD ARG CMD COPY ENTRYPOINT ENV EXPOSE FROM LABEL MAINTAINER ONBUILD RUN + STOPSIGNAL USER VOLUME WORKDIR +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Variable. +lex:add_rule('variable', token(lexer.VARIABLE, + S('$')^1 * (S('{')^1 * lexer.word * S('}')^1 + + lexer.word))) + +-- Strings. +local sq_str = lexer.delimited_range("'", false, true) +local dq_str = lexer.delimited_range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('\\[],=:{}'))) + +return lex diff --git a/lexlua/dot.lua b/lexlua/dot.lua new file mode 100644 index 000000000..54d55a458 --- /dev/null +++ b/lexlua/dot.lua @@ -0,0 +1,54 @@ +-- Copyright 2006-2018 Brian "Sir Alaran" Schott. See License.txt. +-- Dot LPeg lexer. +-- Based off of lexer code by Mitchell. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('dot') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + graph node edge digraph fontsize rankdir fontname shape label arrowhead + arrowtail arrowsize color comment constraint decorate dir headlabel headport + headURL labelangle labeldistance labelfloat labelfontcolor labelfontname + labelfontsize layer lhead ltail minlen samehead sametail style taillabel + tailport tailURL weight subgraph +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + box polygon ellipse circle point egg triangle plaintext diamond trapezium + parallelogram house pentagon hexagon septagon octagon doublecircle + doubleoctagon tripleoctagon invtriangle invtrapezium invhouse Mdiamond Msquare + Mcircle rect rectangle none note tab folder box3d record +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 + lexer.float)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('->()[]{};'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/eiffel.lua b/lexlua/eiffel.lua new file mode 100644 index 000000000..64ccd8bf3 --- /dev/null +++ b/lexlua/eiffel.lua @@ -0,0 +1,60 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Eiffel LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('eiffel') + +-- Whitespace. 
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + alias all and as check class creation debug deferred do else elseif end ensure + expanded export external feature from frozen if implies indexing infix inherit + inspect invariant is like local loop not obsolete old once or prefix redefine + rename require rescue retry select separate then undefine until variant when + xor + current false precursor result strip true unique void +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + character string bit boolean integer real none any +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*%&|^~.,:;?()[]{}'))) + +-- Fold points. 
+lex:add_fold_point(lexer.KEYWORD, 'check', 'end') +lex:add_fold_point(lexer.KEYWORD, 'debug', 'end') +lex:add_fold_point(lexer.KEYWORD, 'deferred', function(text, pos, line, s) + return line:find('deferred%s+class') and 0 or 1 +end) +lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +lex:add_fold_point(lexer.KEYWORD, 'from', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'inspect', 'end') +lex:add_fold_point(lexer.KEYWORD, 'once', 'end') +lex:add_fold_point(lexer.KEYWORD, 'class', function(text, pos, line, s) + return line:find('deferred%s+class') and 0 or 1 +end) +lex:add_fold_point(lexer.COMMENT, '--', lexer.fold_line_comments('--')) + +return lex diff --git a/lexlua/elixir.lua b/lexlua/elixir.lua new file mode 100644 index 000000000..56bf4ec40 --- /dev/null +++ b/lexlua/elixir.lua @@ -0,0 +1,107 @@ +-- Copyright 2015-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Contributed by Richard Philips. +-- Elixir LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local B, P, R, S = lpeg.B, lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('elixir', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Sigils. 
+local sigil11 = P("~") * S("CRSW") * lexer.delimited_range('<>', false, true) +local sigil12 = P("~") * S("CRSW") * lexer.delimited_range('{}', false, true) +local sigil13 = P("~") * S("CRSW") * lexer.delimited_range('[]', false, true) +local sigil14 = P("~") * S("CRSW") * lexer.delimited_range('()', false, true) +local sigil15 = P("~") * S("CRSW") * lexer.delimited_range('|', false, true) +local sigil16 = P("~") * S("CRSW") * lexer.delimited_range('/', false, true) +local sigil17 = P("~") * S("CRSW") * lexer.delimited_range('"', false, true) +local sigil18 = P("~") * S("CRSW") * lexer.delimited_range("'", false, true) +local sigil19 = P("~") * S("CRSW") * '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local sigil10 = P("~") * S("CRSW") * "'''" * (lexer.any - "'''")^0 * P("'''")^-1 +local sigil21 = P("~") * S("crsw") * lexer.delimited_range('<>', false, false) +local sigil22 = P("~") * S("crsw") * lexer.delimited_range('{}', false, false) +local sigil23 = P("~") * S("crsw") * lexer.delimited_range('[]', false, false) +local sigil24 = P("~") * S("crsw") * lexer.delimited_range('()', false, false) +local sigil25 = P("~") * S("crsw") * lexer.delimited_range('|', false, false) +local sigil26 = P("~") * S("crsw") * lexer.delimited_range('/', false, false) +local sigil27 = P("~") * S("crsw") * lexer.delimited_range('"', false, false) +local sigil28 = P("~") * S("crsw") * lexer.delimited_range("'", false, false) +local sigil29 = P("~") * S("csrw") * '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local sigil20 = P("~") * S("csrw") * "'''" * (lexer.any - "'''")^0 * P("'''")^-1 +local sigil_token = token(lexer.REGEX, sigil10 + sigil19 + sigil11 + sigil12 + + sigil13 + sigil14 + sigil15 + sigil16 + + sigil17 + sigil18 + sigil20 + sigil29 + + sigil21 + sigil22 + sigil23 + sigil24 + + sigil25 + sigil26 + sigil27 + sigil28) +local sigiladdon_token = token(lexer.EMBEDDED, R('az', 'AZ')^0) +lex:add_rule('sigil', sigil_token * sigiladdon_token) + +-- Atoms. 
+local atom1 = B(1 - P(':')) * P(':') * lexer.delimited_range('"', false) +local atom2 = B(1 - P(':')) * P(':') * R('az', 'AZ') * + R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 +local atom3 = B(1 - R('az', 'AZ', '__', '09', '::')) * + R('AZ') * R('az', 'AZ', '__', '@@', '09')^0 * S('?!')^-1 +lex:add_rule('atom', token(lexer.CONSTANT, atom1 + atom2 + atom3)) + +-- Strings. +local dq_str = lexer.delimited_range('"', false) +local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +lex:add_rule('string', token(lexer.STRING, triple_dq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) + +-- Attributes. +lex:add_rule('attribute', token(lexer.LABEL, B(1 - R('az', 'AZ', '__')) * + P('@') * R('az','AZ') * + R('az','AZ','09','__')^0)) + +-- Booleans. +lex:add_rule('boolean', token(lexer.NUMBER, P(':')^-1 * + word_match[[true false nil]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + defstruct defrecordp defrecord defprotocol defp defoverridable defmodule + defmacrop defmacro defimpl defexception defdelegate defcallback def +]])) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + is_atom is_binary is_bitstring is_boolean is_float is_function is_integer + is_list is_map is_number is_pid is_port is_record is_reference is_tuple + is_exception case when cond for if unless try receive send exit raise throw + after rescue catch else do end quote unquote super import require alias use + self with fn +]])) + +-- Operators +local operator1 = word_match[[and or not when xor in]] +local operator2 = P('!==') + '!=' + '!' + '=~' + '===' + '==' + '=' + '<<<' + + '<<' + '<=' + '<-' + '<' + '>>>' + '>>' + '>=' + '>' + '->' + + '--' + '-' + '++' + '+' + '&&&' + '&&' + '&' + '|||' + '||' + + '|>' + '|' + '..' + '.' 
+ '^^^' + '^' + '\\\\' + '::' + '*' + + '/' + '~~~' + '@' +lex:add_rule('operator', token(lexer.OPERATOR, operator1 + operator2)) + +-- Identifiers +lex:add_rule('identifier', token(lexer.IDENTIFIER, R('az', '__') * + R('az', 'AZ', '__', '09')^0 * + S('?!')^-1)) + +-- Numbers +local dec = lexer.digit * (lexer.digit + P("_"))^0 +local bin = '0b' * S('01')^1 +local oct = '0o' * R('07')^1 +local integer = bin + lexer.hex_num + oct + dec +local float = lexer.digit^1 * P(".") * lexer.digit^1 * S("eE") * + (S('+-')^-1 * lexer.digit^1)^-1 +lex:add_rule('number', B(1 - R('az', 'AZ', '__')) * S('+-')^-1 * + token(lexer.NUMBER, float + integer)) + +return lex diff --git a/lexlua/erlang.lua b/lexlua/erlang.lua new file mode 100644 index 000000000..973601ab2 --- /dev/null +++ b/lexlua/erlang.lua @@ -0,0 +1,89 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Erlang LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('erlang') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + after begin case catch cond end fun if let of query receive try when + -- Operators. + div rem or xor bor bxor bsl bsr and band not bnot badarg nocookie orelse + andalso false true +]])) + +-- Functions. 
+lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abs alive apply atom_to_list binary_to_list binary_to_term concat_binary date + disconnect_node element erase exit float float_to_list get get_keys + group_leader halt hd integer_to_list is_alive is_record length link + list_to_atom list_to_binary list_to_float list_to_integer list_to_pid + list_to_tuple load_module make_ref monitor_node node nodes now open_port + pid_to_list process_flag process_info process put register registered round + self setelement size spawn spawn_link split_binary statistics term_to_binary + throw time tl trunc tuple_to_list unlink unregister whereis + -- Others. + any atom binary bitstring byte constant function integer list map mfa + non_neg_integer number pid ports port_close port_info pos_integer reference + record + -- Erlang. + check_process_code delete_module get_cookie hash math module_loaded preloaded + processes purge_module set_cookie set_node + -- Math. + acos asin atan atan2 cos cosh exp log log10 min max pi pow power sin sinh sqrt + tan tanh +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.lower * + ('_' + lexer.alnum)^0)) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, P('_')^0 * lexer.upper * + ('_' + lexer.alnum)^0)) + +-- Directives. +lex:add_rule('directive', token('directive', '-' * word_match[[ + author behaviour behavior compile copyright define doc else endif export file + ifdef ifndef import include include_lib module record spec type undef +]])) +lex:add_style('directive', lexer.STYLE_PREPROCESSOR) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"') + + '$' * lexer.any * lexer.alnum^0)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. 
+lex:add_rule('operator', token(lexer.OPERATOR, S('-<>.;=/|+*:,!()[]{}'))) + +-- Preprocessor. +lex:add_rule('preprocessor', token(lexer.TYPE, '?' * lexer.word)) + +-- Records. +lex:add_rule('type', token(lexer.TYPE, '#' * lexer.word)) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'case', 'end') +lex:add_fold_point(lexer.KEYWORD, 'fun', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'query', 'end') +lex:add_fold_point(lexer.KEYWORD, 'receive', 'end') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '%', lexer.fold_line_comments('%')) + +return lex diff --git a/lexlua/faust.lua b/lexlua/faust.lua new file mode 100644 index 000000000..2865a633c --- /dev/null +++ b/lexlua/faust.lua @@ -0,0 +1,47 @@ +-- Copyright 2015-2018 David B. Lamkins <david@lamkins.net>. See License.txt. +-- Faust LPeg lexer, see http://faust.grame.fr/ + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('faust') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + declare import mdoctags dependencies distributed inputs outputs par seq sum + prod xor with environment library component ffunction fvariable fconstant int + float case waveform h: v: t: +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) + +-- Comments. +local line_comment = '//' * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. 
+local int = R('09')^1 +local rad = P('.') +local exp = (P('e') * S('+-')^-1 * int)^-1 +local flt = int * (rad * int)^-1 * exp + int^-1 * rad * int * exp +lex:add_rule('number', token(lexer.NUMBER, flt + int)) + +-- Pragmas. +lex:add_rule('pragma', token(lexer.PREPROCESSOR, P('<mdoc>') * + (lexer.any - P('</mdoc>'))^0 * + P('</mdoc>')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, + S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\''))) + +return lex diff --git a/lexlua/fish.lua b/lexlua/fish.lua new file mode 100644 index 000000000..5bc27879f --- /dev/null +++ b/lexlua/fish.lua @@ -0,0 +1,58 @@ +-- Copyright 2015-2018 Jason Schindler. See License.txt. +-- Fish (http://fishshell.com/) script LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('fish') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + alias and begin bg bind block break breakpoint builtin case cd command + commandline complete contains continue count dirh dirs echo else emit end eval + exec exit fg fish fish_config fishd fish_indent fish_pager fish_prompt + fish_right_prompt fish_update_completions for funced funcsave function + functions help history if in isatty jobs math mimedb nextd not open or popd + prevd psub pushd pwd random read return set set_color source status switch + test trap type ulimit umask vared while +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, + '$' * (lexer.word + + lexer.delimited_range('{}', true, true)))) + +-- Strings. +local sq_str = lexer.delimited_range("'", false, true) +local dq_str = lexer.delimited_range('"') +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Shebang. 
+lex:add_rule('shebang', token('shebang', '#!/' * lexer.nonnewline^0)) +lex:add_style('shebang', lexer.STYLE_LABEL) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') +lex:add_fold_point(lexer.KEYWORD, 'for', 'end') +lex:add_fold_point(lexer.KEYWORD, 'function', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'switch', 'end') +lex:add_fold_point(lexer.KEYWORD, 'while', 'end') + +return lex diff --git a/lexlua/forth.lua b/lexlua/forth.lua new file mode 100644 index 000000000..086ce780c --- /dev/null +++ b/lexlua/forth.lua @@ -0,0 +1,56 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Forth LPeg lexer. +-- Contributions from Joseph Eib. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('forth') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Strings. +local c_str = 'c' * lexer.delimited_range('"', true, true) +local s_str = 's' * lexer.delimited_range('"', true, true) +local s_bs_str = 's\\' * lexer.delimited_range('"', true, false) +local dot_str = '.' * lexer.delimited_range('"', true, true) +local dot_paren_str = '.' * lexer.delimited_range('()', true, true, false) +local abort_str = 'abort' * lexer.delimited_range('"', true, true) +lex:add_rule('string', token(lexer.STRING, c_str + s_str + s_bs_str + dot_str + + dot_paren_str + abort_str)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + #> #s */ */mod +loop , . .r /mod 0< 0<> 0> 0= 1+ 1- 2! 
2* 2/ 2>r 2@ 2drop 2dup + 2over 2r> 2r@ 2swap :noname <# <> >body >in >number >r ?do ?dup @ abort abs + accept action-of again align aligned allot and base begin bl buffer: c! c, c@ + case cell+ cells char char+ chars compile, constant, count cr create decimal + defer defer! defer@ depth do does> drop dup else emit endcase endof + environment? erase evaluate execute exit false fill find fm/mod here hex hold + holds i if immediate invert is j key leave literal loop lshift m* marker max + min mod move negate nip of or over pad parse parse-name pick postpone quit r> + r@ recurse refill restore-input roll rot rshift s>d save-input sign sm/rem + source source-id space spaces state swap to then true tuck type u. u.r u> u< + um* um/mod unloop until unused value variable while within word xor ['] [char] + [compile] +]], true)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alnum + + S('+-*=<>.?/\'%,_$#'))^1)) + +-- Comments. +local line_comment = S('|\\') * lexer.nonnewline^0 +local block_comment = '(' * (lexer.any - ')')^0 * P(')')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * + (S('./') * lexer.digit^1)^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':;<>+*-/[]#'))) + +return lex diff --git a/lexlua/fortran.lua b/lexlua/fortran.lua new file mode 100644 index 000000000..98f2e7b38 --- /dev/null +++ b/lexlua/fortran.lua @@ -0,0 +1,72 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Fortran LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('fortran') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. 
+local c_comment = lexer.starts_line(S('Cc')) * lexer.nonnewline^0 +local d_comment = lexer.starts_line(S('Dd')) * lexer.nonnewline^0 +local ex_comment = lexer.starts_line('!') * lexer.nonnewline^0 +local ast_comment = lexer.starts_line('*') * lexer.nonnewline^0 +local line_comment = '!' * lexer.nonnewline^0 +lex:add_rule('comment', token(lexer.COMMENT, c_comment + d_comment + + ex_comment + ast_comment + + line_comment)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + include program module subroutine function contains use call return + -- Statements. + case select default continue cycle do while else if elseif then elsewhere end + endif enddo forall where exit goto pause stop + -- Operators. + .not. .and. .or. .xor. .eqv. .neqv. .eq. .ne. .gt. .ge. .lt. .le. + -- Logical. + .false. .true. +]], true))) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match([[ + -- I/O. + backspace close endfile inquire open print read rewind write format + -- Type conversion utility and math. + aimag aint amax0 amin0 anint ceiling cmplx conjg dble dcmplx dfloat dim dprod + float floor ifix imag int logical modulo nint real sign sngl transfer zext abs + acos aimag aint alog alog10 amax0 amax1 amin0 amin1 amod anint asin atan atan2 + cabs ccos char clog cmplx conjg cos cosh csin csqrt dabs dacos dasin datan + datan2 dble dcos dcosh ddim dexp dim dint dlog dlog10 dmax1 dmin1 dmod dnint + dprod dreal dsign dsin dsinh dsqrt dtan dtanh exp float iabs ichar idim idint + idnint ifix index int isign len lge lgt lle llt log log10 max max0 max1 min + min0 min1 mod nint real sign sin sinh sngl sqrt tan tanh +]], true))) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match([[ + implicit explicit none data parameter allocate allocatable allocated + deallocate integer real double precision complex logical character dimension + kind +]], true))) + +-- Numbers. 
+lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) *
+                                           -lexer.alpha))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alnum^1))
+
+-- Strings.
+local sq_str = lexer.delimited_range("'", true, true)
+local dq_str = lexer.delimited_range('"', true, true)
+lex:add_rule('string', token(lexer.STRING, sq_str + dq_str))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('<>=&+-/*,()')))
+
+return lex
diff --git a/lexlua/fsharp.lua b/lexlua/fsharp.lua
new file mode 100644
index 000000000..b651d5fe5
--- /dev/null
+++ b/lexlua/fsharp.lua
@@ -0,0 +1,59 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- F# LPeg lexer.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local lex = lexer.new('fsharp', {fold_by_indentation = true})
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords (note: F# spells these 'finally' and 'inherit').
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+  abstract and as assert asr begin class default delegate do done downcast
+  downto else end enum exception false finally for fun function if in inherit
+  interface land lazy let lor lsl lsr lxor match member mod module mutable
+  namespace new null of open or override sig static struct then to true try type
+  val when inline upcast while with async atomic break checked component const
+  constructor continue eager event external fixed functor include method mixin
+  process property protected public pure readonly return sealed switch virtual
+  void volatile where
+  -- Booleans.
+  true false
+]]))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match[[
+  bool byte sbyte int16 uint16 int uint32 int64 uint64 nativeint unativeint char
+  string decimal unit void float32 single float double
+]]))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0 + + lexer.nested_pair('(*', '*)'))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + + lexer.integer * S('uUlL')^-1))) + +-- Preprocessor. +local preproc_word = word_match[[ + else endif endregion if ifdef ifndef light region +]] +lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * + S('\t ')^0 * preproc_word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, + S('=<>+-*/^.,:;~!@#%^&|?[](){}'))) + +return lex diff --git a/lexlua/gap.lua b/lexlua/gap.lua new file mode 100644 index 000000000..de7c38cf4 --- /dev/null +++ b/lexlua/gap.lua @@ -0,0 +1,42 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Gap LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('gap') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + and break continue do elif else end fail false fi for function if in infinity + local not od or rec repeat return then true until while +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * -lexer.alpha)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('*+-,./:;<=>~^#()[]{}'))) + +-- Fold points. 
+lex:add_fold_point(lexer.KEYWORD, 'function', 'end') +lex:add_fold_point(lexer.KEYWORD, 'do', 'od') +lex:add_fold_point(lexer.KEYWORD, 'if', 'fi') +lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/gettext.lua b/lexlua/gettext.lua new file mode 100644 index 000000000..5ff8c14d4 --- /dev/null +++ b/lexlua/gettext.lua @@ -0,0 +1,31 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Gettext LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('gettext') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + msgid msgid_plural msgstr fuzzy c-format no-c-format +]], true))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, S('%$@') * lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * S(': .~') * + lexer.nonnewline^0)) + +return lex diff --git a/lexlua/gherkin.lua b/lexlua/gherkin.lua new file mode 100644 index 000000000..f4a7924db --- /dev/null +++ b/lexlua/gherkin.lua @@ -0,0 +1,41 @@ +-- Copyright 2015-2018 Jason Schindler. See License.txt. +-- Gherkin (https://github.com/cucumber/cucumber/wiki/Gherkin) LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('gherkin', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+  And Background But Examples Feature Given Outline Scenario Scenarios Then When
+]]))
+
+-- Strings.
+local doc_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1
+local dq_str = lexer.delimited_range('"')
+lex:add_rule('string', token(lexer.STRING, doc_str + dq_str))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+
+-- Tags.
+lex:add_rule('tag', token('tag', '@' * lexer.word^0))
+lex:add_style('tag', lexer.STYLE_LABEL)
+
+-- Placeholders.
+lex:add_rule('placeholder', token('placeholder', lexer.nested_pair('<', '>')))
+lex:add_style('placeholder', lexer.STYLE_VARIABLE)
+
+-- Examples.
+lex:add_rule('example', token('example', '|' * lexer.nonnewline^0))
+lex:add_style('example', lexer.STYLE_NUMBER)
+
+return lex
diff --git a/lexlua/glsl.lua b/lexlua/glsl.lua
new file mode 100644
index 000000000..497d81636
--- /dev/null
+++ b/lexlua/glsl.lua
@@ -0,0 +1,109 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- GLSL LPeg lexer.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local lex = lexer.new('glsl', {inherit = lexer.load('cpp')})
+
+-- Whitespace.
+lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Keywords.
+lex:modify_rule('keyword', token(lexer.KEYWORD, word_match[[
+  attribute const in inout out uniform varying invariant centroid flat smooth
+  noperspective layout patch sample subroutine lowp mediump highp precision
+  -- Macros.
+  __VERSION__ __LINE__ __FILE__
+]]) + lex:get_rule('keyword'))
+
+-- Types.
+lex:modify_rule('type',
+                token(lexer.TYPE,
+                      S('bdiu')^-1 * 'vec' * R('24') +
+                      P('d')^-1 * 'mat' * R('24') * ('x' * R('24'))^-1 +
+                      S('iu')^-1 * 'sampler' * R('13') * 'D' +
+                      'sampler' * R('12') * 'D' * P('Array')^-1 * 'Shadow' +
+                      S('iu')^-1 * 'sampler' * (R('12') * 'DArray' +
+                                                word_match[[
+                                                  Cube 2DRect Buffer 2DMS 2DMSArray 2DMSCubeArray
+                                                ]]) +
+                      word_match[[
+                        samplerCubeShadow sampler2DRectShadow
+                        samplerCubeArrayShadow
+                      ]]) +
+                lex:get_rule('type') +
+
+
+-- Functions.
+token(lexer.FUNCTION, word_match[[
+  radians degrees sin cos tan asin acos atan sinh cosh tanh asinh acosh atanh
+  pow exp log exp2 log2 sqrt inversesqrt abs sign floor trunc round roundEven
+  ceil fract mod modf min max clamp mix step smoothstep isnan isinf
+  floatBitsToInt floatBitsToUint intBitsToFloat uintBitsToFloat fma frexp ldexp
+  packUnorm2x16 packUnorm4x8 packSnorm4x8 unpackUnorm2x16 unpackUnorm4x8
+  unpackSnorm4x8 packDouble2x32 unpackDouble2x32 length distance dot cross
+  normalize ftransform faceforward reflect refract matrixCompMult outerProduct
+  transpose determinant inverse lessThan lessThanEqual greaterThan
+  greaterThanEqual equal notEqual any all not uaddCarry usubBorrow umulExtended
+  imulExtended bitfieldExtract bitfieldInsert bitfieldReverse bitCount findLSB
+  findMSB textureSize textureQueryLOD texture textureProj textureLod
+  textureOffset texelFetch texelFetchOffset textureProjOffset textureLodOffset
+  textureProjLod textureProjLodOffset textureGrad textureGradOffset
+  textureProjGrad textureProjGradOffset textureGather textureGatherOffset
+  texture1D texture2D texture3D texture1DProj texture2DProj texture3DProj
+  texture1DLod texture2DLod texture3DLod texture1DProjLod texture2DProjLod
+  texture3DProjLod textureCube textureCubeLod shadow1D shadow2D shadow1DProj
+  shadow2DProj shadow1DLod shadow2DLod shadow1DProjLod shadow2DProjLod dFdx dFdy
+  fwidth interpolateAtCentroid interpolateAtSample interpolateAtOffset noise1
+  noise2 noise3 noise4 EmitStreamVertex
EndStreamPrimitive EmitVertex + EndPrimitive barrier +]]) + + +-- Variables. +token(lexer.VARIABLE, word_match[[ + gl_VertexID gl_InstanceID gl_Position gl_PointSize gl_ClipDistance + gl_PrimitiveIDIn gl_InvocationID gl_PrimitiveID gl_Layer gl_PatchVerticesIn + gl_TessLevelOuter gl_TessLevelInner gl_TessCoord gl_FragCoord gl_FrontFacing + gl_PointCoord gl_SampleID gl_SamplePosition gl_FragColor gl_FragData + gl_FragDepth gl_SampleMask gl_ClipVertex gl_FrontColor gl_BackColor + gl_FrontSecondaryColor gl_BackSecondaryColor gl_TexCoord gl_FogFragCoord + gl_Color gl_SecondaryColor gl_Normal gl_Vertex gl_MultiTexCoord0 + gl_MultiTexCoord1 gl_MultiTexCoord2 gl_MultiTexCoord3 gl_MultiTexCoord4 + gl_MultiTexCoord5 gl_MultiTexCoord6 gl_MultiTexCoord7 gl_FogCoord +]]) + + +-- Constants. +token(lexer.CONSTANT, word_match[[ + gl_MaxVertexAttribs gl_MaxVertexUniformComponents gl_MaxVaryingFloats + gl_MaxVaryingComponents gl_MaxVertexOutputComponents + gl_MaxGeometryInputComponents gl_MaxGeometryOutputComponents + gl_MaxFragmentInputComponents gl_MaxVertexTextureImageUnits + gl_MaxCombinedTextureImageUnits gl_MaxTextureImageUnits + gl_MaxFragmentUniformComponents gl_MaxDrawBuffers gl_MaxClipDistances + gl_MaxGeometryTextureImageUnits gl_MaxGeometryOutputVertices + gl_MaxGeometryTotalOutputComponents gl_MaxGeometryUniformComponents + gl_MaxGeometryVaryingComponents gl_MaxTessControlInputComponents + gl_MaxTessControlOutputComponents gl_MaxTessControlTextureImageUnits + gl_MaxTessControlUniformComponents gl_MaxTessControlTotalOutputComponents + gl_MaxTessEvaluationInputComponents gl_MaxTessEvaluationOutputComponents + gl_MaxTessEvaluationTextureImageUnits gl_MaxTessEvaluationUniformComponents + gl_MaxTessPatchComponents gl_MaxPatchVertices gl_MaxTessGenLevel + gl_MaxTextureUnits gl_MaxTextureCoords gl_MaxClipPlanes + + gl_DepthRange gl_ModelViewMatrix gl_ProjectionMatrix + gl_ModelViewProjectionMatrix gl_TextureMatrix gl_NormalMatrix + gl_ModelViewMatrixInverse 
gl_ProjectionMatrixInverse + gl_ModelViewProjectionMatrixInverse gl_TextureMatrixInverse + gl_ModelViewMatrixTranspose gl_ProjectionMatrixTranspose + gl_ModelViewProjectionMatrixTranspose gl_TextureMatrixTranspose + gl_ModelViewMatrixInverseTranspose gl_ProjectionMatrixInverseTranspose + gl_ModelViewProjectionMatrixInverseTranspose gl_TextureMatrixInverseTranspose + gl_NormalScale gl_ClipPlane gl_Point gl_FrontMaterial gl_BackMaterial + gl_LightSource gl_LightModel gl_FrontLightModelProduct + gl_BackLightModelProduct gl_FrontLightProduct gl_BackLightProduct + gl_TextureEnvColor gl_EyePlaneS gl_EyePlaneT gl_EyePlaneR gl_EyePlaneQ + gl_ObjectPlaneS gl_ObjectPlaneT gl_ObjectPlaneR gl_ObjectPlaneQ gl_Fog +]])) + +return lex diff --git a/lexlua/gnuplot.lua b/lexlua/gnuplot.lua new file mode 100644 index 000000000..c33854d20 --- /dev/null +++ b/lexlua/gnuplot.lua @@ -0,0 +1,59 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Gnuplot LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('gnuplot') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + cd call clear exit fit help history if load pause plot using with index every + smooth thru print pwd quit replot reread reset save set show unset shell splot + system test unset update +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abs acos acosh arg asin asinh atan atan2 atanh besj0 besj1 besy0 besy1 ceil + cos cosh erf erfc exp floor gamma ibeta inverf igamma imag invnorm int + lambertw lgamma log log10 norm rand real sgn sin sinh sqrt tan tanh column + defined tm_hour tm_mday tm_min tm_mon tm_sec tm_wday tm_yday tm_year valid +]])) + +-- Variables. 
+lex:add_rule('variable', token(lexer.VARIABLE, word_match[[ + angles arrow autoscale bars bmargin border boxwidth clabel clip cntrparam + colorbox contour datafile decimalsign dgrid3d dummy encoding fit fontpath + format functions function grid hidden3d historysize isosamples key label + lmargin loadpath locale logscale mapping margin mouse multiplot mx2tics mxtics + my2tics mytics mztics offsets origin output parametric plot pm3d palette + pointsize polar print rmargin rrange samples size style surface terminal tics + ticslevel ticscale timestamp timefmt title tmargin trange urange variables + version view vrange x2data x2dtics x2label x2mtics x2range x2tics x2zeroaxis + xdata xdtics xlabel xmtics xrange xtics xzeroaxis y2data y2dtics y2label + y2mtics y2range y2tics y2zeroaxis ydata ydtics ylabel ymtics yrange ytics + yzeroaxis zdata zdtics cbdata cbdtics zero zeroaxis zlabel zmtics zrange ztics + cblabel cbmtics cbrange cbtics +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"') + + lexer.delimited_range('[]', true) + + lexer.delimited_range('{}', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('-+~!$*%=<>&|^?:()'))) + +return lex diff --git a/lexlua/go.lua b/lexlua/go.lua new file mode 100644 index 000000000..5fff60938 --- /dev/null +++ b/lexlua/go.lua @@ -0,0 +1,62 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Go LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('go') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + break case chan const continue default defer else fallthrough for func go goto + if import interface map package range return select struct switch type var +]])) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + true false iota nil +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + bool byte complex64 complex128 error float32 float64 int int8 int16 int32 + int64 rune string uint uint8 uint16 uint32 uint64 uintptr +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + append cap close complex copy delete imag len make new panic print println + real recover +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local sq_str = lexer.delimited_range("'", true) +local dq_str = lexer.delimited_range('"', true) +local raw_str = lexer.delimited_range('`', false, true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + P('i')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%&|^<>=!:;.,()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/groovy.lua b/lexlua/groovy.lua new file mode 100644 index 000000000..d27ea7132 --- /dev/null +++ b/lexlua/groovy.lua @@ -0,0 +1,69 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Groovy LPeg lexer. 
+ +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('groovy') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abstract break case catch continue default do else extends final finally for + if implements instanceof native new private protected public return static + switch synchronized throw throws transient try volatile while strictfp package + import as assert def mixin property test using in + false null super this true it +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abs any append asList asWritable call collect compareTo count div dump each + eachByte eachFile eachLine every find findAll flatten getAt getErr getIn + getOut getText grep immutable inject inspect intersect invokeMethods isCase + join leftShift minus multiply newInputStream newOutputStream newPrintWriter + newReader newWriter next plus pop power previous print println push putAt read + readBytes readLines reverse reverseEach round size sort splitEachLine step + subMap times toInteger toList tokenize upto waitForOrKill withPrintWriter + withReader withStream withWriter withWriterAppend write writeLine +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + boolean byte char class double float int interface long short void +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Strings. 
+local sq_str = lexer.delimited_range("'") +local dq_str = lexer.delimited_range('"') +local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1 +local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local regex_str = #P('/') * lexer.last_char_includes('=~|!<>+-*?&,:;([{') * + lexer.delimited_range('/', true) +lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str + + sq_str + dq_str) + + token(lexer.REGEX, regex_str)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=~|!<>+-/*?&.,:;()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/gtkrc.lua b/lexlua/gtkrc.lua new file mode 100644 index 000000000..6c6b23003 --- /dev/null +++ b/lexlua/gtkrc.lua @@ -0,0 +1,58 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Gtkrc LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('gtkrc') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + binding class include module_path pixmap_path im_module_file style widget + widget_class +]])) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, word_match[[ + bg fg base text xthickness ythickness bg_pixmap font fontset font_name stock + color engine +]])) + +-- States. +lex:add_rule('state', token('state', word_match[[ + ACTIVE SELECTED NORMAL PRELIGHT INSENSITIVE TRUE FALSE +]])) +lex:add_style('state', lexer.STYLE_CONSTANT) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + mix shade lighter darker +]])) + +-- Identifiers. 
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.alpha * + (lexer.alnum + S('_-'))^0)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * + ('.' * lexer.digit^1)^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':=,*()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/haskell.lua b/lexlua/haskell.lua new file mode 100644 index 000000000..484069cb3 --- /dev/null +++ b/lexlua/haskell.lua @@ -0,0 +1,45 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Haskell LPeg lexer. +-- Modified by Alex Suraci. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('haskell', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + case class data default deriving do else if import in infix infixl infixr + instance let module newtype of then type where _ as qualified hiding +]])) + +local word = (lexer.alnum + S("._'#"))^0 +local op = lexer.punct - S('()[]{}') + +-- Types & type constructors. +lex:add_rule('type', token(lexer.TYPE, (lexer.upper * word) + + (":" * (op^1 - ":")))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"'))) + +-- Comments. 
+local line_comment = '--' * lexer.nonnewline_esc^0 +local block_comment = '{-' * (lexer.any - '-}')^0 * P('-}')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, op)) + +return lex diff --git a/lexlua/html.lua b/lexlua/html.lua new file mode 100644 index 000000000..8c8d999c7 --- /dev/null +++ b/lexlua/html.lua @@ -0,0 +1,149 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- HTML LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lex = lexer.new('html') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * + P('-->')^-1)) + +-- Doctype. +lex:add_rule('doctype', token('doctype', '<!' * word_match([[doctype]], true) * + (lexer.any - '>')^1 * '>')) +lex:add_style('doctype', lexer.STYLE_COMMENT) + +-- Elements. 
+local known_element = token('element', '<' * P('/')^-1 * word_match([[ + a abbr address area article aside audio b base bdi bdo blockquote body br + button canvas caption cite code col colgroup content data datalist dd + decorator del details dfn div dl dt element em embed fieldset figcaption + figure footer form h1 h2 h3 h4 h5 h6 head header hr html i iframe img input + ins kbd keygen label legend li link main map mark menu menuitem meta meter nav + noscript object ol optgroup option output p param pre progress q rp rt ruby s + samp script section select shadow small source spacer span strong style sub + summary sup table tbody td template textarea tfoot th thead time title tr + track u ul var video wbr +]], true)) +local unknown_element = token('unknown_element', '<' * P('/')^-1 * lexer.word) +local element = known_element + unknown_element +lex:add_rule('element', element) +lex:add_style('element', lexer.STYLE_KEYWORD) +lex:add_style('unknown_element', lexer.STYLE_KEYWORD..',italics') + +-- Closing tags. +local tag_close = token('element', P('/')^-1 * '>') +lex:add_rule('tag_close', tag_close) + +-- Attributes. 
+local known_attribute = token('attribute', word_match([[ + accept accept-charset accesskey action align alt async autocomplete autofocus + autoplay bgcolor border buffered challenge charset checked cite class code + codebase color cols colspan content contenteditable contextmenu controls + coords data data- datetime default defer dir dirname disabled download + draggable dropzone enctype for form headers height hidden high href hreflang + http-equiv icon id ismap itemprop keytype kind label lang language list loop + low manifest max maxlength media method min multiple name novalidate open + optimum pattern ping placeholder poster preload pubdate radiogroup readonly + rel required reversed role rows rowspan sandbox scope scoped seamless selected + shape size sizes span spellcheck src srcdoc srclang start step style summary + tabindex target title type usemap value width wrap +]], true) + ((P('data-') + 'aria-') * (lexer.alnum + '-')^1)) +local unknown_attribute = token('unknown_attribute', lexer.word) +local attribute = (known_attribute + unknown_attribute) * #(lexer.space^0 * '=') +lex:add_rule('attribute', attribute) +lex:add_style('attribute', lexer.STYLE_TYPE) +lex:add_style('unknown_attribute', lexer.STYLE_TYPE..',italics') + +-- TODO: performance is terrible on large files. +local in_tag = P(function(input, index) + local before = input:sub(1, index - 1) + local s, e = before:find('<[^>]-$'), before:find('>[^<]-$') + if s and e then return s > e and index or nil end + if s then return index end + return input:find('^[^<]->', index) and index or nil +end) + +-- Equals. +local equals = token(lexer.OPERATOR, '=') --* in_tag +--lex:add_rule('equals', equals) + +-- Strings. +local string = #S('\'"') * lexer.last_char_includes('=') * + token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"')) +lex:add_rule('string', string) + +-- Numbers. 
+lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * + token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag) + +-- Entities. +lex:add_rule('entity', token('entity', '&' * (lexer.any - lexer.space - ';')^1 * + ';')) +lex:add_style('entity', lexer.STYLE_COMMENT) + +-- Fold points. +local function disambiguate_lt(text, pos, line, s) + return not line:find('^</', s) and 1 or -1 +end +lex:add_fold_point('element', '<', disambiguate_lt) +lex:add_fold_point('element', '/>', -1) +lex:add_fold_point('unknown_element', '<', disambiguate_lt) +lex:add_fold_point('unknown_element', '/>', -1) +lex:add_fold_point(lexer.COMMENT, '<!--', '-->') + +-- Tags that start embedded languages. +-- Export these patterns for proxy lexers (e.g. ASP) that need them. +lex.embed_start_tag = element * + (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * + ws^-1 * tag_close +lex.embed_end_tag = element * tag_close + +-- Embedded CSS (<style type="text/css"> ... </style>). +local css = lexer.load('css') +local style_element = word_match([[style]], true) +local css_start_rule = #(P('<') * style_element * + ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then + return index + end +end))) * lex.embed_start_tag +local css_end_rule = #('</' * style_element * ws^-1 * '>') * lex.embed_end_tag +lex:embed(css, css_start_rule, css_end_rule) + +-- Embedded JavaScript (<script type="text/javascript"> ... </script>). 
+local js = lexer.load('javascript') +local script_element = word_match([[script]], true) +local js_start_rule = #(P('<') * script_element * + ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then + return index + end +end))) * lex.embed_start_tag +local js_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag +local js_line_comment = '//' * (lexer.nonnewline_esc - js_end_rule)^0 +local js_block_comment = '/*' * (lexer.any - '*/' - js_end_rule)^0 * P('*/')^-1 +js:modify_rule('comment', token(lexer.COMMENT, js_line_comment + + js_block_comment)) +lex:embed(js, js_start_rule, js_end_rule) + +-- Embedded CoffeeScript (<script type="text/coffeescript"> ... </script>). +local cs = lexer.load('coffeescript') +local script_element = word_match([[script]], true) +local cs_start_rule = #(P('<') * script_element * P(function(input, index) + if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then + return index + end +end)) * lex.embed_start_tag +local cs_end_rule = #('</' * script_element * ws^-1 * '>') * lex.embed_end_tag +lex:embed(cs, cs_start_rule, cs_end_rule) + +return lex diff --git a/lexlua/html2.lua b/lexlua/html2.lua new file mode 100644 index 000000000..ad1bd9c87 --- /dev/null +++ b/lexlua/html2.lua @@ -0,0 +1,147 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- HTML LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lexer = l.new('html') + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) +lexer:add_rule('whitespace', ws) + +-- Comments. +lexer:add_rule('comment', + token(l.COMMENT, '<!--' * (l.any - '-->')^0 * P('-->')^-1)) + +-- Doctype. +lexer:add_rule('doctype', token('doctype', '<!' * word_match('doctype', true) * + (l.any - '>')^1 * '>')) +lexer:add_style('doctype', l.STYLE_COMMENT) + +-- Elements. 
+local known_element = token('element', '<' * P('/')^-1 * word_match([[ + a abbr address area article aside audio b base bdi bdo blockquote body + br button canvas caption cite code col colgroup content data datalist dd + decorator del details dfn div dl dt element em embed fieldset figcaption + figure footer form h1 h2 h3 h4 h5 h6 head header hr html i iframe img input + ins kbd keygen label legend li link main map mark menu menuitem meta meter + nav noscript object ol optgroup option output p param pre progress q rp rt + ruby s samp script section select shadow small source spacer span strong + style sub summary sup table tbody td template textarea tfoot th thead time + title tr track u ul var video wbr +]], true)) +lexer:add_style('element', l.STYLE_KEYWORD) +local unknown_element = token('unknown_element', '<' * P('/')^-1 * l.word) +lexer:add_style('unknown_element', l.STYLE_KEYWORD..',italics') +local element = known_element + unknown_element +lexer:add_rule('element', element) + +-- Closing tags. +local tag_close = token('element', P('/')^-1 * '>') +lexer:add_rule('tag_close', tag_close) + +-- Attributes. 
+local known_attribute = token('attribute', word_match([[ + accept accept-charset accesskey action align alt async autocomplete autofocus + autoplay bgcolor border buffered challenge charset checked cite class code + codebase color cols colspan content contenteditable contextmenu controls + coords data data- datetime default defer dir dirname disabled download + draggable dropzone enctype for form headers height hidden high href hreflang + http-equiv icon id ismap itemprop keytype kind label lang language list + loop low manifest max maxlength media method min multiple name novalidate + open optimum pattern ping placeholder poster preload pubdate radiogroup + readonly rel required reversed role rows rowspan sandbox scope scoped + seamless selected shape size sizes span spellcheck src srcdoc srclang + start step style summary tabindex target title type usemap value width wrap +]], true) + ((P('data-') + 'aria-') * (l.alnum + '-')^1)) +lexer:add_style('attribute', l.STYLE_TYPE) +local unknown_attribute = token('unknown_attribute', l.word) +lexer:add_style('unknown_attribute', l.STYLE_TYPE..',italics') +local attribute = (known_attribute + unknown_attribute) * #(l.space^0 * '=') +lexer:add_rule('attribute', attribute) + +-- TODO: performance is terrible on large files. +local in_tag = P(function(input, index) + local before = input:sub(1, index - 1) + local s, e = before:find('<[^>]-$'), before:find('>[^<]-$') + if s and e then return s > e and index or nil end + if s then return index end + return input:find('^[^<]->', index) and index or nil +end) + +-- Equals. +local equals = token(l.OPERATOR, '=') --* in_tag +--lexer:add_rule('equals', equals) + +-- Strings. +local sq_str = l.delimited_range("'") +local dq_str = l.delimited_range('"') +local string = #S('\'"') * l.last_char_includes('=') * + token(l.STRING, sq_str + dq_str) +lexer:add_rule('string', string) + +-- Numbers. 
+lexer:add_rule('number', #l.digit * l.last_char_includes('=') * + token(l.NUMBER, l.digit^1 * P('%')^-1)) --* in_tag) + +-- Entities. +lexer:add_rule('entity', token('entity', '&' * (l.any - l.space - ';')^1 * ';')) +lexer:add_style('entity', l.STYLE_COMMENT) + +-- Fold points. +lexer:add_fold_point('element', '<', '</') +lexer:add_fold_point('element', '<', '/>') +lexer:add_fold_point('unknown_element', '<', '</') +lexer:add_fold_point('unknown_element', '<', '/>') +lexer:add_fold_point(l.COMMENT, '<!--', '-->') + +-- Tags that start embedded languages. +lexer.embed_start_tag = element * + (ws * attribute * ws^-1 * equals * ws^-1 * string)^0 * + ws^-1 * tag_close +lexer.embed_end_tag = element * tag_close + +-- Embedded CSS. +local css = l.load('css') +local style_element = word_match('style', true) +local css_start_rule = #(P('<') * style_element * + ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/css%1', index) then + return index + end +end))) * lexer.embed_start_tag -- <style type="text/css"> +local css_end_rule = #('</' * style_element * ws^-1 * '>') * + lexer.embed_end_tag -- </style> +lexer:embed(css, css_start_rule, css_end_rule) + +-- Embedded JavaScript. +local js = l.load('javascript') +local script_element = word_match('script', true) +local js_start_rule = #(P('<') * script_element * + ('>' + P(function(input, index) + if input:find('^%s+type%s*=%s*(["\'])text/javascript%1', index) then + return index + end +end))) * lexer.embed_start_tag -- <script type="text/javascript"> +local js_end_rule = #('</' * script_element * ws^-1 * '>') * + lexer.embed_end_tag -- </script> +local js_line_comment = '//' * (l.nonnewline_esc - js_end_rule)^0 +local js_block_comment = '/*' * (l.any - '*/' - js_end_rule)^0 * P('*/')^-1 +js:modify_rule('comment', token(l.COMMENT, js_line_comment + js_block_comment)) +lexer:embed(js, js_start_rule, js_end_rule) + +-- Embedded CoffeeScript. 
+local cs = l.load('coffeescript') +local script_element = word_match('script', true) +local cs_start_rule = #(P('<') * script_element * P(function(input, index) + if input:find('^[^>]+type%s*=%s*(["\'])text/coffeescript%1', index) then + return index + end +end)) * lexer.embed_start_tag -- <script type="text/coffeescript"> +local cs_end_rule = #('</' * script_element * ws^-1 * '>') * + lexer.embed_end_tag -- </script> +lexer:embed(cs, cs_start_rule, cs_end_rule) + +return lexer diff --git a/lexlua/icon.lua b/lexlua/icon.lua new file mode 100644 index 000000000..2759b3946 --- /dev/null +++ b/lexlua/icon.lua @@ -0,0 +1,61 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- LPeg lexer for the Icon programming language. +-- http://www.cs.arizona.edu/icon +-- Contributed by Carl Sturtivant. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('icon') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + break by case create default do else end every fail global if initial + invocable link local next not of procedure record repeat return static suspend + then to until while +]])) + +-- Icon Keywords: unique to Icon. +lex:add_rule('special_keyword', token('special_keyword', P('&') * word_match[[ + allocated ascii clock collections cset current date dateline digits dump e + error errornumber errortext errorvalue errout fail features file host input + lcase letters level line main null output phi pi pos progname random regions + source storage subject time trace ucase version +]])) +lex:add_style('special_keyword', lexer.STYLE_TYPE) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. 
+lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) + +-- Numbers. +local radix_literal = P('-')^-1 * lexer.dec_num * S('rR') * lexer.alnum^1 +lex:add_rule('number', token(lexer.NUMBER, radix_literal + lexer.float + + lexer.integer)) + +-- Preprocessor. +local preproc_word = word_match[[ + define else endif error ifdef ifndef include line undef +]] +lex:add_rule('preproc', token(lexer.PREPROCESSOR, P('$') * preproc_word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>~!=^&|?~@:;,.()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.KEYWORD, 'procedure', 'end') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/idl.lua b/lexlua/idl.lua new file mode 100644 index 000000000..cf2b6c8aa --- /dev/null +++ b/lexlua/idl.lua @@ -0,0 +1,51 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- IDL LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('idl') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abstract attribute case const context custom default enum exception factory + FALSE in inout interface local module native oneway out private public raises + readonly struct support switch TRUE truncatable typedef union valuetype +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + any boolean char double fixed float long Object octet sequence short string + unsigned ValueBase void wchar wstring +]])) + +-- Identifiers. 
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Preprocessor. +local preproc_word = word_match[[ + define undef ifdef ifndef if elif else endif include warning pragma +]] +lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * + preproc_word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!<>=+-/*%&|^~.,:;?()[]{}'))) + +return lex diff --git a/lexlua/inform.lua b/lexlua/inform.lua new file mode 100644 index 000000000..59f9a26b0 --- /dev/null +++ b/lexlua/inform.lua @@ -0,0 +1,72 @@ +-- Copyright 2010-2018 Jeff Stone. See License.txt. +-- Inform LPeg lexer for Scintilla. +-- JMS 2010-04-25. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('inform') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + Abbreviate Array Attribute Class Constant Default End Endif Extend Global + Ifdef Iffalse Ifndef Ifnot Iftrue Import Include Link Lowstring Message Object + Property Release Replace Serial StartDaemon Statusline StopDaemon Switches + Verb + absent action actor add_to_scope address additive after and animate article + articles before bold box break cant_go capacity char class child children + clothing concealed container continue creature daemon deadflag default + describe description do door door_dir door_to d_to d_obj e_to e_obj each_turn + edible else enterable false female first font for found_in general give + grammar has hasnt held if in in_to in_obj initial inside_description invent + jump last life light list_together location lockable locked male move moved + multi multiexcept multiheld multiinside n_to n_obj ne_to ne_obj nw_to nw_obj + name neuter new_line nothing notin noun number objectloop ofclass off on only + open openable or orders out_to out_obj parent parse_name player plural + pluralname print print_ret private proper provides random react_after + react_before remove replace return reverse rfalseroman rtrue s_to s_obj se_to + se_obj sw_to sw_obj scenery scope score scored second self short_name + short_name_indef sibling spaces static string style supporter switch + switchable talkable thedark time_left time_out to topic transparent true + underline u_to u_obj visited w_to w_obj when_closed when_off when_on when_open + while with with_key workflag worn +]])) + +-- Library actions. 
+lex:add_rule('action', token('action', word_match[[ + Answer Ask AskFor Attack Blow Burn Buy Climb Close Consult Cut Dig Disrobe + Drink Drop Eat Empty EmptyT Enter Examine Exit Fill FullScore GetOff Give Go + GoIn Insert Inv InvTall InvWide Jump JumpOver Kiss LetGo Listen LMode1 LMode2 + LMode3 Lock Look LookUnder Mild No NotifyOff NotifyOn Objects Open Order + Places Pray Pronouns Pull Push PushDir PutOn Quit Receive Remove Restart + Restore Rub Save Score ScriptOff ScriptOn Search Set SetTo Show Sing Sleep + Smell Sorry Squeeze Strong Swim Swing SwitchOff SwitchOn Take Taste Tell Think + ThrowAt ThrownAt Tie Touch Transfer Turn Unlock VagueGo Verify Version Wait + Wake WakeOther Wave WaveHands Wear Yes +]])) +lex:add_style('action', lexer.STYLE_VARIABLE) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '!' * lexer.nonnewline^0)) + +-- Numbers. +local inform_hex = '$' * lexer.xdigit^1 +local inform_bin = '$$' * S('01')^1 +lex:add_rule('number', token(lexer.NUMBER, lexer.integer + inform_hex + + inform_bin)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('@~=+-*/%^#=<>;:,.{}[]()&|?'))) + +return lex diff --git a/lexlua/ini.lua b/lexlua/ini.lua new file mode 100644 index 000000000..907788405 --- /dev/null +++ b/lexlua/ini.lua @@ -0,0 +1,43 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Ini LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('ini') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + true false on off yes no +]])) + +-- Identifiers. 
+lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '_') * + (lexer.alnum + S('_.'))^0)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, + lexer.delimited_range('[]', true, true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, lexer.starts_line(S(';#')) * + lexer.nonnewline^0)) + +-- Numbers. +local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 +local oct_num = '0' * S('01234567_')^1 +local integer = S('+-')^-1 * (lexer.hex_num + oct_num + dec) +lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, '=')) + +return lex diff --git a/lexlua/io_lang.lua b/lexlua/io_lang.lua new file mode 100644 index 000000000..28772bec4 --- /dev/null +++ b/lexlua/io_lang.lua @@ -0,0 +1,51 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Io LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('io_lang') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + block method while foreach if else do super self clone proto setSlot hasSlot + type write print forward +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + Block Buffer CFunction Date Duration File Future LinkedList List Map Message + Nil Nop Number Object String WeakLink +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local sq_str = lexer.delimited_range("'") +local dq_str = lexer.delimited_range('"') +local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str)) + +-- Comments. 
+local line_comment = (P('#') + '//') * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, + S('`~@$%^&*-+/=\\<>?.,:;()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/java.lua b/lexlua/java.lua new file mode 100644 index 000000000..a85bf154e --- /dev/null +++ b/lexlua/java.lua @@ -0,0 +1,66 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Java LPeg lexer. +-- Modified by Brian Schott. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('java') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Classes. +lex:add_rule('classdef', token(lexer.KEYWORD, P('class')) * ws * + token(lexer.CLASS, lexer.word)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abstract assert break case catch class const continue default do else enum + extends final finally for goto if implements import instanceof interface + native new package private protected public return static strictfp super + switch synchronized this throw throws transient try while volatile + -- Literals. + true false null +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + boolean byte char double float int long short void + Boolean Byte Character Double Float Integer Long Short String +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('(')) + +-- Identifiers. 
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('LlFfDd')^-1)) + +-- Annotations. +lex:add_rule('annotation', token('annotation', '@' * lexer.word)) +lex:add_style('annotation', lexer.STYLE_PREPROCESSOR) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/javascript.lua b/lexlua/javascript.lua new file mode 100644 index 000000000..11acab636 --- /dev/null +++ b/lexlua/javascript.lua @@ -0,0 +1,50 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- JavaScript LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('javascript') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abstract boolean break byte case catch char class const continue debugger + default delete do double else enum export extends false final finally float + for function get goto if implements import in instanceof int interface let + long native new null of package private protected public return set short + static super switch synchronized this throw throws transient true try typeof + var void volatile while with yield +]])) + +-- Identifiers. 
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Strings. +local regex_str = #P('/') * lexer.last_char_includes('+-*%^!=&|?:;,([{<>') * + lexer.delimited_range('/', true) * S('igm')^0 +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"') + + lexer.delimited_range('`')) + + token(lexer.REGEX, regex_str)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%^!=&|?:;,.()[]{}<>'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/json.lua b/lexlua/json.lua new file mode 100644 index 000000000..d2b5cf35b --- /dev/null +++ b/lexlua/json.lua @@ -0,0 +1,39 @@ +-- Copyright 2006-2018 Brian "Sir Alaran" Schott. See License.txt. +-- JSON LPeg lexer. +-- Based off of lexer code by Mitchell. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('json') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[true false null]])) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. 
+local integer = S('+-')^-1 * lexer.digit^1 * S('Ll')^-1 +lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('[]{}:,'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/jsp.lua b/lexlua/jsp.lua new file mode 100644 index 000000000..49f6625eb --- /dev/null +++ b/lexlua/jsp.lua @@ -0,0 +1,20 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- JSP LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('jsp', {inherit = lexer.load('html')}) + +-- Embedded Java. +local java = lexer.load('java') +local java_start_rule = token('jsp_tag', '<%' * P('=')^-1) +local java_end_rule = token('jsp_tag', '%>') +lex:embed(java, java_start_rule, java_end_rule, true) +lex:add_style('jsp_tag', lexer.STYLE_EMBEDDED) + +-- Fold points. +lex:add_fold_point('jsp_tag', '<%', '%>') + +return lex diff --git a/lexlua/latex.lua b/lexlua/latex.lua new file mode 100644 index 000000000..a1a0f6eb4 --- /dev/null +++ b/lexlua/latex.lua @@ -0,0 +1,58 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Latex LPeg lexer. +-- Modified by Brian Schott. +-- Modified by Robert Gieseke. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('latex') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. 
+local line_comment = '%' * lexer.nonnewline^0 +local block_comment = '\\begin' * P(' ')^0 * '{comment}' * + (lexer.any - '\\end' * P(' ')^0 * '{comment}')^0 * + P('\\end' * P(' ')^0 * '{comment}')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Math environments. +local math_word = word_match[[ + align displaymath eqnarray equation gather math multline +]] +local math_begin_end = (P('begin') + P('end')) * P(' ')^0 * + '{' * math_word * P('*')^-1 * '}' +lex:add_rule('math', token('math', '$' + '\\' * (S('[]()') + math_begin_end))) +lex:add_style('math', lexer.STYLE_FUNCTION) + +-- LaTeX environments. +lex:add_rule('environment', token('environment', '\\' * + (P('begin') + P('end')) * + P(' ')^0 * '{' * lexer.word * + P('*')^-1 * '}')) +lex:add_style('environment', lexer.STYLE_KEYWORD) + +-- Sections. +lex:add_rule('section', token('section', '\\' * word_match[[ + part chapter section subsection subsubsection paragraph subparagraph +]] * P('*')^-1)) +lex:add_style('section', lexer.STYLE_CLASS) + +-- Commands. +lex:add_rule('command', token('command', '\\' * + (lexer.alpha^1 + S('#$&~_^%{}')))) +lex:add_style('command', lexer.STYLE_KEYWORD) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('&#{}[]'))) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '\\begin', '\\end') +lex:add_fold_point(lexer.COMMENT, '%', lexer.fold_line_comments('%')) +lex:add_fold_point('environment', '\\begin', '\\end') +lex:add_fold_point(lexer.OPERATOR, '{', '}') + +return lex diff --git a/lexlua/ledger.lua b/lexlua/ledger.lua new file mode 100644 index 000000000..a697a6d8b --- /dev/null +++ b/lexlua/ledger.lua @@ -0,0 +1,48 @@ +-- Copyright 2015-2018 Charles Lehner. See License.txt. 
+-- ledger journal LPeg lexer, see http://www.ledger-cli.org/ + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('ledger', {lex_by_line = true}) + +local delim = P('\t') + P(' ') + +-- Account. +lex:add_rule('account', token(lexer.VARIABLE, + lexer.starts_line(S(' \t')^1 * + (lexer.print - delim)^1))) + +-- Amount. +lex:add_rule('amount', token(lexer.NUMBER, delim * (1 - S(';\r\n'))^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, S(';#') * lexer.nonnewline^0)) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Strings. +local sq_str = lexer.delimited_range("'") +local dq_str = lexer.delimited_range('"') +local label = lexer.delimited_range('[]', true, true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + label)) + +-- Date. +lex:add_rule('date', token(lexer.CONSTANT, + lexer.starts_line((lexer.digit + S('/-'))^1))) + +-- Automated transactions. +lex:add_rule('auto_tx', token(lexer.PREPROCESSOR, + lexer.starts_line(S('=~') * lexer.nonnewline^0))) + +-- Directives. +local directive_word = word_match[[ + account alias assert bucket capture check comment commodity define end fixed + endfixed include payee apply tag test year +]] + S('AYNDCIiOobh') +lex:add_rule('directive', token(lexer.KEYWORD, + lexer.starts_line(S('!@')^-1 * directive_word))) + +return lex diff --git a/lexlua/less.lua b/lexlua/less.lua new file mode 100644 index 000000000..8544f9f78 --- /dev/null +++ b/lexlua/less.lua @@ -0,0 +1,21 @@ +-- Copyright 2006-2018 Robert Gieseke. See License.txt. +-- Less CSS LPeg lexer. +-- http://lesscss.org + +local lexer = require('lexer') +local token = lexer.token +local S = lpeg.S + +local lex = lexer.new('less', {inherit = lexer.load('css')}) + +-- Line comments. +lex:add_rule('line_comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0)) + +-- Variables. 
+lex:add_rule('variable', token(lexer.VARIABLE, '@' * + (lexer.alnum + S('_-{}'))^1)) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/lexer.lua b/lexlua/lexer.lua new file mode 100644 index 000000000..6c063fcb5 --- /dev/null +++ b/lexlua/lexer.lua @@ -0,0 +1,1865 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. + +local M = {} + +--[=[ This comment is for LuaDoc. +--- +-- Lexes Scintilla documents and source code with Lua and LPeg. +-- +-- ## Writing Lua Lexers +-- +-- Lexers highlight the syntax of source code. Scintilla (the editing component +-- behind [Textadept][]) traditionally uses static, compiled C++ lexers which +-- are notoriously difficult to create and/or extend. On the other hand, Lua +-- makes it easy to rapidly create new lexers, extend existing ones, and +-- embed lexers within one another. Lua lexers tend to be more readable than C++ +-- lexers too. +-- +-- Lexers are Parsing Expression Grammars, or PEGs, composed with the Lua +-- [LPeg library][]. The following table comes from the LPeg documentation and +-- summarizes all you need to know about constructing basic LPeg patterns. This +-- module provides convenience functions for creating and working with other +-- more advanced patterns and concepts. +-- +-- Operator | Description +-- ---------------------|------------ +-- `lpeg.P(string)` | Matches `string` literally. +-- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ characters. +-- `lpeg.S(string)` | Matches any character in set `string`. +-- `lpeg.R("`_`xy`_`")` | Matches any character between range `x` and `y`. +-- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`. +-- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`. +-- `patt1 * patt2` | Matches `patt1` followed by `patt2`. +-- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice). +-- `patt1 - patt2` | Matches `patt1` if `patt2` does not match. 
+-- `-patt` | Equivalent to `("" - patt)`. +-- `#patt` | Matches `patt` but consumes no input. +-- +-- The first part of this document deals with rapidly constructing a simple +-- lexer. The next part deals with more advanced techniques, such as custom +-- coloring and embedding lexers within one another. Following that is a +-- discussion about code folding, or being able to tell Scintilla which code +-- blocks are "foldable" (temporarily hideable from view). After that are +-- instructions on how to use Lua lexers with the aforementioned Textadept +-- editor. Finally there are comments on lexer performance and limitations. +-- +-- [LPeg library]: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html +-- [Textadept]: http://foicica.com/textadept +-- +-- ### Lexer Basics +-- +-- The *lexers/* directory contains all lexers, including your new one. Before +-- attempting to write one from scratch though, first determine if your +-- programming language is similar to any of the 100+ languages supported. If +-- so, you may be able to copy and modify that lexer, saving some time and +-- effort. The filename of your lexer should be the name of your programming +-- language in lower case followed by a *.lua* extension. For example, a new Lua +-- lexer has the name *lua.lua*. +-- +-- Note: Try to refrain from using one-character language names like "c", "d", +-- or "r". For example, Lua lexers for those language names are named "ansi_c", +-- "dmd", and "rstats", respectively. +-- +-- #### New Lexer Template +-- +-- There is a *lexers/template.txt* file that contains a simple template for a +-- new lexer. Feel free to use it, replacing the '?'s with the name of your +-- lexer. Consider this snippet from the template: +-- +-- -- ? LPeg lexer. +-- +-- local lexer = require('lexer') +-- local token, word_match = lexer.token, lexer.word_match +-- local P, R, S = lpeg.P, lpeg.R, lpeg.S +-- +-- local lex = lexer.new('?') +-- +-- -- Whitespace. 
+-- local ws = token(lexer.WHITESPACE, lexer.space^1) +-- lex:add_rule('whitespace', ws) +-- +-- [...] +-- +-- return lex +-- +-- The first 3 lines of code simply define often used convenience variables. The +-- fourth and last lines [define](#lexer.new) and return the lexer object +-- Scintilla uses; they are very important and must be part of every lexer. The +-- fifth line defines something called a "token", an essential building block of +-- lexers. You will learn about tokens shortly. The sixth line defines a lexer +-- grammar rule, which you will learn about later, as well as token styles. (Be +-- aware that it is common practice to combine these two lines for short rules.) +-- Note, however, the `local` prefix in front of variables, which is needed +-- so-as not to affect Lua's global environment. All in all, this is a minimal, +-- working lexer that you can build on. +-- +-- #### Tokens +-- +-- Take a moment to think about your programming language's structure. What kind +-- of key elements does it have? In the template shown earlier, one predefined +-- element all languages have is whitespace. Your language probably also has +-- elements like comments, strings, and keywords. Lexers refer to these elements +-- as "tokens". Tokens are the fundamental "building blocks" of lexers. Lexers +-- break down source code into tokens for coloring, which results in the syntax +-- highlighting familiar to you. It is up to you how specific your lexer is when +-- it comes to tokens. Perhaps only distinguishing between keywords and +-- identifiers is necessary, or maybe recognizing constants and built-in +-- functions, methods, or libraries is desirable. The Lua lexer, for example, +-- defines 11 tokens: whitespace, keywords, built-in functions, constants, +-- built-in libraries, identifiers, strings, comments, numbers, labels, and +-- operators. 
Even though constants, built-in functions, and built-in libraries +-- are subsets of identifiers, Lua programmers find it helpful for the lexer to +-- distinguish between them all. It is perfectly acceptable to just recognize +-- keywords and identifiers. +-- +-- In a lexer, tokens consist of a token name and an LPeg pattern that matches a +-- sequence of characters recognized as an instance of that token. Create tokens +-- using the [`lexer.token()`]() function. Let us examine the "whitespace" token +-- defined in the template shown earlier: +-- +-- local ws = token(lexer.WHITESPACE, lexer.space^1) +-- +-- At first glance, the first argument does not appear to be a string name and +-- the second argument does not appear to be an LPeg pattern. Perhaps you +-- expected something like: +-- +-- local ws = token('whitespace', S('\t\v\f\n\r ')^1) +-- +-- The `lexer` module actually provides a convenient list of common token names +-- and common LPeg patterns for you to use. Token names include +-- [`lexer.DEFAULT`](), [`lexer.WHITESPACE`](), [`lexer.COMMENT`](), +-- [`lexer.STRING`](), [`lexer.NUMBER`](), [`lexer.KEYWORD`](), +-- [`lexer.IDENTIFIER`](), [`lexer.OPERATOR`](), [`lexer.ERROR`](), +-- [`lexer.PREPROCESSOR`](), [`lexer.CONSTANT`](), [`lexer.VARIABLE`](), +-- [`lexer.FUNCTION`](), [`lexer.CLASS`](), [`lexer.TYPE`](), [`lexer.LABEL`](), +-- [`lexer.REGEX`](), and [`lexer.EMBEDDED`](). Patterns include +-- [`lexer.any`](), [`lexer.ascii`](), [`lexer.extend`](), [`lexer.alpha`](), +-- [`lexer.digit`](), [`lexer.alnum`](), [`lexer.lower`](), [`lexer.upper`](), +-- [`lexer.xdigit`](), [`lexer.cntrl`](), [`lexer.graph`](), [`lexer.print`](), +-- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](), +-- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](), +-- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](), +-- [`lexer.float`](), and [`lexer.word`](). 
You may use your own token names if +-- none of the above fit your language, but an advantage to using predefined +-- token names is that your lexer's tokens will inherit the universal syntax +-- highlighting color theme used by your text editor. +-- +-- ##### Example Tokens +-- +-- So, how might you define other tokens like keywords, comments, and strings? +-- Here are some examples. +-- +-- **Keywords** +-- +-- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered +-- choices, use another convenience function: [`lexer.word_match()`](). It is +-- much easier and more efficient to write word matches like: +-- +-- local keyword = token(lexer.KEYWORD, lexer.word_match[[ +-- keyword_1 keyword_2 ... keyword_n +-- ]]) +-- +-- local case_insensitive_keyword = token(lexer.KEYWORD, lexer.word_match([[ +-- KEYWORD_1 keyword_2 ... KEYword_n +-- ]], true)) +-- +-- local hyphened_keyword = token(lexer.KEYWORD, lexer.word_match[[ +-- keyword-1 keyword-2 ... keyword-n +-- ]]) +-- +-- In order to more easily separate or categorize keyword sets, you can use Lua +-- line comments within keyword strings. Such comments will be ignored. For +-- example: +-- +-- local keyword = token(lexer.KEYWORD, lexer.word_match[[ +-- -- Version 1 keywords. +-- keyword_11, keyword_12 ... keyword_1n +-- -- Version 2 keywords. +-- keyword_21, keyword_22 ... keyword_2n +-- ... +-- -- Version N keywords. +-- keyword_m1, keyword_m2 ... keyword_mn +-- ]]) +-- +-- **Comments** +-- +-- Line-style comments with a prefix character(s) are easy to express with LPeg: +-- +-- local shell_comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0) +-- local c_line_comment = token(lexer.COMMENT, +-- '//' * lexer.nonnewline_esc^0) +-- +-- The comments above start with a '#' or "//" and go to the end of the line. +-- The second comment recognizes the next line also as a comment if the current +-- line ends with a '\' escape character. 
+-- +-- C-style "block" comments with a start and end delimiter are also easy to +-- express: +-- +-- local c_comment = token(lexer.COMMENT, +-- '/*' * (lexer.any - '*/')^0 * P('*/')^-1) +-- +-- This comment starts with a "/\*" sequence and contains anything up to and +-- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer +-- can recognize unfinished comments as comments and highlight them properly. +-- +-- **Strings** +-- +-- It is tempting to think that a string is not much different from the block +-- comment shown above in that both have start and end delimiters: +-- +-- local dq_str = '"' * (lexer.any - '"')^0 * P('"')^-1 +-- local sq_str = "'" * (lexer.any - "'")^0 * P("'")^-1 +-- local simple_string = token(lexer.STRING, dq_str + sq_str) +-- +-- However, most programming languages allow escape sequences in strings such +-- that a sequence like "\\"" in a double-quoted string indicates that the +-- '"' is not the end of the string. The above token incorrectly matches +-- such a string. Instead, use the [`lexer.delimited_range()`]() convenience +-- function. +-- +-- local dq_str = lexer.delimited_range('"') +-- local sq_str = lexer.delimited_range("'") +-- local string = token(lexer.STRING, dq_str + sq_str) +-- +-- In this case, the lexer treats '\' as an escape character in a string +-- sequence. +-- +-- **Numbers** +-- +-- Most programming languages have the same format for integer and float tokens, +-- so it might be as simple as using a couple of predefined LPeg patterns: +-- +-- local number = token(lexer.NUMBER, lexer.float + lexer.integer) +-- +-- However, some languages allow postfix characters on integers. +-- +-- local integer = P('-')^-1 * (lexer.dec_num * S('lL')^-1) +-- local number = token(lexer.NUMBER, lexer.float + lexer.hex_num + integer) +-- +-- Your language may need other tweaks, but it is up to you how fine-grained you +-- want your highlighting to be. After all, you are not writing a compiler or +-- interpreter! 
+-- +-- #### Rules +-- +-- Programming languages have grammars, which specify valid token structure. For +-- example, comments usually cannot appear within a string. Grammars consist of +-- rules, which are simply combinations of tokens. Recall from the lexer +-- template the [`lexer.add_rule()`]() call, which adds a rule to the lexer's +-- grammar: +-- +-- lex:add_rule('whitespace', ws) +-- +-- Each rule has an associated name, but rule names are completely arbitrary and +-- serve only to identify and distinguish between different rules. Rule order is +-- important: if text does not match the first rule added to the grammar, the +-- lexer tries to match the second rule added, and so on. Right now this lexer +-- simply matches whitespace tokens under a rule named "whitespace". +-- +-- To illustrate the importance of rule order, here is an example of a +-- simplified Lua lexer: +-- +-- lex:add_rule('whitespace', token(lexer.WHITESPACE, ...)) +-- lex:add_rule('keyword', token(lexer.KEYWORD, ...)) +-- lex:add_rule('identifier', token(lexer.IDENTIFIER, ...)) +-- lex:add_rule('string', token(lexer.STRING, ...)) +-- lex:add_rule('comment', token(lexer.COMMENT, ...)) +-- lex:add_rule('number', token(lexer.NUMBER, ...)) +-- lex:add_rule('label', token(lexer.LABEL, ...)) +-- lex:add_rule('operator', token(lexer.OPERATOR, ...)) +-- +-- Note how identifiers come after keywords. In Lua, as with most programming +-- languages, the characters allowed in keywords and identifiers are in the same +-- set (alphanumerics plus underscores). If the lexer added the "identifier" +-- rule before the "keyword" rule, all keywords would match identifiers and thus +-- incorrectly highlight as identifiers instead of keywords. The same idea +-- applies to function, constant, etc. tokens that you may want to distinguish +-- between: their rules should come before identifiers. +-- +-- So what about text that does not match any rules? For example in Lua, the '!' 
+-- character is meaningless outside a string or comment. Normally the lexer +-- skips over such text. If instead you want to highlight these "syntax errors", +-- add an additional end rule: +-- +-- lex:add_rule('whitespace', ws) +-- ... +-- lex:add_rule('error', token(lexer.ERROR, lexer.any)) +-- +-- This identifies and highlights any character not matched by an existing +-- rule as a `lexer.ERROR` token. +-- +-- Even though the rules defined in the examples above contain a single token, +-- rules may consist of multiple tokens. For example, a rule for an HTML tag +-- could consist of a tag token followed by an arbitrary number of attribute +-- tokens, allowing the lexer to highlight all tokens separately. That rule +-- might look something like this: +-- +-- lex:add_rule('tag', tag_start * (ws * attributes)^0 * tag_end^-1) +-- +-- Note however that lexers with complex rules like these are more prone to lose +-- track of their state, especially if they span multiple lines. +-- +-- #### Summary +-- +-- Lexers primarily consist of tokens and grammar rules. At your disposal are a +-- number of convenience patterns and functions for rapidly creating a lexer. If +-- you choose to use predefined token names for your tokens, you do not have to +-- define how the lexer highlights them. The tokens will inherit the default +-- syntax highlighting color theme your editor uses. +-- +-- ### Advanced Techniques +-- +-- #### Styles and Styling +-- +-- The most basic form of syntax highlighting is assigning different colors to +-- different tokens. Instead of highlighting with just colors, Scintilla allows +-- for more rich highlighting, or "styling", with different fonts, font sizes, +-- font attributes, and foreground and background colors, just to name a few. +-- The unit of this rich highlighting is called a "style". Styles are simply +-- strings of comma-separated property settings. 
By default, lexers associate +-- predefined token names like `lexer.WHITESPACE`, `lexer.COMMENT`, +-- `lexer.STRING`, etc. with particular styles as part of a universal color +-- theme. These predefined styles include [`lexer.STYLE_CLASS`](), +-- [`lexer.STYLE_COMMENT`](), [`lexer.STYLE_CONSTANT`](), +-- [`lexer.STYLE_ERROR`](), [`lexer.STYLE_EMBEDDED`](), +-- [`lexer.STYLE_FUNCTION`](), [`lexer.STYLE_IDENTIFIER`](), +-- [`lexer.STYLE_KEYWORD`](), [`lexer.STYLE_LABEL`](), [`lexer.STYLE_NUMBER`](), +-- [`lexer.STYLE_OPERATOR`](), [`lexer.STYLE_PREPROCESSOR`](), +-- [`lexer.STYLE_REGEX`](), [`lexer.STYLE_STRING`](), [`lexer.STYLE_TYPE`](), +-- [`lexer.STYLE_VARIABLE`](), and [`lexer.STYLE_WHITESPACE`](). Like with +-- predefined token names and LPeg patterns, you may define your own styles. At +-- their core, styles are just strings, so you may create new ones and/or modify +-- existing ones. Each style consists of the following comma-separated settings: +-- +-- Setting | Description +-- ---------------|------------ +-- font:_name_ | The name of the font the style uses. +-- size:_int_ | The size of the font the style uses. +-- [not]bold | Whether or not the font face is bold. +-- weight:_int_ | The weight or boldness of a font, between 1 and 999. +-- [not]italics | Whether or not the font face is italic. +-- [not]underlined| Whether or not the font face is underlined. +-- fore:_color_ | The foreground color of the font face. +-- back:_color_ | The background color of the font face. +-- [not]eolfilled | Does the background color extend to the end of the line? +-- case:_char_ | The case of the font ('u': upper, 'l': lower, 'm': normal). +-- [not]visible | Whether or not the text is visible. +-- [not]changeable| Whether the text is changeable or read-only. +-- +-- Specify font colors in either "#RRGGBB" format, "0xBBGGRR" format, or the +-- decimal equivalent of the latter. 
As with token names, LPeg patterns, and +-- styles, there is a set of predefined color names, but they vary depending on +-- the current color theme in use. Therefore, it is generally not a good idea to +-- manually define colors within styles in your lexer since they might not fit +-- into a user's chosen color theme. Try to refrain from even using predefined +-- colors in a style because that color may be theme-specific. Instead, the best +-- practice is to either use predefined styles or derive new color-agnostic +-- styles from predefined ones. For example, Lua "longstring" tokens use the +-- existing `lexer.STYLE_STRING` style instead of defining a new one. +-- +-- ##### Example Styles +-- +-- Defining styles is pretty straightforward. An empty style that inherits the +-- default theme settings is simply an empty string: +-- +-- local style_nothing = '' +-- +-- A similar style but with a bold font face looks like this: +-- +-- local style_bold = 'bold' +-- +-- If you want the same style, but also with an italic font face, define the new +-- style in terms of the old one: +-- +-- local style_bold_italic = style_bold..',italics' +-- +-- This allows you to derive new styles from predefined ones without having to +-- rewrite them. This operation leaves the old style unchanged. Thus if you +-- had a "static variable" token whose style you wanted to base off of +-- `lexer.STYLE_VARIABLE`, it would probably look like: +-- +-- local style_static_var = lexer.STYLE_VARIABLE..',italics' +-- +-- The color theme files in the *lexers/themes/* folder give more examples of +-- style definitions. +-- +-- #### Token Styles +-- +-- Lexers use the [`lexer.add_style()`]() function to assign styles to +-- particular tokens. Recall the token definition and from the lexer template: +-- +-- local ws = token(lexer.WHITESPACE, lexer.space^1) +-- lex:add_rule('whitespace', ws) +-- +-- Why is a style not assigned to the `lexer.WHITESPACE` token? 
As mentioned +-- earlier, lexers automatically associate tokens that use predefined token +-- names with a particular style. Only tokens with custom token names need +-- manual style associations. As an example, consider a custom whitespace token: +-- +-- local ws = token('custom_whitespace', lexer.space^1) +-- +-- Assigning a style to this token looks like: +-- +-- lex:add_style('custom_whitespace', lexer.STYLE_WHITESPACE) +-- +-- Do not confuse token names with rule names. They are completely different +-- entities. In the example above, the lexer associates the "custom_whitespace" +-- token with the existing style for `lexer.WHITESPACE` tokens. If instead you +-- prefer to color the background of whitespace a shade of grey, it might look +-- like: +-- +-- local custom_style = lexer.STYLE_WHITESPACE..',back:$(color.grey)' +-- lex:add_style('custom_whitespace', custom_style) +-- +-- Notice that the lexer performs Scintilla-style "$()" property expansion. You +-- may also use "%()". Remember to refrain from assigning specific colors in +-- styles, but in this case, all user color themes probably define the +-- "color.grey" property. +-- +-- #### Line Lexers +-- +-- By default, lexers match the arbitrary chunks of text passed to them by +-- Scintilla. These chunks may be a full document, only the visible part of a +-- document, or even just portions of lines. Some lexers need to match whole +-- lines. For example, a lexer for the output of a file "diff" needs to know if +-- the line started with a '+' or '-' and then style the entire line +-- accordingly. To indicate that your lexer matches by line, create the lexer +-- with an extra parameter: +-- +-- local lex = lexer.new('?', {lex_by_line = true}) +-- +-- Now the input text for the lexer is a single line at a time. Keep in mind +-- that line lexers do not have the ability to look ahead at subsequent lines. +-- +-- #### Embedded Lexers +-- +-- Lexers embed within one another very easily, requiring minimal effort. 
In the +-- following sections, the lexer being embedded is called the "child" lexer and +-- the lexer a child is being embedded in is called the "parent". For example, +-- consider an HTML lexer and a CSS lexer. Either lexer stands alone for styling +-- their respective HTML and CSS files. However, CSS can be embedded inside +-- HTML. In this specific case, the CSS lexer is the "child" lexer with the HTML +-- lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This +-- sounds a lot like the case with CSS, but there is a subtle difference: PHP +-- _embeds itself into_ HTML while CSS is _embedded in_ HTML. This fundamental +-- difference results in two types of embedded lexers: a parent lexer that +-- embeds other child lexers in it (like HTML embedding CSS), and a child lexer +-- that embeds itself into a parent lexer (like PHP embedding itself in HTML). +-- +-- ##### Parent Lexer +-- +-- Before embedding a child lexer into a parent lexer, the parent lexer needs to +-- load the child lexer. This is done with the [`lexer.load()`]() function. For +-- example, loading the CSS lexer within the HTML lexer looks like: +-- +-- local css = lexer.load('css') +-- +-- The next part of the embedding process is telling the parent lexer when to +-- switch over to the child lexer and when to switch back. The lexer refers to +-- these indications as the "start rule" and "end rule", respectively, and are +-- just LPeg patterns. Continuing with the HTML/CSS example, the transition from +-- HTML to CSS is when the lexer encounters a "style" tag with a "type" +-- attribute whose value is "text/css": +-- +-- local css_tag = P('<style') * P(function(input, index) +-- if input:find('^[^>]+type="text/css"', index) then +-- return index +-- end +-- end) +-- +-- This pattern looks for the beginning of a "style" tag and searches its +-- attribute list for the text "`type="text/css"`". 
(In this simplified example, +-- the Lua pattern does not consider whitespace between the '=' nor does it +-- consider that using single quotes is valid.) If there is a match, the +-- functional pattern returns a value instead of `nil`. In this case, the value +-- returned does not matter because we ultimately want to style the "style" tag +-- as an HTML tag, so the actual start rule looks like this: +-- +-- local css_start_rule = #css_tag * tag +-- +-- Now that the parent knows when to switch to the child, it needs to know when +-- to switch back. In the case of HTML/CSS, the switch back occurs when the +-- lexer encounters an ending "style" tag, though the lexer should still style +-- the tag as an HTML tag: +-- +-- local css_end_rule = #P('</style>') * tag +-- +-- Once the parent loads the child lexer and defines the child's start and end +-- rules, it embeds the child with the [`lexer.embed()`]() function: +-- +-- lex:embed(css, css_start_rule, css_end_rule) +-- +-- ##### Child Lexer +-- +-- The process for instructing a child lexer to embed itself into a parent is +-- very similar to embedding a child into a parent: first, load the parent lexer +-- into the child lexer with the [`lexer.load()`]() function and then create +-- start and end rules for the child lexer. However, in this case, call +-- [`lexer.embed()`]() with switched arguments. For example, in the PHP lexer: +-- +-- local html = lexer.load('html') +-- local php_start_rule = token('php_tag', '<?php ') +-- local php_end_rule = token('php_tag', '?>') +-- lex:add_style('php_tag', lexer.STYLE_EMBEDDED) +-- html:embed(lex, php_start_rule, php_end_rule) +-- +-- #### Lexers with Complex State +-- +-- A vast majority of lexers are not stateful and can operate on any chunk of +-- text in a document. However, there may be rare cases where a lexer does need +-- to keep track of some sort of persistent state. 
Rather than using `lpeg.P` +-- function patterns that set state variables, it is recommended to make use of +-- Scintilla's built-in, per-line state integers via [`lexer.line_state`](). It +-- was designed to accommodate up to 32 bit flags for tracking state. +-- [`lexer.line_from_position()`]() will return the line for any position given +-- to an `lpeg.P` function pattern. (Any positions derived from that position +-- argument will also work.) +-- +-- Writing stateful lexers is beyond the scope of this document. +-- +-- ### Code Folding +-- +-- When reading source code, it is occasionally helpful to temporarily hide +-- blocks of code like functions, classes, comments, etc. This is the concept of +-- "folding". In many Scintilla-based editors, such as Textadept, little +-- indicators in the editor margins appear next to code that can be folded at +-- places called "fold points". When the user clicks an indicator, the editor +-- hides the code associated with the indicator until the user clicks the +-- indicator again. The lexer specifies these fold points and what code exactly +-- to fold. +-- +-- The fold points for most languages occur on keywords or character sequences. +-- Examples of fold keywords are "if" and "end" in Lua and examples of fold +-- character sequences are '{', '}', "/\*", and "\*/" in C for code block and +-- comment delimiters, respectively. However, these fold points cannot occur +-- just anywhere. For example, lexers should not recognize fold keywords that +-- appear within strings or comments. The [`lexer.add_fold_point()`]() function +-- allows you to conveniently define fold points with such granularity. For +-- example, consider C: +-- +-- lex:add_fold_point(lexer.OPERATOR, '{', '}') +-- lex:add_fold_point(lexer.COMMENT, '/*', '*/') +-- +-- The first assignment states that any '{' or '}' that the lexer recognized as +-- an `lexer.OPERATOR` token is a fold point. 
Likewise, the second assignment +-- states that any "/\*" or "\*/" that the lexer recognizes as part of a +-- `lexer.COMMENT` token is a fold point. The lexer does not consider any +-- occurrences of these characters outside their defined tokens (such as in a +-- string) as fold points. How do you specify fold keywords? Here is an example +-- for Lua: +-- +-- lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +-- lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +-- lex:add_fold_point(lexer.KEYWORD, 'function', 'end') +-- lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until') +-- +-- If your lexer has case-insensitive keywords as fold points, simply add a +-- `case_insensitive_fold_points = true` option to [`lexer.new()`](), and +-- specify keywords in lower case. +-- +-- If your lexer needs to do some additional processing in order to determine if +-- a token is a fold point, pass a function that returns an integer to +-- `lex:add_fold_point()`. Returning `1` indicates the token is a beginning fold +-- point and returning `-1` indicates the token is an ending fold point. +-- Returning `0` indicates the token is not a fold point. For example: +-- +-- local function fold_strange_token(text, pos, line, s, symbol) +-- if ... then +-- return 1 -- beginning fold point +-- elseif ... then +-- return -1 -- ending fold point +-- end +-- return 0 +-- end +-- +-- lex:add_fold_point('strange_token', '|', fold_strange_token) +-- +-- Any time the lexer encounters a '|' that is a "strange_token", it calls the +-- `fold_strange_token` function to determine if '|' is a fold point. The lexer +-- calls these functions with the following arguments: the text to identify fold +-- points in, the beginning position of the current line in the text to fold, +-- the current line's text, the position in the current line the fold point text +-- starts at, and the fold point text itself. 
+-- +-- #### Fold by Indentation +-- +-- Some languages have significant whitespace and/or no delimiters that indicate +-- fold points. If your lexer falls into this category and you would like to +-- mark fold points based on changes in indentation, create the lexer with a +-- `fold_by_indentation = true` option: +-- +-- local lex = lexer.new('?', {fold_by_indentation = true}) +-- +-- ### Using Lexers +-- +-- #### Textadept +-- +-- Put your lexer in your *~/.textadept/lexers/* directory so you do not +-- overwrite it when upgrading Textadept. Also, lexers in this directory +-- override default lexers. Thus, Textadept loads a user *lua* lexer instead of +-- the default *lua* lexer. This is convenient for tweaking a default lexer to +-- your liking. Then add a [file type][] for your lexer if necessary. +-- +-- [file type]: textadept.file_types.html +-- +-- ### Migrating Legacy Lexers +-- +-- Legacy lexers are of the form: +-- +-- local l = require('lexer') +-- local token, word_match = l.token, l.word_match +-- local P, R, S = lpeg.P, lpeg.R, lpeg.S +-- +-- local M = {_NAME = '?'} +-- +-- [... token and pattern definitions ...] +-- +-- M._rules = { +-- {'rule', pattern}, +-- [...] +-- } +-- +-- M._tokenstyles = { +-- 'token' = 'style', +-- [...] +-- } +-- +-- M._foldsymbols = { +-- _patterns = {...}, +-- ['token'] = {['start'] = 1, ['end'] = -1}, +-- [...] +-- } +-- +-- return M +-- +-- While such legacy lexers will be handled just fine without any changes, it is +-- recommended that you migrate yours. The migration process is fairly +-- straightforward: +-- +-- 1. Replace all instances of `l` with `lexer`, as it's better practice and +-- results in less confusion. +-- 2. Replace `local M = {_NAME = '?'}` with `local lex = lexer.new('?')`, where +-- `?` is the name of your legacy lexer. At the end of the lexer, change +-- `return M` to `return lex`. +-- 3. 
Instead of defining rules towards the end of your lexer, define your rules +-- as you define your tokens and patterns using +-- [`lex:add_rule()`](#lexer.add_rule). +-- 4. Similarly, any custom token names should have their styles immediately +-- defined using [`lex:add_style()`](#lexer.add_style). +-- 5. Convert any table arguments passed to [`lexer.word_match()`]() to a +-- space-separated string of words. +-- 6. Replace any calls to `lexer.embed(M, child, ...)` and +-- `lexer.embed(parent, M, ...)` with +-- [`lex:embed`](#lexer.embed)`(child, ...)` and `parent:embed(lex, ...)`, +-- respectively. +-- 7. Define fold points with simple calls to +-- [`lex:add_fold_point()`](#lexer.add_fold_point). No need to mess with Lua +-- patterns anymore. +-- 8. Any legacy lexer options such as `M._FOLDBYINDENTATION`, `M._LEXBYLINE`, +-- `M._lexer`, etc. should be added as table options to [`lexer.new()`](). +-- 9. Any external lexer rule fetching and/or modifications via `lexer._RULES` +-- should be changed to use [`lexer.get_rule()`]() and +-- [`lexer.modify_rule()`](). 
+-- +-- As an example, consider the following sample legacy lexer: +-- +-- local l = require('lexer') +-- local token, word_match = l.token, l.word_match +-- local P, R, S = lpeg.P, lpeg.R, lpeg.S +-- +-- local M = {_NAME = 'legacy'} +-- +-- local ws = token(l.WHITESPACE, l.space^1) +-- local comment = token(l.COMMENT, '#' * l.nonnewline^0) +-- local string = token(l.STRING, l.delimited_range('"')) +-- local number = token(l.NUMBER, l.float + l.integer) +-- local keyword = token(l.KEYWORD, word_match{'foo', 'bar', 'baz'}) +-- local custom = token('custom', P('quux')) +-- local identifier = token(l.IDENTIFIER, l.word) +-- local operator = token(l.OPERATOR, S('+-*/%^=<>,.()[]{}')) +-- +-- M._rules = { +-- {'whitespace', ws}, +-- {'keyword', keyword}, +-- {'custom', custom}, +-- {'identifier', identifier}, +-- {'string', string}, +-- {'comment', comment}, +-- {'number', number}, +-- {'operator', operator} +-- } +-- +-- M._tokenstyles = { +-- 'custom' = l.STYLE_KEYWORD..',bold' +-- } +-- +-- M._foldsymbols = { +-- _patterns = {'[{}]'}, +-- [l.OPERATOR] = {['{'] = 1, ['}'] = -1} +-- } +-- +-- return M +-- +-- Following the migration steps would yield: +-- +-- local lexer = require('lexer') +-- local token, word_match = lexer.token, lexer.word_match +-- local P, R, S = lpeg.P, lpeg.R, lpeg.S +-- +-- local lex = lexer.new('legacy') +-- +-- lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) +-- lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[foo bar baz]])) +-- lex:add_rule('custom', token('custom', P('quux'))) +-- lex:add_style('custom', lexer.STYLE_KEYWORD..',bold') +-- lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) +-- lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) +-- lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) +-- lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) +-- lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.()[]{}'))) +-- +-- 
lex:add_fold_point(lexer.OPERATOR, '{', '}') +-- +-- return lex +-- +-- ### Considerations +-- +-- #### Performance +-- +-- There might be some slight overhead when initializing a lexer, but loading a +-- file from disk into Scintilla is usually more expensive. On modern computer +-- systems, I see no difference in speed between Lua lexers and Scintilla's C++ +-- ones. Optimize lexers for speed by re-arranging `lexer.add_rule()` calls so +-- that the most common rules match first. Do keep in mind that order matters +-- for similar rules. +-- +-- In some cases, folding may be far more expensive than lexing, particularly +-- in lexers with a lot of potential fold points. If your lexer is exhibiting +-- signs of slowness, try disabling folding in your text editor first. If that +-- speeds things up, you can try reducing the number of fold points you added, +-- overriding `lexer.fold()` with your own implementation, or simply eliminating +-- folding support from your lexer. +-- +-- #### Limitations +-- +-- Embedded preprocessor languages like PHP cannot completely embed in their +-- parent languages in that the parent's tokens do not support start and end +-- rules. This mostly goes unnoticed, but code like +-- +-- <div id="<?php echo $id; ?>"> +-- +-- will not style correctly. +-- +-- #### Troubleshooting +-- +-- Errors in lexers can be tricky to debug. Lexers print Lua errors to +-- `io.stderr` and `_G.print()` statements to `io.stdout`. Running your editor +-- from a terminal is the easiest way to see errors as they occur. +-- +-- #### Risks +-- +-- Poorly written lexers have the ability to crash Scintilla (and thus its +-- containing application), so unsaved data might be lost. However, I have only +-- observed these crashes in early lexer development, when syntax errors or +-- pattern errors are present. Once the lexer actually starts styling text +-- (either correctly or incorrectly, it does not matter), I have not observed +-- any crashes. 
+-- +-- #### Acknowledgements +-- +-- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list +-- that inspired me, and thanks to Roberto Ierusalimschy for LPeg. +-- +-- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html +-- @field path (string) +-- The path used to search for a lexer to load. +-- Identical in format to Lua's `package.path` string. +-- The default value is `package.path`. +-- @field DEFAULT (string) +-- The token name for default tokens. +-- @field WHITESPACE (string) +-- The token name for whitespace tokens. +-- @field COMMENT (string) +-- The token name for comment tokens. +-- @field STRING (string) +-- The token name for string tokens. +-- @field NUMBER (string) +-- The token name for number tokens. +-- @field KEYWORD (string) +-- The token name for keyword tokens. +-- @field IDENTIFIER (string) +-- The token name for identifier tokens. +-- @field OPERATOR (string) +-- The token name for operator tokens. +-- @field ERROR (string) +-- The token name for error tokens. +-- @field PREPROCESSOR (string) +-- The token name for preprocessor tokens. +-- @field CONSTANT (string) +-- The token name for constant tokens. +-- @field VARIABLE (string) +-- The token name for variable tokens. +-- @field FUNCTION (string) +-- The token name for function tokens. +-- @field CLASS (string) +-- The token name for class tokens. +-- @field TYPE (string) +-- The token name for type tokens. +-- @field LABEL (string) +-- The token name for label tokens. +-- @field REGEX (string) +-- The token name for regex tokens. +-- @field STYLE_CLASS (string) +-- The style typically used for class definitions. +-- @field STYLE_COMMENT (string) +-- The style typically used for code comments. +-- @field STYLE_CONSTANT (string) +-- The style typically used for constants. +-- @field STYLE_ERROR (string) +-- The style typically used for erroneous syntax. +-- @field STYLE_FUNCTION (string) +-- The style typically used for function definitions. 
+-- @field STYLE_KEYWORD (string) +-- The style typically used for language keywords. +-- @field STYLE_LABEL (string) +-- The style typically used for labels. +-- @field STYLE_NUMBER (string) +-- The style typically used for numbers. +-- @field STYLE_OPERATOR (string) +-- The style typically used for operators. +-- @field STYLE_REGEX (string) +-- The style typically used for regular expression strings. +-- @field STYLE_STRING (string) +-- The style typically used for strings. +-- @field STYLE_PREPROCESSOR (string) +-- The style typically used for preprocessor statements. +-- @field STYLE_TYPE (string) +-- The style typically used for static types. +-- @field STYLE_VARIABLE (string) +-- The style typically used for variables. +-- @field STYLE_WHITESPACE (string) +-- The style typically used for whitespace. +-- @field STYLE_EMBEDDED (string) +-- The style typically used for embedded code. +-- @field STYLE_IDENTIFIER (string) +-- The style typically used for identifier words. +-- @field STYLE_DEFAULT (string) +-- The style all styles are based off of. +-- @field STYLE_LINENUMBER (string) +-- The style used for all margins except fold margins. +-- @field STYLE_BRACELIGHT (string) +-- The style used for highlighted brace characters. +-- @field STYLE_BRACEBAD (string) +-- The style used for unmatched brace characters. +-- @field STYLE_CONTROLCHAR (string) +-- The style used for control characters. +-- Color attributes are ignored. +-- @field STYLE_INDENTGUIDE (string) +-- The style used for indentation guides. +-- @field STYLE_CALLTIP (string) +-- The style used by call tips if [`buffer.call_tip_use_style`]() is set. +-- Only the font name, size, and color attributes are used. +-- @field STYLE_FOLDDISPLAYTEXT (string) +-- The style used for fold display text. +-- @field any (pattern) +-- A pattern that matches any single character. +-- @field ascii (pattern) +-- A pattern that matches any ASCII character (codes 0 to 127). 
+-- @field extend (pattern) +-- A pattern that matches any ASCII extended character (codes 0 to 255). +-- @field alpha (pattern) +-- A pattern that matches any alphabetic character ('A'-'Z', 'a'-'z'). +-- @field digit (pattern) +-- A pattern that matches any digit ('0'-'9'). +-- @field alnum (pattern) +-- A pattern that matches any alphanumeric character ('A'-'Z', 'a'-'z', +-- '0'-'9'). +-- @field lower (pattern) +-- A pattern that matches any lower case character ('a'-'z'). +-- @field upper (pattern) +-- A pattern that matches any upper case character ('A'-'Z'). +-- @field xdigit (pattern) +-- A pattern that matches any hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f'). +-- @field cntrl (pattern) +-- A pattern that matches any control character (ASCII codes 0 to 31). +-- @field graph (pattern) +-- A pattern that matches any graphical character ('!' to '~'). +-- @field print (pattern) +-- A pattern that matches any printable character (' ' to '~'). +-- @field punct (pattern) +-- A pattern that matches any punctuation character ('!' to '/', ':' to '@', +-- '[' to ''', '{' to '~'). +-- @field space (pattern) +-- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n', +-- '\r', space). +-- @field newline (pattern) +-- A pattern that matches any set of end of line characters. +-- @field nonnewline (pattern) +-- A pattern that matches any single, non-newline character. +-- @field nonnewline_esc (pattern) +-- A pattern that matches any single, non-newline character or any set of end +-- of line characters escaped with '\'. +-- @field dec_num (pattern) +-- A pattern that matches a decimal number. +-- @field hex_num (pattern) +-- A pattern that matches a hexadecimal number. +-- @field oct_num (pattern) +-- A pattern that matches an octal number. +-- @field integer (pattern) +-- A pattern that matches either a decimal, hexadecimal, or octal number. +-- @field float (pattern) +-- A pattern that matches a floating point number. 
+-- @field word (pattern)
+--   A pattern that matches a typical word. Words begin with a letter or
+--   underscore and consist of alphanumeric and underscore characters.
+-- @field FOLD_BASE (number)
+--   The initial (root) fold level.
+-- @field FOLD_BLANK (number)
+--   Flag indicating that the line is blank.
+-- @field FOLD_HEADER (number)
+--   Flag indicating the line is a fold point.
+-- @field fold_level (table, Read-only)
+--   Table of fold level bit-masks for line numbers starting from zero.
+--   Fold level masks are composed of an integer level combined with any of the
+--   following bits:
+--
+--   * `lexer.FOLD_BASE`
+--     The initial fold level.
+--   * `lexer.FOLD_BLANK`
+--     The line is blank.
+--   * `lexer.FOLD_HEADER`
+--     The line is a header, or fold point.
+-- @field indent_amount (table, Read-only)
+--   Table of indentation amounts in character columns, for line numbers
+--   starting from zero.
+-- @field line_state (table)
+--   Table of integer line states for line numbers starting from zero.
+--   Line states can be used by lexers for keeping track of persistent states.
+-- @field property (table)
+--   Map of key-value string pairs.
+-- @field property_expanded (table, Read-only)
+--   Map of key-value string pairs with `$()` and `%()` variable replacement
+--   performed in values.
+-- @field property_int (table, Read-only)
+--   Map of key-value pairs with values interpreted as numbers, or `0` if not
+--   found.
+-- @field style_at (table, Read-only)
+--   Table of style names at positions in the buffer starting from 1.
+module('lexer')]=]
+
+local lpeg = require('lpeg')
+local lpeg_P, lpeg_R, lpeg_S, lpeg_V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local lpeg_Ct, lpeg_Cc, lpeg_Cp = lpeg.Ct, lpeg.Cc, lpeg.Cp
+local lpeg_Cmt, lpeg_C = lpeg.Cmt, lpeg.C
+local lpeg_match = lpeg.match
+
+M.path = package.path
+
+if not package.searchpath then
+  -- Searches for the given *name* in the given *path*.
+ -- This is an implementation of Lua 5.2's `package.searchpath()` function for + -- Lua 5.1. + function package.searchpath(name, path) + local tried = {} + for part in path:gmatch('[^;]+') do + local filename = part:gsub('%?', name) + local f = io.open(filename, 'r') + if f then + f:close() + return filename + end + tried[#tried + 1] = string.format("no file '%s'", filename) + end + return nil, table.concat(tried, '\n') + end +end + +local string_upper = string.upper +-- Default styles. +local default = { + 'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword', + 'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable', + 'function', 'class', 'type', 'label', 'regex', 'embedded' +} +for i = 1, #default do + local name, upper_name = default[i], string_upper(default[i]) + M[upper_name], M['STYLE_'..upper_name] = name, '$(style.'..name..')' +end +-- Predefined styles. +local predefined = { + 'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar', + 'indentguide', 'calltip', 'folddisplaytext' +} +for i = 1, #predefined do + local name, upper_name = predefined[i], string_upper(predefined[i]) + M[upper_name], M['STYLE_'..upper_name] = name, '$(style.'..name..')' +end + +--- +-- Adds pattern *rule* identified by string *id* to the ordered list of rules +-- for lexer *lexer*. +-- @param lexer The lexer to add the given rule to. +-- @param id The id associated with this rule. It does not have to be the same +-- as the name passed to `token()`. +-- @param rule The LPeg pattern of the rule. +-- @see modify_rule +-- @name add_rule +function M.add_rule(lexer, id, rule) + if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent + if not lexer._RULES then + lexer._RULES = {} + -- Contains an ordered list (by numerical index) of rule names. This is used + -- in conjunction with lexer._RULES for building _TOKENRULE. 
+ lexer._RULEORDER = {} + end + lexer._RULES[id] = rule + lexer._RULEORDER[#lexer._RULEORDER + 1] = id + lexer:build_grammar() +end + +--- +-- Replaces in lexer *lexer* the existing rule identified by string *id* with +-- pattern *rule*. +-- @param lexer The lexer to modify. +-- @param id The id associated with this rule. +-- @param rule The LPeg pattern of the rule. +-- @name modify_rule +function M.modify_rule(lexer, id, rule) + if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent + lexer._RULES[id] = rule + lexer:build_grammar() +end + +--- +-- Returns the rule identified by string *id*. +-- @param lexer The lexer to fetch a rule from. +-- @param id The id of the rule to fetch. +-- @return pattern +-- @name get_rule +function M.get_rule(lexer, id) + if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent + return lexer._RULES[id] +end + +--- +-- Associates string *token_name* in lexer *lexer* with Scintilla style string +-- *style*. +-- Style strings are comma-separated property settings. Available property +-- settings are: +-- +-- * `font:name`: Font name. +-- * `size:int`: Font size. +-- * `bold` or `notbold`: Whether or not the font face is bold. +-- * `weight:int`: Font weight (between 1 and 999). +-- * `italics` or `notitalics`: Whether or not the font face is italic. +-- * `underlined` or `notunderlined`: Whether or not the font face is +-- underlined. +-- * `fore:color`: Font face foreground color in "#RRGGBB" or 0xBBGGRR format. +-- * `back:color`: Font face background color in "#RRGGBB" or 0xBBGGRR format. +-- * `eolfilled` or `noteolfilled`: Whether or not the background color +-- extends to the end of the line. +-- * `case:char`: Font case ('u' for uppercase, 'l' for lowercase, and 'm' for +-- mixed case). +-- * `visible` or `notvisible`: Whether or not the text is visible. +-- * `changeable` or `notchangeable`: Whether or not the text is changeable or +-- read-only. 
+--
+-- Property settings may also contain "$(property.name)" expansions for
+-- properties defined in Scintilla, theme files, etc.
+-- @param lexer The lexer to add a style to.
+-- @param token_name The name of the token to associate with the style.
+-- @param style A style string for Scintilla.
+-- @usage lex:add_style('longstring', lexer.STYLE_STRING)
+-- @usage lex:add_style('deprecated_function', lexer.STYLE_FUNCTION..',italics')
+-- @usage lex:add_style('visible_ws',
+--   lexer.STYLE_WHITESPACE..',back:$(color.grey)')
+-- @name add_style
+function M.add_style(lexer, token_name, style)
+  local num_styles = lexer._numstyles
+  if num_styles == 32 then num_styles = num_styles + 8 end -- skip predefined
+  if num_styles >= 255 then print('Too many styles defined (255 MAX)') end
+  lexer._TOKENSTYLES[token_name], lexer._numstyles = num_styles, num_styles + 1
+  lexer._EXTRASTYLES[token_name] = style
+  -- If the lexer is a proxy or a child that embedded itself, copy this style to
+  -- the parent lexer.
+  if lexer._lexer then lexer._lexer:add_style(token_name, style) end
+end
+
+---
+-- Adds to lexer *lexer* a fold point whose beginning and end tokens are string
+-- *token_name* tokens with string content *start_symbol* and *end_symbol*,
+-- respectively.
+-- In the event that *start_symbol* may or may not be a fold point depending on
+-- context, and that additional processing is required, *end_symbol* may be a
+-- function that ultimately returns `1` (indicating a beginning fold point),
+-- `-1` (indicating an ending fold point), or `0` (indicating no fold point).
+-- That function is passed the following arguments:
+--
+--   * `text`: The text being processed for fold points.
+--   * `pos`: The position in *text* of the beginning of the line currently
+--     being processed.
+--   * `line`: The text of the line currently being processed.
+--   * `s`: The position of *start_symbol* in *line*.
+--   * `symbol`: *start_symbol* itself.
+-- @param lexer The lexer to add a fold point to. +-- @param token_name The token name of text that indicates a fold point. +-- @param start_symbol The text that indicates the beginning of a fold point. +-- @param end_symbol Either the text that indicates the end of a fold point, or +-- a function that returns whether or not *start_symbol* is a beginning fold +-- point (1), an ending fold point (-1), or not a fold point at all (0). +-- @usage lex:add_fold_point(lexer.OPERATOR, '{', '}') +-- @usage lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +-- @usage lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) +-- @usage lex:add_fold_point('custom', function(text, pos, line, s, symbol) +-- ... end) +-- @name add_fold_point +function M.add_fold_point(lexer, token_name, start_symbol, end_symbol) + if not lexer._FOLDPOINTS then lexer._FOLDPOINTS = {_SYMBOLS = {}} end + local symbols = lexer._FOLDPOINTS._SYMBOLS + if not symbols[start_symbol] then + symbols[#symbols + 1], symbols[start_symbol] = start_symbol, true + end + if not lexer._FOLDPOINTS[token_name] then + lexer._FOLDPOINTS[token_name] = {} + end + if type(end_symbol) == 'string' then + if not symbols[end_symbol] then + symbols[#symbols + 1], symbols[end_symbol] = end_symbol, true + end + lexer._FOLDPOINTS[token_name][start_symbol] = 1 + lexer._FOLDPOINTS[token_name][end_symbol] = -1 + else + lexer._FOLDPOINTS[token_name][start_symbol] = end_symbol -- function or int + end + -- If the lexer is a proxy or a child that embedded itself, copy this fold + -- point to the parent lexer. + if lexer._lexer then + lexer._lexer:add_fold_point(token_name, start_symbol, end_symbol) + end +end + +-- (Re)constructs `lexer._TOKENRULE`. 
+local function join_tokens(lexer) + local patterns, order = lexer._RULES, lexer._RULEORDER + local token_rule = patterns[order[1]] + for i = 2, #order do token_rule = token_rule + patterns[order[i]] end + lexer._TOKENRULE = token_rule + M.token(M.DEFAULT, M.any) + return lexer._TOKENRULE +end + +-- Metatable for lexer grammars. +-- These grammars are just tables ultimately passed to `lpeg.P()`. +local grammar_mt = {__index = { + -- Adds lexer *lexer* and any of its embedded lexers to this grammar. + -- @param lexer The lexer to add. + add_lexer = function(self, lexer) + local token_rule = lexer:join_tokens() + for i = 1, #lexer._CHILDREN do + local child = lexer._CHILDREN[i] + if child._CHILDREN then self:add_lexer(child) end + local rules = child._EMBEDDEDRULES[lexer._NAME] + local rules_token_rule = self['__'..child._NAME] or rules.token_rule + self[child._NAME] = (-rules.end_rule * rules_token_rule)^0 * + rules.end_rule^-1 * lpeg_V(lexer._NAME) + local embedded_child = '_'..child._NAME + self[embedded_child] = rules.start_rule * + (-rules.end_rule * rules_token_rule)^0 * + rules.end_rule^-1 + token_rule = lpeg_V(embedded_child) + token_rule + end + self['__'..lexer._NAME] = token_rule -- can contain embedded lexer rules + self[lexer._NAME] = token_rule^0 + end +}} + +-- (Re)constructs `lexer._GRAMMAR`. +-- @param initial_rule The name of the rule to start lexing with. The default +-- value is `lexer._NAME`. Multilang lexers use this to start with a child +-- rule if necessary. 
+local function build_grammar(lexer, initial_rule) + if not lexer._RULES then return end + if lexer._CHILDREN then + if not initial_rule then initial_rule = lexer._NAME end + local grammar = setmetatable({initial_rule}, grammar_mt) + grammar:add_lexer(lexer) + lexer._INITIALRULE = initial_rule + lexer._GRAMMAR = lpeg_Ct(lpeg_P(grammar)) + else + lexer._GRAMMAR = lpeg_Ct(lexer:join_tokens()^0) + end +end + +--- +-- Embeds child lexer *child* in parent lexer *lexer* using patterns +-- *start_rule* and *end_rule*, which signal the beginning and end of the +-- embedded lexer, respectively. +-- @param lexer The parent lexer. +-- @param child The child lexer. +-- @param start_rule The pattern that signals the beginning of the embedded +-- lexer. +-- @param end_rule The pattern that signals the end of the embedded lexer. +-- @usage html:embed(css, css_start_rule, css_end_rule) +-- @usage html:embed(lex, php_start_rule, php_end_rule) -- from php lexer +-- @name embed +function M.embed(lexer, child, start_rule, end_rule) + if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent + -- Add child rules. + if not child._EMBEDDEDRULES then child._EMBEDDEDRULES = {} end + if not child._RULES then error('Cannot embed lexer with no rules') end + child._EMBEDDEDRULES[lexer._NAME] = { + ['start_rule'] = start_rule, + token_rule = child:join_tokens(), + ['end_rule'] = end_rule + } + if not lexer._CHILDREN then lexer._CHILDREN = {} end + local children = lexer._CHILDREN + children[#children + 1] = child + -- Add child styles. + for token, style in pairs(child._EXTRASTYLES) do + lexer:add_style(token, style) + end + -- Add child fold symbols. 
+ if child._FOLDPOINTS then + for token_name, symbols in pairs(child._FOLDPOINTS) do + if token_name ~= '_SYMBOLS' then + for symbol, v in pairs(symbols) do + lexer:add_fold_point(token_name, symbol, v) + end + end + end + end + lexer:build_grammar() + child._lexer = lexer -- use parent's tokens if child is embedding itself +end + +--- +-- Lexes a chunk of text *text* (that has an initial style number of +-- *init_style*) using lexer *lexer*, returning a table of token names and +-- positions. +-- @param lexer The lexer to lex text with. +-- @param text The text in the buffer to lex. +-- @param init_style The current style. Multiple-language lexers use this to +-- determine which language to start lexing in. +-- @return table of token names and positions. +-- @name lex +function M.lex(lexer, text, init_style) + if not lexer._GRAMMAR then return {M.DEFAULT, #text + 1} end + if not lexer._LEXBYLINE then + -- For multilang lexers, build a new grammar whose initial_rule is the + -- current language. + if lexer._CHILDREN then + for style, style_num in pairs(lexer._TOKENSTYLES) do + if style_num == init_style then + local lexer_name = style:match('^(.+)_whitespace') or lexer._NAME + if lexer._INITIALRULE ~= lexer_name then + lexer:build_grammar(lexer_name) + end + break + end + end + end + return lpeg_match(lexer._GRAMMAR, text) + else + local tokens = {} + local function append(tokens, line_tokens, offset) + for i = 1, #line_tokens, 2 do + tokens[#tokens + 1] = line_tokens[i] + tokens[#tokens + 1] = line_tokens[i + 1] + offset + end + end + local offset = 0 + local grammar = lexer._GRAMMAR + for line in text:gmatch('[^\r\n]*\r?\n?') do + local line_tokens = lpeg_match(grammar, line) + if line_tokens then append(tokens, line_tokens, offset) end + offset = offset + #line + -- Use the default style to the end of the line if none was specified. 
+ if tokens[#tokens] ~= offset then + tokens[#tokens + 1], tokens[#tokens + 2] = 'default', offset + 1 + end + end + return tokens + end +end + +--- +-- Determines fold points in a chunk of text *text* using lexer *lexer*, +-- returning a table of fold levels associated with line numbers. +-- *text* starts at position *start_pos* on line number *start_line* with a +-- beginning fold level of *start_level* in the buffer. +-- @param lexer The lexer to fold text with. +-- @param text The text in the buffer to fold. +-- @param start_pos The position in the buffer *text* starts at, starting at +-- zero. +-- @param start_line The line number *text* starts on. +-- @param start_level The fold level *text* starts on. +-- @return table of fold levels associated with line numbers. +-- @name fold +function M.fold(lexer, text, start_pos, start_line, start_level) + local folds = {} + if text == '' then return folds end + local fold = M.property_int['fold'] > 0 + local FOLD_BASE = M.FOLD_BASE + local FOLD_HEADER, FOLD_BLANK = M.FOLD_HEADER, M.FOLD_BLANK + if fold and lexer._FOLDPOINTS then + local lines = {} + for p, l in (text..'\n'):gmatch('()(.-)\r?\n') do + lines[#lines + 1] = {p, l} + end + local fold_zero_sum_lines = M.property_int['fold.on.zero.sum.lines'] > 0 + local fold_compact = M.property_int['fold.compact'] > 0 + local fold_points = lexer._FOLDPOINTS + local fold_point_symbols = fold_points._SYMBOLS + local style_at, fold_level = M.style_at, M.fold_level + local line_num, prev_level = start_line, start_level + local current_level = prev_level + for i = 1, #lines do + local pos, line = lines[i][1], lines[i][2] + if line ~= '' then + if lexer._CASEINSENSITIVEFOLDPOINTS then line = line:lower() end + local level_decreased = false + for j = 1, #fold_point_symbols do + local symbol = fold_point_symbols[j] + local word = not symbol:find('[^%w_]') + local s, e = line:find(symbol, 1, true) + while s and e do + --if not word or line:find('^%f[%w_]'..symbol..'%f[^%w_]', s) 
then + if not word or not ((s > 1 and line:find('^[%w_]', s - 1)) or + line:find('^[%w_]', e + 1)) then + local symbols = fold_points[style_at[start_pos + pos + s - 1]] + local level = symbols and symbols[symbol] + if type(level) == 'function' then + level = level(text, pos, line, s, symbol) + end + if type(level) == 'number' then + current_level = current_level + level + if level < 0 and current_level < prev_level then + -- Potential zero-sum line. If the level were to go back up on + -- the same line, the line may be marked as a fold header. + level_decreased = true + end + end + end + s = line:find(symbol, s + 1, true) + end + end + folds[line_num] = prev_level + if current_level > prev_level then + folds[line_num] = prev_level + FOLD_HEADER + elseif level_decreased and current_level == prev_level and + fold_zero_sum_lines then + if line_num > start_line then + folds[line_num] = prev_level - 1 + FOLD_HEADER + else + -- Typing within a zero-sum line. + local level = fold_level[line_num - 1] - 1 + if level > FOLD_HEADER then level = level - FOLD_HEADER end + if level > FOLD_BLANK then level = level - FOLD_BLANK end + folds[line_num] = level + FOLD_HEADER + current_level = current_level + 1 + end + end + if current_level < FOLD_BASE then current_level = FOLD_BASE end + prev_level = current_level + else + folds[line_num] = prev_level + (fold_compact and FOLD_BLANK or 0) + end + line_num = line_num + 1 + end + elseif fold and (lexer._FOLDBYINDENTATION or + M.property_int['fold.by.indentation'] > 0) then + -- Indentation based folding. + -- Calculate indentation per line. + local indentation = {} + for indent, line in (text..'\n'):gmatch('([\t ]*)([^\r\n]*)\r?\n') do + indentation[#indentation + 1] = line ~= '' and #indent + end + -- Find the first non-blank line before start_line. If the current line is + -- indented, make that previous line a header and update the levels of any + -- blank lines inbetween. 
If the current line is blank, match the level of
+  -- the previous non-blank line.
+    local current_level = start_level
+    for i = start_line - 1, 0, -1 do
+      local level = M.fold_level[i]
+      if level >= FOLD_HEADER then level = level - FOLD_HEADER end
+      if level < FOLD_BLANK then
+        local indent = M.indent_amount[i]
+        if indentation[1] and indentation[1] > indent then
+          folds[i] = FOLD_BASE + indent + FOLD_HEADER
+          for j = i + 1, start_line - 1 do
+            folds[j] = start_level + FOLD_BLANK
+          end
+        elseif not indentation[1] then
+          current_level = FOLD_BASE + indent
+        end
+        break
+      end
+    end
+    -- Iterate over lines, setting fold numbers and fold flags.
+    for i = 1, #indentation do
+      if indentation[i] then
+        current_level = FOLD_BASE + indentation[i]
+        folds[start_line + i - 1] = current_level
+        for j = i + 1, #indentation do
+          if indentation[j] then
+            if FOLD_BASE + indentation[j] > current_level then
+              folds[start_line + i - 1] = current_level + FOLD_HEADER
+              current_level = FOLD_BASE + indentation[j] -- for any blanks below
+            end
+            break
+          end
+        end
+      else
+        folds[start_line + i - 1] = current_level + FOLD_BLANK
+      end
+    end
+  else
+    -- No folding, reset fold levels if necessary.
+    local current_line = start_line
+    for _ in text:gmatch('\r?\n') do
+      folds[current_line] = start_level
+      current_line = current_line + 1
+    end
+  end
+  return folds
+end
+
+---
+-- Creates and returns a new lexer with the given name.
+-- @param name The lexer's name.
+-- @param opts Table of lexer options. Options currently supported:
+--   * `lex_by_line`: Whether or not the lexer only processes whole lines of
+--     text (instead of arbitrary chunks of text) at a time.
+--     Line lexers cannot look ahead to subsequent lines.
+--     The default value is `false`.
+--   * `fold_by_indentation`: Whether or not the lexer does not define any fold
+--     points and that fold points should be calculated based on changes in line
+--     indentation.
+--     The default value is `false`.
-- * `case_insensitive_fold_points`: Whether or not fold points added via
--   `lexer.add_fold_point()` ignore case.
--   The default value is `false`.
-- * `inherit`: Lexer to inherit from.
--   The default value is `nil`.
-- @usage lexer.new('rhtml', {inherit = lexer.load('html')})
-- @name new
function M.new(name, opts)
  assert(name, 'lexer name expected')
  if not opts then opts = {} end

  -- Seed the token-name -> style-number map: default token names occupy style
  -- numbers 0 .. #default - 1, predefined token names occupy 32 onward.
  local token_styles = {}
  for num, token_name in ipairs(default) do
    token_styles[token_name] = num - 1
  end
  for num, token_name in ipairs(predefined) do
    token_styles[token_name] = num + 31
  end

  -- `lex` is the new lexer object (named so as not to shadow the module).
  local lex = {
    _NAME = name,
    _LEXBYLINE = opts['lex_by_line'],
    _FOLDBYINDENTATION = opts['fold_by_indentation'],
    _CASEINSENSITIVEFOLDPOINTS = opts['case_insensitive_fold_points'],
    _lexer = opts['inherit'],
    _TOKENSTYLES = token_styles, _numstyles = #default, _EXTRASTYLES = {}
  }

  -- Expose the lexer API through a metatable so instances stay lightweight.
  return setmetatable(lex, {__index = {
    add_rule = M.add_rule, modify_rule = M.modify_rule, get_rule = M.get_rule,
    add_style = M.add_style, add_fold_point = M.add_fold_point,
    join_tokens = join_tokens, build_grammar = build_grammar, embed = M.embed,
    lex = M.lex, fold = M.fold
  }})
end

-- Legacy support for older lexers.
-- Processes the `lex._rules`, `lex._tokenstyles`, and `lex._foldsymbols`
-- tables.
-- Since legacy lexers may be processed up to twice, ensure their default styles
-- and rules are not processed more than once.
-- Upgrades a table-based (pre-`lexer.new()`) lexer *lexer* in place so it
-- behaves like a modern lexer object.
-- May be called twice for the same table (once on load and once after
-- embedding); `_LEGACY` guards the one-time setup while `_tokenstyles` and
-- `_foldsymbols` are re-scanned each call to pick up late additions.
local function process_legacy_lexer(lexer)
  -- Deprecation warnings are compiled out by default; uncomment to debug.
  local function warn(msg) --[[io.stderr:write(msg, "\n")]] end
  if not lexer._LEGACY then
    lexer._LEGACY = true
    warn("lexers as tables are deprecated; use 'lexer.new()'")
    -- Mirror the style-map setup performed by `M.new()`.
    local token_styles = {}
    for i = 1, #default do token_styles[default[i]] = i - 1 end
    for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end
    lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default
    lexer._EXTRASTYLES = {}
    -- Borrow the method table from a throwaway modern lexer.
    setmetatable(lexer, getmetatable(M.new('')))
    if lexer._rules then
      warn("lexer '_rules' table is deprecated; use 'add_rule()'")
      for i = 1, #lexer._rules do
        lexer:add_rule(lexer._rules[i][1], lexer._rules[i][2])
      end
    end
  end
  if lexer._tokenstyles then
    warn("lexer '_tokenstyles' table is deprecated; use 'add_style()'")
    for token, style in pairs(lexer._tokenstyles) do
      -- If this legacy lexer is being processed a second time, only add styles
      -- added since the first processing.
      if not lexer._TOKENSTYLES[token] then lexer:add_style(token, style) end
    end
  end
  if lexer._foldsymbols then
    warn("lexer '_foldsymbols' table is deprecated; use 'add_fold_point()'")
    for token_name, symbols in pairs(lexer._foldsymbols) do
      -- '_patterns' is metadata, not a token-name -> symbols map; skip it.
      if type(symbols) == 'table' and token_name ~= '_patterns' then
        for symbol, v in pairs(symbols) do
          lexer:add_fold_point(token_name, symbol, v)
        end
      end
    end
    if lexer._foldsymbols._case_insensitive then
      lexer._CASEINSENSITIVEFOLDPOINTS = true
    end
  end
end

local lexers = {} -- cache of loaded lexers
---
-- Initializes or loads and returns the lexer of string name *name*.
-- Scintilla calls this function in order to load a lexer. Parent lexers also
-- call this function in order to load child lexers and vice-versa. The user
-- calls this function in order to load a lexer when using this module as a Lua
-- library.
-- @param name The name of the lexing language.
-- @param alt_name The alternate name of the lexing language.
--   This is useful for
--   embedding the same child lexer with multiple sets of start and end tokens.
-- @param cache Flag indicating whether or not to load lexers from the cache.
--   This should only be `true` when initially loading a lexer (e.g. not from
--   within another lexer for embedding purposes).
--   The default value is `false`.
-- @return lexer object
-- @name load
function M.load(name, alt_name, cache)
  if cache and lexers[alt_name or name] then return lexers[alt_name or name] end

  -- When using this module as a stand-alone module, the `property` and
  -- `property_int` tables do not exist (they are not useful). Create them in
  -- order to prevent errors from occurring.
  if not M.property then
    M.property, M.property_int = {}, setmetatable({}, {
      __index = function(t, k) return tonumber(M.property[k]) or 0 end,
      __newindex = function() error('read-only property') end
    })
  end

  -- Load the language lexer with its rules, styles, etc.
  -- However, replace the default `WHITESPACE` style name with a unique
  -- whitespace style name (and then automatically add it afterwards), since
  -- embedded lexing relies on these unique whitespace style names. Note that
  -- loading embedded lexers changes `WHITESPACE` again, so when adding it
  -- later, do not reference the potentially incorrect value.
  M.WHITESPACE = (alt_name or name)..'_whitespace'
  local lexer = dofile(assert(package.searchpath(name, M.path)))
  assert(lexer, string.format("'%s.lua' did not return a lexer", name))
  if alt_name then lexer._NAME = alt_name end
  if not getmetatable(lexer) or lexer._LEGACY then
    -- A legacy lexer may need to be processed a second time in order to pick up
    -- any `_tokenstyles` or `_foldsymbols` added after `lexer.embed_lexer()`.
    process_legacy_lexer(lexer)
    if lexer._lexer and lexer._lexer._LEGACY then
      process_legacy_lexer(lexer._lexer) -- mainly for `_foldsymbols` edits
    end
  end
  lexer:add_style((alt_name or name)..'_whitespace', M.STYLE_WHITESPACE)

  -- If the lexer is a proxy or a child that embedded itself, set the parent to
  -- be the main lexer.
  if lexer._lexer then
    lexer = lexer._lexer
    lexer._NAME = alt_name or name
  end

  if cache then lexers[alt_name or name] = lexer end
  return lexer
end

-- The following are utility functions lexers will have access to.

-- Common patterns.
M.any = lpeg_P(1)
M.ascii = lpeg_R('\000\127')
M.extend = lpeg_R('\000\255')
M.alpha = lpeg_R('AZ', 'az')
M.digit = lpeg_R('09')
M.alnum = lpeg_R('AZ', 'az', '09')
M.lower = lpeg_R('az')
M.upper = lpeg_R('AZ')
M.xdigit = lpeg_R('09', 'AF', 'af')
M.cntrl = lpeg_R('\000\031')
M.graph = lpeg_R('!~')
M.print = lpeg_R(' ~')
-- ASCII punctuation: '!'-'/', ':'-'@', '['-'`', '{'-'~'. The third range
-- previously read "['", an inverted (start > end) range that matches nothing
-- and silently dropped the characters [\]^_` ; '`' (0x60) is the correct
-- upper bound.
M.punct = lpeg_R('!/', ':@', '[`', '{~')
M.space = lpeg_S('\t\v\f\n\r ')

M.newline = lpeg_S('\r\n\f')^1
M.nonnewline = 1 - M.newline
M.nonnewline_esc = 1 - (M.newline + '\\') + '\\' * M.any

M.dec_num = M.digit^1
M.hex_num = '0' * lpeg_S('xX') * M.xdigit^1
M.oct_num = '0' * lpeg_R('07')^1
M.integer = lpeg_S('+-')^-1 * (M.hex_num + M.oct_num + M.dec_num)
M.float = lpeg_S('+-')^-1 *
          ((M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0) *
           (lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 +
           (M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1))

M.word = (M.alpha + '_') * (M.alnum + '_')^0

---
-- Creates and returns a token pattern with token name *name* and pattern
-- *patt*.
-- If *name* is not a predefined token name, its style must be defined via
-- `lexer.add_style()`.
-- @param name The name of token. If this name is not a predefined token name,
--   then a style needs to be associated with it via `lexer.add_style()`.
-- @param patt The LPeg pattern associated with the token.
-- @return pattern
-- @usage local ws = token(lexer.WHITESPACE, lexer.space^1)
-- @usage local annotation = token('annotation', '@' * lexer.word)
-- @name token
function M.token(name, patt)
  -- Tag matched text with its token name and capture the position just past
  -- the match so the lexer can compute the token's extent.
  return lpeg_Cc(name) * patt * lpeg_Cp()
end

---
-- Creates and returns a pattern that matches a range of text bounded by
-- *chars* characters.
-- This is a convenience function for matching more complicated delimited ranges
-- like strings with escape characters and balanced parentheses. *single_line*
-- indicates whether or not the range must be on a single line, *no_escape*
-- indicates whether or not to ignore '\' as an escape character, and *balanced*
-- indicates whether or not to handle balanced ranges like parentheses and
-- requires *chars* to be composed of two characters.
-- @param chars The character(s) that bound the matched range.
-- @param single_line Optional flag indicating whether or not the range must be
--   on a single line.
-- @param no_escape Optional flag indicating whether or not the range end
--   character may be escaped by a '\\' character.
-- @param balanced Optional flag indicating whether or not to match a balanced
--   range, like the "%b" Lua pattern. This flag only applies if *chars*
--   consists of two different characters (e.g. "()").
-- @return pattern
-- @usage local dq_str_escapes = lexer.delimited_range('"')
-- @usage local dq_str_noescapes = lexer.delimited_range('"', false, true)
-- @usage local unbalanced_parens = lexer.delimited_range('()')
-- @usage local balanced_parens = lexer.delimited_range('()', false, false,
--   true)
-- @see nested_pair
-- @name delimited_range
function M.delimited_range(chars, single_line, no_escape, balanced)
  local open = chars:sub(1, 1)
  local close = #chars == 2 and chars:sub(2, 2) or open
  -- Characters that may not appear unescaped inside the range: the closing
  -- delimiter, a newline when confined to one line, and the opening delimiter
  -- when balancing nested ranges.
  local stop_chars = close..(single_line and '\n' or '')..
                     (balanced and open or '')
  local range
  if no_escape then
    range = M.any - lpeg_S(stop_chars)
  else
    range = M.any - (lpeg_S(stop_chars) + '\\') + '\\' * M.any
  end
  if balanced and open ~= close then
    -- Grammar that recursively matches nested open/close pairs.
    return lpeg_P{open * (range + lpeg_V(1))^0 * close}
  end
  -- Closing delimiter is optional so unterminated ranges still match.
  return open * range^0 * lpeg_P(close)^-1
end

---
-- Creates and returns a pattern that matches pattern *patt* only at the
-- beginning of a line.
-- @param patt The LPeg pattern to match on the beginning of a line.
-- @return pattern
-- @usage local preproc = token(lexer.PREPROCESSOR, lexer.starts_line('#') *
--   lexer.nonnewline^0)
-- @name starts_line
function M.starts_line(patt)
  return lpeg_Cmt(lpeg_C(patt), function(input, index, match, ...)
    local first = index - #match
    if first == 1 then return index, ... end -- start of the buffer
    -- Accept only when the character directly before the match ends a line.
    local prev_char = input:sub(first - 1, first - 1)
    if prev_char:match('[\n\r\f]') then return index, ... end
  end)
end

---
-- Creates and returns a pattern that verifies that string set *s* contains the
-- first non-whitespace character behind the current match position.
-- @param s String character set like one passed to `lpeg.S()`.
-- @return pattern
-- @usage local regex = lexer.last_char_includes('+-*!%^&|=,([{') *
--   lexer.delimited_range('/')
-- @name last_char_includes
function M.last_char_includes(s)
  -- Build a "[...]" Lua pattern class from *s*, escaping every character that
  -- is magic inside a class: '%', '[', ']', '^', and '-'. The previous version
  -- escaped only '%', '[', and '-', so a set containing ']' produced a
  -- malformed class and a leading '^' complemented the class.
  s = '['..s:gsub('[%%%[%]%^%-]', '%%%1')..']'
  return lpeg_P(function(input, index)
    if index == 1 then return index end -- nothing precedes the buffer start
    local i = index
    -- Walk backwards over whitespace, then test the preceding character
    -- against the set.
    while input:sub(i - 1, i - 1):match('[ \t\r\n\f]') do i = i - 1 end
    if input:sub(i - 1, i - 1):match(s) then return index end
  end)
end

---
-- Returns a pattern that matches a balanced range of text that starts with
-- string *start_chars* and ends with string *end_chars*.
-- With single-character delimiters, this function is identical to
-- `delimited_range(start_chars..end_chars, false, true, true)`.
-- @param start_chars The string starting a nested sequence.
-- @param end_chars The string ending a nested sequence.
-- @return pattern
-- @usage local nested_comment = lexer.nested_pair('/*', '*/')
-- @see delimited_range
-- @name nested_pair
function M.nested_pair(start_chars, end_chars)
  -- The end delimiter is optional so unterminated ranges still match.
  local s, e = start_chars, lpeg_P(end_chars)^-1
  return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e}
end

---
-- Creates and returns a pattern that matches any single word in string *words*.
-- *case_insensitive* indicates whether or not to ignore case when matching
-- words.
-- This is a convenience function for simplifying a set of ordered choice word
-- patterns.
-- If *words* is a multi-line string, it may contain Lua line comments (`--`)
-- that will ultimately be ignored.
-- @param words A string list of words separated by spaces.
-- @param case_insensitive Optional boolean flag indicating whether or not the
--   word match is case-insensitive. The default value is `false`.
-- @param word_chars Unused legacy parameter.
-- @return pattern
-- @usage local keyword = token(lexer.KEYWORD, word_match[[foo bar baz]])
-- @usage local keyword = token(lexer.KEYWORD, word_match([[foo-bar foo-baz
--   bar-foo bar-baz baz-foo baz-bar]], true))
-- @name word_match
function M.word_match(words, case_insensitive, word_chars)
  if type(words) == 'table' then
    -- Legacy `word_match(word_list, word_chars, case_insensitive)` form.
    words = table.concat(words, ' ')
    word_chars, case_insensitive = case_insensitive, word_chars
  end
  word_chars = word_chars or ''
  -- Build the lookup set, stripping Lua line comments first, and collect any
  -- non-word characters (e.g. '-') that appear inside listed words so the
  -- matcher can consume them too.
  local set = {}
  for word in words:gsub('%-%-[^\n]+', ''):gmatch('%S+') do
    if case_insensitive then word = word:lower() end
    set[word] = true
    for ch in word:gmatch('[^%w_]') do
      if not word_chars:find(ch, 1, true) then word_chars = word_chars..ch end
    end
  end
  local chars = M.alnum + '_'
  if word_chars ~= '' then chars = chars + lpeg_S(word_chars) end
  -- Match a maximal run of word characters, then accept it only if listed.
  return lpeg_Cmt(chars^1, function(input, index, word)
    if case_insensitive then word = word:lower() end
    return set[word] and index or nil
  end)
end

-- Deprecated legacy function. Use `parent:embed()` instead.
-- Embeds child lexer *child* in parent lexer *parent* using patterns
-- *start_rule* and *end_rule*, which signal the beginning and end of the
-- embedded lexer, respectively.
-- @param parent The parent lexer.
-- @param child The child lexer.
-- @param start_rule The pattern that signals the beginning of the embedded
--   lexer.
-- @param end_rule The pattern that signals the end of the embedded lexer.
-- @usage lexer.embed_lexer(M, css, css_start_rule, css_end_rule)
-- @usage lexer.embed_lexer(html, M, php_start_rule, php_end_rule)
-- @usage lexer.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule)
-- @see embed
-- @name embed_lexer
function M.embed_lexer(parent, child, start_rule, end_rule)
  -- Upgrade either party if it is still a legacy table-based lexer.
  if not getmetatable(parent) then process_legacy_lexer(parent) end
  if not getmetatable(child) then process_legacy_lexer(child) end
  parent:embed(child, start_rule, end_rule)
end

-- Determines if the previous line is a comment.
-- This is used for determining if the current comment line is a fold point.
-- @param prefix The prefix string defining a comment.
-- @param text The text passed to a fold function.
-- @param pos The pos passed to a fold function.
-- @param line The line passed to a fold function.
-- @param s The s passed to a fold function.
local function prev_line_is_comment(prefix, text, pos, line, s)
  -- If non-whitespace other than the comment prefix precedes position *s* on
  -- the current line, this line is not a pure comment line.
  local start = line:find('%S')
  if start < s and not line:find(prefix, start, true) then return false end
  -- NOTE(review): `pos` appears to index the start of *line* within *text*,
  -- so `pos - 1` addresses the previous line's terminator — confirm against
  -- the fold() caller.
  local p = pos - 1
  if text:sub(p, p) == '\n' then
    p = p - 1
    if text:sub(p, p) == '\r' then p = p - 1 end
    if text:sub(p, p) ~= '\n' then -- previous line is not empty
      -- Rewind to the start of the previous line, then skip its indentation.
      while p > 1 and text:sub(p - 1, p - 1) ~= '\n' do p = p - 1 end
      while text:sub(p, p):find('^[\t ]$') do p = p + 1 end
      return text:sub(p, p + #prefix - 1) == prefix
    end
  end
  return false
end

-- Determines if the next line is a comment.
-- This is used for determining if the current comment line is a fold point.
-- @param prefix The prefix string defining a comment.
-- @param text The text passed to a fold function.
-- @param pos The pos passed to a fold function.
-- @param line The line passed to a fold function.
-- @param s The s passed to a fold function.
local function next_line_is_comment(prefix, text, pos, line, s)
  -- Find the terminator of the current line, then skip the next line's
  -- indentation and compare against the comment prefix.
  local p = text:find('\n', pos + s)
  if not p then return false end
  p = p + 1
  while text:sub(p, p):match('^[\t ]$') do p = p + 1 end
  return text:sub(p, p + #prefix - 1) == prefix
end

---
-- Returns a fold function (to be passed to `lexer.add_fold_point()`) that folds
-- consecutive line comments that start with string *prefix*.
-- @param prefix The prefix string defining a line comment.
-- @usage lex:add_fold_point(lexer.COMMENT, '--',
--   lexer.fold_line_comments('--'))
-- @usage lex:add_fold_point(lexer.COMMENT, '//',
--   lexer.fold_line_comments('//'))
-- @name fold_line_comments
function M.fold_line_comments(prefix)
  local property_int = M.property_int
  return function(text, pos, line, s)
    -- Honor the user's "fold.line.comments" setting.
    if property_int['fold.line.comments'] == 0 then return 0 end
    -- Only pure comment lines (prefix after indentation only) participate.
    if s > 1 and line:match('^%s*()') < s then return 0 end
    local above = prev_line_is_comment(prefix, text, pos, line, s)
    local below = next_line_is_comment(prefix, text, pos, line, s)
    -- A comment run starts (+1) where only the line below is a comment and
    -- ends (-1) where only the line above is; otherwise the level is flat.
    if above == below then return 0 end
    return below and 1 or -1
  end
end

M.property_expanded = setmetatable({}, {
  -- Returns the string property value associated with string property *key*,
  -- replacing any "$()" and "%()" expressions with the values of their keys.
  __index = function(t, key)
    return M.property[key]:gsub('[$%%]%b()', function(expr)
      return t[expr:sub(3, -2)] -- strip the "$(" / "%(" prefix and ")" suffix
    end)
  end,
  __newindex = function() error('read-only property') end
})

--[[ The functions and fields below were defined in C.

---
-- Returns the line number of the line that contains position *pos*, which
-- starts from 1.
-- @param pos The position to get the line number of.
+-- @return number
+local function line_from_position(pos) end
+]]
+
+return M
diff --git a/lexlua/lexer2.lua b/lexlua/lexer2.lua
new file mode 100644
index 000000000..b32240aab
--- /dev/null
+++ b/lexlua/lexer2.lua
@@ -0,0 +1,1723 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+
+local M = {}
+
+--[=[ This comment is for LuaDoc.
+---
+-- Lexes Scintilla documents and source code with Lua and LPeg.
+--
+-- ## Overview
+--
+-- Lexers highlight the syntax of source code. Scintilla (the editing component
+-- behind [Textadept][] and [SciTE][]) traditionally uses static, compiled C++
+-- lexers which are notoriously difficult to create and/or extend. On the other
+-- hand, Lua makes it easy to rapidly create new lexers, extend existing
+-- ones, and embed lexers within one another. Lua lexers tend to be more
+-- readable than C++ lexers too.
+--
+-- Lexers are Parsing Expression Grammars, or PEGs, composed with the Lua
+-- [LPeg library][]. The following table comes from the LPeg documentation and
+-- summarizes all you need to know about constructing basic LPeg patterns. This
+-- module provides convenience functions for creating and working with other
+-- more advanced patterns and concepts.
+--
+-- Operator | Description
+-- ---------------------|------------
+-- `lpeg.P(string)` | Matches `string` literally.
+-- `lpeg.P(`_`n`_`)` | Matches exactly _`n`_ characters.
+-- `lpeg.S(string)` | Matches any character in set `string`.
+-- `lpeg.R("`_`xy`_`")` | Matches any character between range `x` and `y`.
+-- `patt^`_`n`_ | Matches at least _`n`_ repetitions of `patt`.
+-- `patt^-`_`n`_ | Matches at most _`n`_ repetitions of `patt`.
+-- `patt1 * patt2` | Matches `patt1` followed by `patt2`.
+-- `patt1 + patt2` | Matches `patt1` or `patt2` (ordered choice).
+-- `patt1 - patt2` | Matches `patt1` if `patt2` does not match.
+-- `-patt` | Equivalent to `("" - patt)`.
+-- `#patt` | Matches `patt` but consumes no input.
+-- +-- The first part of this document deals with rapidly constructing a simple +-- lexer. The next part deals with more advanced techniques, such as custom +-- coloring and embedding lexers within one another. Following that is a +-- discussion about code folding, or being able to tell Scintilla which code +-- blocks are "foldable" (temporarily hideable from view). After that are +-- instructions on how to use LPeg lexers with the aforementioned Textadept and +-- SciTE editors. Finally there are comments on lexer performance and +-- limitations. +-- +-- [LPeg library]: http://www.inf.puc-rio.br/~roberto/lpeg/lpeg.html +-- [Textadept]: http://foicica.com/textadept +-- [SciTE]: http://scintilla.org/SciTE.html +-- +-- ## Lexer Basics +-- +-- The *lexers/* directory contains all lexers, including your new one. Before +-- attempting to write one from scratch though, first determine if your +-- programming language is similar to any of the 100+ languages supported. If +-- so, you may be able to copy and modify that lexer, saving some time and +-- effort. The filename of your lexer should be the name of your programming +-- language in lower case followed by a *.lua* extension. For example, a new Lua +-- lexer has the name *lua.lua*. +-- +-- Note: Try to refrain from using one-character language names like "c", "d", +-- or "r". For example, Scintillua uses "ansi_c", "dmd", and "rstats", +-- respectively. +-- +-- ### New Lexer Template +-- +-- There is a *lexers/template.txt* file that contains a simple template for a +-- new lexer. Feel free to use it, replacing the '?'s with the name of your +-- lexer: +-- +-- -- ? LPeg lexer. +-- +-- local l = require('lexer') +-- local token, word_match = l.token, l.word_match +-- local P, R, S = lpeg.P, lpeg.R, lpeg.S +-- +-- local lexer = l.new('?') +-- +-- -- Whitespace. 
+-- local ws = token(l.WHITESPACE, l.space^1) +-- lexer:add_rule('whitespace', ws) +-- +-- return lexer +-- +-- The first 3 lines of code simply define often used convenience variables. The +-- fourth and last lines [define](#lexer.new) and return the lexer object +-- Scintilla uses; they are very important and must be part of every lexer. The +-- fifth line defines something called a "token", an essential building block of +-- lexers. You will learn about tokens shortly. The sixth line defines a lexer +-- grammar rule, which you will learn about later, as well as token styles. +-- Note, however, the `local` prefix in front of variables, which is needed +-- so-as not to affect Lua's global environment. All in all, this is a minimal, +-- working lexer that you can build on. +-- +-- ### Tokens +-- +-- Take a moment to think about your programming language's structure. What kind +-- of key elements does it have? In the template shown earlier, one predefined +-- element all languages have is whitespace. Your language probably also has +-- elements like comments, strings, and keywords. Lexers refer to these elements +-- as "tokens". Tokens are the fundamental "building blocks" of lexers. Lexers +-- break down source code into tokens for coloring, which results in the syntax +-- highlighting familiar to you. It is up to you how specific your lexer is when +-- it comes to tokens. Perhaps only distinguishing between keywords and +-- identifiers is necessary, or maybe recognizing constants and built-in +-- functions, methods, or libraries is desirable. The Lua lexer, for example, +-- defines 11 tokens: whitespace, keywords, built-in functions, constants, +-- built-in libraries, identifiers, strings, comments, numbers, labels, and +-- operators. Even though constants, built-in functions, and built-in libraries +-- are subsets of identifiers, Lua programmers find it helpful for the lexer to +-- distinguish between them all. 
It is perfectly acceptable to just recognize +-- keywords and identifiers. +-- +-- In a lexer, tokens consist of a token name and an LPeg pattern that matches a +-- sequence of characters recognized as an instance of that token. Create tokens +-- using the [`lexer.token()`]() function. Let us examine the "whitespace" token +-- defined in the template shown earlier: +-- +-- local ws = token(l.WHITESPACE, l.space^1) +-- +-- At first glance, the first argument does not appear to be a string name and +-- the second argument does not appear to be an LPeg pattern. Perhaps you +-- expected something like: +-- +-- local ws = token('whitespace', S('\t\v\f\n\r ')^1) +-- +-- The `lexer` (`l`) module actually provides a convenient list of common token +-- names and common LPeg patterns for you to use. Token names include +-- [`lexer.DEFAULT`](), [`lexer.WHITESPACE`](), [`lexer.COMMENT`](), +-- [`lexer.STRING`](), [`lexer.NUMBER`](), [`lexer.KEYWORD`](), +-- [`lexer.IDENTIFIER`](), [`lexer.OPERATOR`](), [`lexer.ERROR`](), +-- [`lexer.PREPROCESSOR`](), [`lexer.CONSTANT`](), [`lexer.VARIABLE`](), +-- [`lexer.FUNCTION`](), [`lexer.CLASS`](), [`lexer.TYPE`](), [`lexer.LABEL`](), +-- [`lexer.REGEX`](), and [`lexer.EMBEDDED`](). Patterns include +-- [`lexer.any`](), [`lexer.ascii`](), [`lexer.extend`](), [`lexer.alpha`](), +-- [`lexer.digit`](), [`lexer.alnum`](), [`lexer.lower`](), [`lexer.upper`](), +-- [`lexer.xdigit`](), [`lexer.cntrl`](), [`lexer.graph`](), [`lexer.print`](), +-- [`lexer.punct`](), [`lexer.space`](), [`lexer.newline`](), +-- [`lexer.nonnewline`](), [`lexer.nonnewline_esc`](), [`lexer.dec_num`](), +-- [`lexer.hex_num`](), [`lexer.oct_num`](), [`lexer.integer`](), +-- [`lexer.float`](), and [`lexer.word`](). You may use your own token names if +-- none of the above fit your language, but an advantage to using predefined +-- token names is that your lexer's tokens will inherit the universal syntax +-- highlighting color theme used by your text editor. 
+-- +-- #### Example Tokens +-- +-- So, how might you define other tokens like keywords, comments, and strings? +-- Here are some examples. +-- +-- **Keywords** +-- +-- Instead of matching _n_ keywords with _n_ `P('keyword_`_`n`_`')` ordered +-- choices, use another convenience function: [`lexer.word_match()`](). It is +-- much easier and more efficient to write word matches like: +-- +-- local keyword = token(l.KEYWORD, l.word_match[[ +-- keyword_1 keyword_2 ... keyword_n +-- ]]) +-- +-- local case_insensitive_keyword = token(l.KEYWORD, l.word_match([[ +-- KEYWORD_1 keyword_2 ... KEYword_n +-- ]], true)) +-- +-- local hyphened_keyword = token(l.KEYWORD, l.word_match[[ +-- keyword-1 keyword-2 ... keyword-n +-- ]]) +-- +-- **Comments** +-- +-- Line-style comments with a prefix character(s) are easy to express with LPeg: +-- +-- local shell_comment = token(l.COMMENT, '#' * l.nonnewline^0) +-- local c_line_comment = token(l.COMMENT, '//' * l.nonnewline_esc^0) +-- +-- The comments above start with a '#' or "//" and go to the end of the line. +-- The second comment recognizes the next line also as a comment if the current +-- line ends with a '\' escape character. +-- +-- C-style "block" comments with a start and end delimiter are also easy to +-- express: +-- +-- local c_comment = token(l.COMMENT, '/*' * (l.any - '*/')^0 * P('*/')^-1) +-- +-- This comment starts with a "/\*" sequence and contains anything up to and +-- including an ending "\*/" sequence. The ending "\*/" is optional so the lexer +-- can recognize unfinished comments as comments and highlight them properly. 
+-- +-- **Strings** +-- +-- It is tempting to think that a string is not much different from the block +-- comment shown above in that both have start and end delimiters: +-- +-- local dq_str = '"' * (l.any - '"')^0 * P('"')^-1 +-- local sq_str = "'" * (l.any - "'")^0 * P("'")^-1 +-- local simple_string = token(l.STRING, dq_str + sq_str) +-- +-- However, most programming languages allow escape sequences in strings such +-- that a sequence like "\\"" in a double-quoted string indicates that the +-- '"' is not the end of the string. The above token incorrectly matches +-- such a string. Instead, use the [`lexer.delimited_range()`]() convenience +-- function. +-- +-- local dq_str = l.delimited_range('"') +-- local sq_str = l.delimited_range("'") +-- local string = token(l.STRING, dq_str + sq_str) +-- +-- In this case, the lexer treats '\' as an escape character in a string +-- sequence. +-- +-- **Numbers** +-- +-- Most programming languages have the same format for integer and float tokens, +-- so it might be as simple as using a couple of predefined LPeg patterns: +-- +-- local number = token(l.NUMBER, l.float + l.integer) +-- +-- However, some languages allow postfix characters on integers. +-- +-- local integer = P('-')^-1 * (l.dec_num * S('lL')^-1) +-- local number = token(l.NUMBER, l.float + l.hex_num + integer) +-- +-- Your language may need other tweaks, but it is up to you how fine-grained you +-- want your highlighting to be. After all, you are not writing a compiler or +-- interpreter! +-- +-- ### Rules +-- +-- Programming languages have grammars, which specify valid token structure. For +-- example, comments usually cannot appear within a string. Grammars consist of +-- rules, which are simply combinations of tokens. 
Recall from the lexer +-- template the [`lexer:add_rule()`]() call, which adds a rule to the lexer's +-- grammar: +-- +-- lexer:add_rule('whitespace', ws) +-- +-- Each rule has an associated name, but rule names are completely arbitrary and +-- serve only to identify and distinguish between different rules. Rule order is +-- important: if text does not match the first rule added to the grammar, the +-- lexer tries to match the second rule added, and so on. Right now this lexer +-- simply matches whitespace tokens under a rule named "whitespace". +-- +-- To illustrate the importance of rule order, here is an example of a +-- simplified Lua lexer: +-- +-- lexer:add_rule('whitespace', token(l.WHITESPACE, ...)) +-- lexer:add_rule('keyword', token(l.KEYWORD, ...)) +-- lexer:add_rule('identifier', token(l.IDENTIFIER, ...)) +-- lexer:add_rule('string', token(l.STRING, ...)) +-- lexer:add_rule('comment', token(l.COMMENT, ...)) +-- lexer:add_rule('number', token(l.NUMBER, ...)) +-- lexer:add_rule('label', token(l.LABEL, ...)) +-- lexer:add_rule('operator', token(l.OPERATOR, ...)) +-- +-- Note how identifiers come after keywords. In Lua, as with most programming +-- languages, the characters allowed in keywords and identifiers are in the same +-- set (alphanumerics plus underscores). If the lexer added the "identifier" +-- rule before the "keyword" rule, all keywords would match identifiers and thus +-- incorrectly highlight as identifiers instead of keywords. The same idea +-- applies to function, constant, etc. tokens that you may want to distinguish +-- between: their rules should come before identifiers. +-- +-- So what about text that does not match any rules? For example in Lua, the '!' +-- character is meaningless outside a string or comment. Normally the lexer +-- skips over such text. If instead you want to highlight these "syntax errors", +-- add an additional end rule: +-- +-- lexer:add_rule('whitespace', ws) +-- ... 
+-- lexer:add_rule('error', token(l.ERROR, l.any)) +-- +-- This identifies and highlights any character not matched by an existing +-- rule as a `lexer.ERROR` token. +-- +-- Even though the rules defined in the examples above contain a single token, +-- rules may consist of multiple tokens. For example, a rule for an HTML tag +-- could consist of a tag token followed by an arbitrary number of attribute +-- tokens, allowing the lexer to highlight all tokens separately. That rule +-- might look something like this: +-- +-- lexer:add_rule('tag', tag_start * (ws * attributes)^0 * tag_end^-1) +-- +-- Note however that lexers with complex rules like these are more prone to lose +-- track of their state, especially if they span multiple lines. +-- +-- ### Summary +-- +-- Lexers primarily consist of tokens and grammar rules. At your disposal are a +-- number of convenience patterns and functions for rapidly creating a lexer. If +-- you choose to use predefined token names for your tokens, you do not have to +-- define how the lexer highlights them. The tokens will inherit the default +-- syntax highlighting color theme your editor uses. +-- +-- ## Advanced Techniques +-- +-- ### Styles and Styling +-- +-- The most basic form of syntax highlighting is assigning different colors to +-- different tokens. Instead of highlighting with just colors, Scintilla allows +-- for more rich highlighting, or "styling", with different fonts, font sizes, +-- font attributes, and foreground and background colors, just to name a few. +-- The unit of this rich highlighting is called a "style". Styles are simply +-- strings of comma-separated property settings. By default, lexers associate +-- predefined token names like `lexer.WHITESPACE`, `lexer.COMMENT`, +-- `lexer.STRING`, etc. with particular styles as part of a universal color +-- theme. 
These predefined styles include [`lexer.STYLE_CLASS`](), +-- [`lexer.STYLE_COMMENT`](), [`lexer.STYLE_CONSTANT`](), +-- [`lexer.STYLE_ERROR`](), [`lexer.STYLE_EMBEDDED`](), +-- [`lexer.STYLE_FUNCTION`](), [`lexer.STYLE_IDENTIFIER`](), +-- [`lexer.STYLE_KEYWORD`](), [`lexer.STYLE_LABEL`](), [`lexer.STYLE_NUMBER`](), +-- [`lexer.STYLE_OPERATOR`](), [`lexer.STYLE_PREPROCESSOR`](), +-- [`lexer.STYLE_REGEX`](), [`lexer.STYLE_STRING`](), [`lexer.STYLE_TYPE`](), +-- [`lexer.STYLE_VARIABLE`](), and [`lexer.STYLE_WHITESPACE`](). Like with +-- predefined token names and LPeg patterns, you may define your own styles. At +-- their core, styles are just strings, so you may create new ones and/or modify +-- existing ones. Each style consists of the following comma-separated settings: +-- +-- Setting | Description +-- ---------------|------------ +-- font:_name_ | The name of the font the style uses. +-- size:_int_ | The size of the font the style uses. +-- [not]bold | Whether or not the font face is bold. +-- weight:_int_ | The weight or boldness of a font, between 1 and 999. +-- [not]italics | Whether or not the font face is italic. +-- [not]underlined| Whether or not the font face is underlined. +-- fore:_color_ | The foreground color of the font face. +-- back:_color_ | The background color of the font face. +-- [not]eolfilled | Does the background color extend to the end of the line? +-- case:_char_ | The case of the font ('u': upper, 'l': lower, 'm': normal). +-- [not]visible | Whether or not the text is visible. +-- [not]changeable| Whether the text is changeable or read-only. +-- +-- Specify font colors in either "#RRGGBB" format, "0xBBGGRR" format, or the +-- decimal equivalent of the latter. As with token names, LPeg patterns, and +-- styles, there is a set of predefined color names, but they vary depending on +-- the current color theme in use. 
Therefore, it is generally not a good idea to
+-- manually define colors within styles in your lexer since they might not fit
+-- into a user's chosen color theme. Try to refrain from even using predefined
+-- colors in a style because that color may be theme-specific. Instead, the best
+-- practice is to either use predefined styles or derive new color-agnostic
+-- styles from predefined ones. For example, Lua "longstring" tokens use the
+-- existing `lexer.STYLE_STRING` style instead of defining a new one.
+--
+-- #### Example Styles
+--
+-- Defining styles is pretty straightforward. An empty style that inherits the
+-- default theme settings is simply an empty string:
+--
+-- local style_nothing = ''
+--
+-- A similar style but with a bold font face looks like this:
+--
+-- local style_bold = 'bold'
+--
+-- If you want the same style, but also with an italic font face, define the new
+-- style in terms of the old one:
+--
+-- local style_bold_italic = style_bold..',italics'
+--
+-- This allows you to derive new styles from predefined ones without having to
+-- rewrite them. This operation leaves the old style unchanged. Thus if you
+-- had a "static variable" token whose style you wanted to base off of
+-- `lexer.STYLE_VARIABLE`, it would probably look like:
+--
+-- local style_static_var = l.STYLE_VARIABLE..',italics'
+--
+-- The color theme files in the *lexers/themes/* folder give more examples of
+-- style definitions.
+--
+-- ### Token Styles
+--
+-- Lexers use the [`lexer:add_style()`]() function to assign styles to
+-- particular tokens. Recall the token definition from the lexer template:
+--
+-- local ws = token(l.WHITESPACE, l.space^1)
+-- lexer:add_rule('whitespace', ws)
+--
+-- Why is a style not assigned to the `lexer.WHITESPACE` token? As mentioned
+-- earlier, lexers automatically associate tokens that use predefined token
+-- names with a particular style. Only tokens with custom token names need
+-- manual style associations.
As an example, consider a custom whitespace token:
+--
+-- local ws = token('custom_whitespace', l.space^1)
+--
+-- Assigning a style to this token looks like:
+--
+-- lexer:add_style('custom_whitespace', l.STYLE_WHITESPACE)
+--
+-- Do not confuse token names with rule names. They are completely different
+-- entities. In the example above, the lexer associates the "custom_whitespace"
+-- token with the existing style for `lexer.WHITESPACE` tokens. If instead you
+-- prefer to color the background of whitespace a shade of grey, it might look
+-- like:
+--
+-- local custom_style = l.STYLE_WHITESPACE..',back:$(color.grey)'
+-- lexer:add_style('custom_whitespace', custom_style)
+--
+-- Notice that the lexer performs Scintilla/SciTE-style "$()" property expansion.
+-- You may also use "%()". Remember to refrain from assigning specific colors in
+-- styles, but in this case, all user color themes probably define the
+-- "color.grey" property.
+--
+-- ### Line Lexers
+--
+-- By default, lexers match the arbitrary chunks of text passed to them by
+-- Scintilla. These chunks may be a full document, only the visible part of a
+-- document, or even just portions of lines. Some lexers need to match whole
+-- lines. For example, a lexer for the output of a file "diff" needs to know if
+-- the line started with a '+' or '-' and then style the entire line
+-- accordingly. To indicate that your lexer matches by line, create the lexer
+-- with an extra parameter:
+--
+-- local lexer = l.new('?', {lex_by_line = true})
+--
+-- Now the input text for the lexer is a single line at a time. Keep in mind
+-- that line lexers do not have the ability to look ahead at subsequent lines.
+--
+-- ### Embedded Lexers
+--
+-- Lexers embed within one another very easily, requiring minimal effort. In the
+-- following sections, the lexer being embedded is called the "child" lexer and
+-- the lexer a child is being embedded in is called the "parent".
For example, +-- consider an HTML lexer and a CSS lexer. Either lexer stands alone for styling +-- their respective HTML and CSS files. However, CSS can be embedded inside +-- HTML. In this specific case, the CSS lexer is the "child" lexer with the HTML +-- lexer being the "parent". Now consider an HTML lexer and a PHP lexer. This +-- sounds a lot like the case with CSS, but there is a subtle difference: PHP +-- _embeds itself into_ HTML while CSS is _embedded in_ HTML. This fundamental +-- difference results in two types of embedded lexers: a parent lexer that +-- embeds other child lexers in it (like HTML embedding CSS), and a child lexer +-- that embeds itself into a parent lexer (like PHP embedding itself in HTML). +-- +-- #### Parent Lexer +-- +-- Before embedding a child lexer into a parent lexer, the parent lexer needs to +-- load the child lexer. This is done with the [`lexer.load()`]() function. For +-- example, loading the CSS lexer within the HTML lexer looks like: +-- +-- local css = l.load('css') +-- +-- The next part of the embedding process is telling the parent lexer when to +-- switch over to the child lexer and when to switch back. The lexer refers to +-- these indications as the "start rule" and "end rule", respectively, and are +-- just LPeg patterns. Continuing with the HTML/CSS example, the transition from +-- HTML to CSS is when the lexer encounters a "style" tag with a "type" +-- attribute whose value is "text/css": +-- +-- local css_tag = P('<style') * P(function(input, index) +-- if input:find('^[^>]+type="text/css"', index) then +-- return index +-- end +-- end) +-- +-- This pattern looks for the beginning of a "style" tag and searches its +-- attribute list for the text "`type="text/css"`". (In this simplified example, +-- the Lua pattern does not consider whitespace between the '=' nor does it +-- consider that using single quotes is valid.) If there is a match, the +-- functional pattern returns a value instead of `nil`. 
In this case, the value +-- returned does not matter because we ultimately want to style the "style" tag +-- as an HTML tag, so the actual start rule looks like this: +-- +-- local css_start_rule = #css_tag * tag +-- +-- Now that the parent knows when to switch to the child, it needs to know when +-- to switch back. In the case of HTML/CSS, the switch back occurs when the +-- lexer encounters an ending "style" tag, though the lexer should still style +-- the tag as an HTML tag: +-- +-- local css_end_rule = #P('</style>') * tag +-- +-- Once the parent loads the child lexer and defines the child's start and end +-- rules, it embeds the child with the [`lexer:embed()`]() function: +-- +-- lexer:embed(css, css_start_rule, css_end_rule) +-- +-- #### Child Lexer +-- +-- The process for instructing a child lexer to embed itself into a parent is +-- very similar to embedding a child into a parent: first, load the parent lexer +-- into the child lexer with the [`lexer.load()`]() function and then create +-- start and end rules for the child lexer. However, in this case, call +-- [`lexer:embed()`]() with switched arguments. For example, in the PHP lexer: +-- +-- local html = l.load('html') +-- local php_start_rule = token('php_tag', '<?php ') +-- local php_end_rule = token('php_tag', '?>') +-- lexer:add_style('php_tag', l.STYLE_EMBEDDED) +-- html:embed(lexer, php_start_rule, php_end_rule) +-- +-- ### Lexers with Complex State +-- +-- A vast majority of lexers are not stateful and can operate on any chunk of +-- text in a document. However, there may be rare cases where a lexer does need +-- to keep track of some sort of persistent state. Rather than using `lpeg.P` +-- function patterns that set state variables, it is recommended to make use of +-- Scintilla's built-in, per-line state integers via [`lexer.line_state`](). It +-- was designed to accommodate up to 32 bit flags for tracking state. 
+-- [`lexer.line_from_position()`]() will return the line for any position given +-- to an `lpeg.P` function pattern. (Any positions derived from that position +-- argument will also work.) +-- +-- Writing stateful lexers is beyond the scope of this document. +-- +-- ## Code Folding +-- +-- When reading source code, it is occasionally helpful to temporarily hide +-- blocks of code like functions, classes, comments, etc. This is the concept of +-- "folding". In the Textadept and SciTE editors for example, little indicators +-- in the editor margins appear next to code that can be folded at places called +-- "fold points". When the user clicks an indicator, the editor hides the code +-- associated with the indicator until the user clicks the indicator again. The +-- lexer specifies these fold points and what code exactly to fold. +-- +-- The fold points for most languages occur on keywords or character sequences. +-- Examples of fold keywords are "if" and "end" in Lua and examples of fold +-- character sequences are '{', '}', "/\*", and "\*/" in C for code block and +-- comment delimiters, respectively. However, these fold points cannot occur +-- just anywhere. For example, lexers should not recognize fold keywords that +-- appear within strings or comments. The [`lexer:add_fold_point()`]() function +-- allows you to conveniently define fold points with such granularity. For +-- example, consider C: +-- +-- lexer:add_fold_point(l.OPERATOR, '{', '}') +-- lexer:add_fold_point(l.COMMENT, '/*', '*/') +-- +-- The first assignment states that any '{' or '}' that the lexer recognized as +-- an `lexer.OPERATOR` token is a fold point. Likewise, the second assignment +-- states that any "/\*" or "\*/" that the lexer recognizes as part of a +-- `lexer.COMMENT` token is a fold point. The lexer does not consider any +-- occurrences of these characters outside their defined tokens (such as in a +-- string) as fold points. How do you specify fold keywords? 
Here is an example +-- for Lua: +-- +-- lexer:add_fold_point(l.KEYWORD, 'if', 'end') +-- lexer:add_fold_point(l.KEYWORD, 'do', 'end') +-- lexer:add_fold_point(l.KEYWORD, 'function', 'end') +-- lexer:add_fold_point(l.KEYWORD, 'repeat', 'until') +-- +-- If your lexer has case-insensitive keywords as fold points, simply add a +-- `case_insensitive_fold_points = true` option to [`lexer.new()`](), and +-- specify keywords in lower case. +-- +-- If your lexer needs to do some additional processing in order to determine if +-- a token is a fold point, pass a function that returns an integer to +-- `lexer:add_fold_point()`. Returning `1` indicates the token is a beginning +-- fold point and returning `-1` indicates the token is an ending fold point. +-- Returning `0` indicates the token is not a fold point. For example: +-- +-- local function fold_strange_token(text, pos, line, s, symbol) +-- if ... then +-- return 1 -- beginning fold point +-- elseif ... then +-- return -1 -- ending fold point +-- end +-- return 0 +-- end +-- +-- lexer:add_fold_point('strange_token', '|', fold_strange_token) +-- +-- Any time the lexer encounters a '|' that is a "strange_token", it calls the +-- `fold_strange_token` function to determine if '|' is a fold point. The lexer +-- calls these functions with the following arguments: the text to identify fold +-- points in, the beginning position of the current line in the text to fold, +-- the current line's text, the position in the current line the fold point text +-- starts at, and the fold point text itself. +-- +-- ### Fold by Indentation +-- +-- Some languages have significant whitespace and/or no delimiters that indicate +-- fold points. 
If your lexer falls into this category and you would like to +-- mark fold points based on changes in indentation, create the lexer with a +-- `fold_by_indentation = true` option: +-- +-- local lexer = l.new('?', {fold_by_indentation = true}) +-- +-- ## Using Lexers +-- +-- ### Textadept +-- +-- Put your lexer in your *~/.textadept/lexers/* directory so you do not +-- overwrite it when upgrading Textadept. Also, lexers in this directory +-- override default lexers. Thus, Textadept loads a user *lua* lexer instead of +-- the default *lua* lexer. This is convenient for tweaking a default lexer to +-- your liking. Then add a [file type][] for your lexer if necessary. +-- +-- [file type]: textadept.file_types.html +-- +-- ### SciTE +-- +-- Create a *.properties* file for your lexer and `import` it in either your +-- *SciTEUser.properties* or *SciTEGlobal.properties*. The contents of the +-- *.properties* file should contain: +-- +-- file.patterns.[lexer_name]=[file_patterns] +-- lexer.$(file.patterns.[lexer_name])=[lexer_name] +-- +-- where `[lexer_name]` is the name of your lexer (minus the *.lua* extension) +-- and `[file_patterns]` is a set of file extensions to use your lexer for. +-- +-- Please note that Lua lexers ignore any styling information in *.properties* +-- files. Your theme file in the *lexers/themes/* directory contains styling +-- information. +-- +-- ## Considerations +-- +-- ### Performance +-- +-- There might be some slight overhead when initializing a lexer, but loading a +-- file from disk into Scintilla is usually more expensive. On modern computer +-- systems, I see no difference in speed between LPeg lexers and Scintilla's C++ +-- ones. Optimize lexers for speed by re-arranging `lexer:add_rule()` calls so +-- that the most common rules match first. Do keep in mind that order matters +-- for similar rules. +-- +-- In some cases, folding may be far more expensive than lexing, particularly +-- in lexers with a lot of potential fold points. 
If your lexer is exhibiting
+-- signs of slowness, try disabling folding in your text editor first. If that
+-- speeds things up, you can try reducing the number of fold points you added,
+-- overriding `lexer:fold()` with your own implementation, or simply eliminating
+-- folding support from your lexer.
+--
+-- ### Limitations
+--
+-- Embedded preprocessor languages like PHP cannot completely embed in their
+-- parent languages in that the parent's tokens do not support start and end
+-- rules. This mostly goes unnoticed, but code like
+--
+-- <div id="<?php echo $id; ?>">
+--
+-- will not style correctly.
+--
+-- ### Troubleshooting
+--
+-- Errors in lexers can be tricky to debug. Lexers print Lua errors to
+-- `io.stderr` and `_G.print()` statements to `io.stdout`. Running your editor
+-- from a terminal is the easiest way to see errors as they occur.
+--
+-- ### Risks
+--
+-- Poorly written lexers have the ability to crash Scintilla (and thus its
+-- containing application), so unsaved data might be lost. However, I have only
+-- observed these crashes in early lexer development, when syntax errors or
+-- pattern errors are present. Once the lexer actually starts styling text
+-- (either correctly or incorrectly, it does not matter), I have not observed
+-- any crashes.
+--
+-- ### Acknowledgements
+--
+-- Thanks to Peter Odding for his [lexer post][] on the Lua mailing list
+-- that inspired me, and thanks to Roberto Ierusalimschy for LPeg.
+--
+-- [lexer post]: http://lua-users.org/lists/lua-l/2007-04/msg00116.html
+-- @field path (string)
+-- The path used to search for a lexer to load.
+-- Identical in format to Lua's `package.path` string.
+-- The default value is `package.path`.
+-- @field DEFAULT (string)
+-- The token name for default tokens.
+-- @field WHITESPACE (string)
+-- The token name for whitespace tokens.
+-- @field COMMENT (string)
+-- The token name for comment tokens.
+-- @field STRING (string)
+-- The token name for string tokens.
+-- @field NUMBER (string) +-- The token name for number tokens. +-- @field KEYWORD (string) +-- The token name for keyword tokens. +-- @field IDENTIFIER (string) +-- The token name for identifier tokens. +-- @field OPERATOR (string) +-- The token name for operator tokens. +-- @field ERROR (string) +-- The token name for error tokens. +-- @field PREPROCESSOR (string) +-- The token name for preprocessor tokens. +-- @field CONSTANT (string) +-- The token name for constant tokens. +-- @field VARIABLE (string) +-- The token name for variable tokens. +-- @field FUNCTION (string) +-- The token name for function tokens. +-- @field CLASS (string) +-- The token name for class tokens. +-- @field TYPE (string) +-- The token name for type tokens. +-- @field LABEL (string) +-- The token name for label tokens. +-- @field REGEX (string) +-- The token name for regex tokens. +-- @field STYLE_CLASS (string) +-- The style typically used for class definitions. +-- @field STYLE_COMMENT (string) +-- The style typically used for code comments. +-- @field STYLE_CONSTANT (string) +-- The style typically used for constants. +-- @field STYLE_ERROR (string) +-- The style typically used for erroneous syntax. +-- @field STYLE_FUNCTION (string) +-- The style typically used for function definitions. +-- @field STYLE_KEYWORD (string) +-- The style typically used for language keywords. +-- @field STYLE_LABEL (string) +-- The style typically used for labels. +-- @field STYLE_NUMBER (string) +-- The style typically used for numbers. +-- @field STYLE_OPERATOR (string) +-- The style typically used for operators. +-- @field STYLE_REGEX (string) +-- The style typically used for regular expression strings. +-- @field STYLE_STRING (string) +-- The style typically used for strings. +-- @field STYLE_PREPROCESSOR (string) +-- The style typically used for preprocessor statements. +-- @field STYLE_TYPE (string) +-- The style typically used for static types. 
+-- @field STYLE_VARIABLE (string) +-- The style typically used for variables. +-- @field STYLE_WHITESPACE (string) +-- The style typically used for whitespace. +-- @field STYLE_EMBEDDED (string) +-- The style typically used for embedded code. +-- @field STYLE_IDENTIFIER (string) +-- The style typically used for identifier words. +-- @field STYLE_DEFAULT (string) +-- The style all styles are based off of. +-- @field STYLE_LINENUMBER (string) +-- The style used for all margins except fold margins. +-- @field STYLE_BRACELIGHT (string) +-- The style used for highlighted brace characters. +-- @field STYLE_BRACEBAD (string) +-- The style used for unmatched brace characters. +-- @field STYLE_CONTROLCHAR (string) +-- The style used for control characters. +-- Color attributes are ignored. +-- @field STYLE_INDENTGUIDE (string) +-- The style used for indentation guides. +-- @field STYLE_CALLTIP (string) +-- The style used by call tips if [`buffer.call_tip_use_style`]() is set. +-- Only the font name, size, and color attributes are used. +-- @field STYLE_FOLDDISPLAYTEXT (string) +-- The style used for fold display text. +-- @field any (pattern) +-- A pattern that matches any single character. +-- @field ascii (pattern) +-- A pattern that matches any ASCII character (codes 0 to 127). +-- @field extend (pattern) +-- A pattern that matches any ASCII extended character (codes 0 to 255). +-- @field alpha (pattern) +-- A pattern that matches any alphabetic character ('A'-'Z', 'a'-'z'). +-- @field digit (pattern) +-- A pattern that matches any digit ('0'-'9'). +-- @field alnum (pattern) +-- A pattern that matches any alphanumeric character ('A'-'Z', 'a'-'z', +-- '0'-'9'). +-- @field lower (pattern) +-- A pattern that matches any lower case character ('a'-'z'). +-- @field upper (pattern) +-- A pattern that matches any upper case character ('A'-'Z'). +-- @field xdigit (pattern) +-- A pattern that matches any hexadecimal digit ('0'-'9', 'A'-'F', 'a'-'f'). 
+-- @field cntrl (pattern) +-- A pattern that matches any control character (ASCII codes 0 to 31). +-- @field graph (pattern) +-- A pattern that matches any graphical character ('!' to '~'). +-- @field print (pattern) +-- A pattern that matches any printable character (' ' to '~'). +-- @field punct (pattern) +-- A pattern that matches any punctuation character ('!' to '/', ':' to '@', +-- '[' to ''', '{' to '~'). +-- @field space (pattern) +-- A pattern that matches any whitespace character ('\t', '\v', '\f', '\n', +-- '\r', space). +-- @field newline (pattern) +-- A pattern that matches any set of end of line characters. +-- @field nonnewline (pattern) +-- A pattern that matches any single, non-newline character. +-- @field nonnewline_esc (pattern) +-- A pattern that matches any single, non-newline character or any set of end +-- of line characters escaped with '\'. +-- @field dec_num (pattern) +-- A pattern that matches a decimal number. +-- @field hex_num (pattern) +-- A pattern that matches a hexadecimal number. +-- @field oct_num (pattern) +-- A pattern that matches an octal number. +-- @field integer (pattern) +-- A pattern that matches either a decimal, hexadecimal, or octal number. +-- @field float (pattern) +-- A pattern that matches a floating point number. +-- @field word (pattern) +-- A pattern that matches a typical word. Words begin with a letter or +-- underscore and consist of alphanumeric and underscore characters. +-- @field FOLD_BASE (number) +-- The initial (root) fold level. +-- @field FOLD_BLANK (number) +-- Flag indicating that the line is blank. +-- @field FOLD_HEADER (number) +-- Flag indicating the line is fold point. +-- @field fold_level (table, Read-only) +-- Table of fold level bit-masks for line numbers starting from zero. +-- Fold level masks are composed of an integer level combined with any of the +-- following bits: +-- +-- * `lexer.FOLD_BASE` +-- The initial fold level. +-- * `lexer.FOLD_BLANK` +-- The line is blank. 
-- * `lexer.FOLD_HEADER`
--   The line is a header, or fold point.
-- @field indent_amount (table, Read-only)
--   Table of indentation amounts in character columns, for line numbers
--   starting from zero.
-- @field line_state (table)
--   Table of integer line states for line numbers starting from zero.
--   Line states can be used by lexers for keeping track of persistent states.
-- @field property (table)
--   Map of key-value string pairs.
-- @field property_expanded (table, Read-only)
--   Map of key-value string pairs with `$()` and `%()` variable replacement
--   performed in values.
-- @field property_int (table, Read-only)
--   Map of key-value pairs with values interpreted as numbers, or `0` if not
--   found.
-- @field style_at (table, Read-only)
--   Table of style names at positions in the buffer starting from 1.
module('lexer')]=]

-- Localize the LPeg constructors and captures used throughout this module.
local lpeg = require('lpeg')
local lpeg_P, lpeg_R, lpeg_S, lpeg_V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
local lpeg_Ct, lpeg_Cc, lpeg_Cp = lpeg.Ct, lpeg.Cc, lpeg.Cp
local lpeg_Cmt, lpeg_C = lpeg.Cmt, lpeg.C
local lpeg_match = lpeg.match

-- Lexer files are searched for along this path (same format as
-- `package.path`).
M.path = package.path

if not package.searchpath then
  -- Searches for the given *name* in the given *path*.
  -- This is an implementation of Lua 5.2's `package.searchpath()` function for
  -- Lua 5.1.
  -- Returns the first filename built from *path* (each ';'-separated template
  -- with '?' replaced by *name*) that can be opened for reading, or `nil` plus
  -- a newline-separated list of the filenames tried.
  -- NOTE(review): `gsub()` treats '%' in *name* as magic within the
  -- replacement string; acceptable here since lexer names never contain '%'.
  function package.searchpath(name, path)
    local tried = {}
    for part in path:gmatch('[^;]+') do
      local filename = part:gsub('%?', name)
      local f = io.open(filename, 'r')
      if f then
        f:close()
        return filename
      end
      tried[#tried + 1] = string.format("no file '%s'", filename)
    end
    return nil, table.concat(tried, '\n')
  end
end

local string_upper = string.upper
-- Default styles.
-- Register each default token name NAME on the module table: `M.NAME` holds
-- the lower-case token name and `M.STYLE_NAME` holds its '$(style.name)'
-- property expansion.
local default = {
  'nothing', 'whitespace', 'comment', 'string', 'number', 'keyword',
  'identifier', 'operator', 'error', 'preprocessor', 'constant', 'variable',
  'function', 'class', 'type', 'label', 'regex', 'embedded'
}
for _, name in ipairs(default) do
  local upper_name = string_upper(name)
  M[upper_name] = name
  M['STYLE_'..upper_name] = '$(style.'..name..')'
end
-- Predefined styles.
local predefined = {
  'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar',
  'indentguide', 'calltip', 'folddisplaytext'
}
for _, name in ipairs(predefined) do
  local upper_name = string_upper(name)
  M[upper_name] = name
  M['STYLE_'..upper_name] = '$(style.'..name..')'
end

---
-- Adds pattern *rule* identified by string *id* to the ordered list of rules
-- for lexer *lexer*.
-- @param lexer The lexer to add the given rule to.
-- @param id The id associated with this rule. It does not have to be the same
--   as the name passed to `token()`.
-- @param rule The LPeg pattern of the rule.
-- @see modify_rule
-- @name add_rule
function M.add_rule(lexer, id, rule)
  -- A proxy lexer delegates rule storage to its true parent.
  if lexer._lexer then lexer = lexer._lexer end
  if not lexer._RULES then
    lexer._RULES = {}
    -- An ordered list (by numerical index) of rule names, used together with
    -- lexer._RULES when building _TOKENRULE.
    lexer._RULEORDER = {}
  end
  lexer._RULES[id] = rule
  local order = lexer._RULEORDER
  order[#order + 1] = id
  lexer:build_grammar()
end

---
-- Replaces in lexer *lexer* the existing rule identified by string *id* with
-- pattern *rule*.
-- @param lexer The lexer to modify.
-- @param id The id associated with this rule.
-- @param rule The LPeg pattern of the rule.
-- @name modify_rule
function M.modify_rule(lexer, id, rule)
  -- A proxy lexer delegates rule storage to its true parent.
  if lexer._lexer then lexer = lexer._lexer end
  lexer._RULES[id] = rule
  lexer:build_grammar()
end

---
-- Associates string *token_name* in lexer *lexer* with Scintilla style string
-- *style*.
-- Style strings are comma-separated property settings. Available property
-- settings are:
--
-- * `font:name`: Font name.
-- * `size:int`: Font size.
-- * `bold` or `notbold`: Whether or not the font face is bold.
-- * `weight:int`: Font weight (between 1 and 999).
-- * `italics` or `notitalics`: Whether or not the font face is italic.
-- * `underlined` or `notunderlined`: Whether or not the font face is
--   underlined.
-- * `fore:color`: Font face foreground color in "#RRGGBB" or 0xBBGGRR format.
-- * `back:color`: Font face background color in "#RRGGBB" or 0xBBGGRR format.
-- * `eolfilled` or `noteolfilled`: Whether or not the background color
--   extends to the end of the line.
-- * `case:char`: Font case ('u' for uppercase, 'l' for lowercase, and 'm' for
--   mixed case).
-- * `visible` or `notvisible`: Whether or not the text is visible.
-- * `changeable` or `notchangeable`: Whether or not the text is changeable or
--   read-only.
--
-- Property settings may also contain "$(property.name)" expansions for
-- properties defined in Scintilla, theme files, etc.
-- @param lexer The lexer to add a style to.
-- @param token_name The name of the token to associate with the style.
-- @param style A style string for Scintilla.
-- @usage lexer:add_style('longstring', l.STYLE_STRING)
-- @usage lexer:add_style('deprecated_function', l.STYLE_FUNCTION..',italics')
-- @usage lexer:add_style('visible_ws',
--   l.STYLE_WHITESPACE..',back:$(color.grey)')
-- @name add_style
function M.add_style(lexer, token_name, style)
  local num_styles = lexer._numstyles
  -- Style numbers 32..39 are reserved for predefined styles; jump over them.
  if num_styles == 32 then num_styles = num_styles + 8 end
  if num_styles >= 255 then print('Too many styles defined (255 MAX)') end
  lexer._TOKENSTYLES[token_name] = num_styles
  lexer._numstyles = num_styles + 1
  lexer._EXTRASTYLES[token_name] = style
  -- If the lexer is a proxy or a child that embedded itself, copy this style
  -- to the parent lexer as well.
  if lexer._lexer then lexer._lexer:add_style(token_name, style) end
end

---
-- Adds to lexer *lexer* a fold point whose beginning and end tokens are string
-- *token_name* tokens with string content *start_symbol* and *end_symbol*,
-- respectively.
-- In the event that *start_symbol* may or may not be a fold point depending on
-- context, and that additional processing is required, *end_symbol* may be a
-- function that ultimately returns `1` (indicating a beginning fold point),
-- `-1` (indicating an ending fold point), or `0` (indicating no fold point).
-- That function is passed the following arguments:
--
-- * `text`: The text being processed for fold points.
-- * `pos`: The position in *text* of the beginning of the line currently
--   being processed.
-- * `line`: The text of the line currently being processed.
-- * `s`: The position of *start_symbol* in *line*.
-- * `symbol`: *start_symbol* itself.
-- @param lexer The lexer to add a fold point to.
-- @param token_name The token name of text that indicates a fold point.
-- @param start_symbol The text that indicates the beginning of a fold point.
-- @param end_symbol Either the text that indicates the end of a fold point, or
--   a function that returns whether or not *start_symbol* is a beginning fold
--   point (1), an ending fold point (-1), or not a fold point at all (0).
-- @usage lexer:add_fold_point(l.OPERATOR, '{', '}')
-- @usage lexer:add_fold_point(l.KEYWORD, 'if', 'end')
-- @usage lexer:add_fold_point(l.COMMENT, '#', l.fold_line_comments('#'))
-- @usage lexer:add_fold_point('custom', function(text, pos, line, s, symbol)
--   ... end)
-- @name add_fold_point
function M.add_fold_point(lexer, token_name, start_symbol, end_symbol)
  if not lexer._FOLDPOINTS then lexer._FOLDPOINTS = {_SYMBOLS = {}} end
  -- _SYMBOLS doubles as an ordered list (numeric keys) and a membership set
  -- (symbol keys), so each symbol is recorded once, in insertion order.
  local symbols = lexer._FOLDPOINTS._SYMBOLS
  if not symbols[start_symbol] then
    symbols[#symbols + 1], symbols[start_symbol] = start_symbol, true
  end
  if not lexer._FOLDPOINTS[token_name] then
    lexer._FOLDPOINTS[token_name] = {}
  end
  if type(end_symbol) == 'string' then
    if not symbols[end_symbol] then
      symbols[#symbols + 1], symbols[end_symbol] = end_symbol, true
    end
    -- 1 marks a beginning fold point; -1 marks an ending one.
    lexer._FOLDPOINTS[token_name][start_symbol] = 1
    lexer._FOLDPOINTS[token_name][end_symbol] = -1
  else
    lexer._FOLDPOINTS[token_name][start_symbol] = end_symbol -- function or int
  end
  -- If the lexer is a proxy or a child that embedded itself, copy this fold
  -- point to the parent lexer.
  if lexer._lexer then
    lexer._lexer:add_fold_point(token_name, start_symbol, end_symbol)
  end
end

-- (Re)constructs `lexer._TOKENRULE`: the ordered choice of all of the lexer's
-- rules, followed by a catch-all default token for any other single character.
local function join_tokens(lexer)
  local patterns, order = lexer._RULES, lexer._RULEORDER
  local token_rule = patterns[order[1]]
  for i = 2, #order do token_rule = token_rule + patterns[order[i]] end
  lexer._TOKENRULE = token_rule + M.token(M.DEFAULT, M.any)
  return lexer._TOKENRULE
end

-- Metatable for Scintillua grammars.
-- These grammars are just tables ultimately passed to `lpeg.P()`.
+local grammar_mt = {__index = { + -- Adds lexer *lexer* and any of its embedded lexers to this grammar. + -- @param lexer The lexer to add. + add_lexer = function(self, lexer) + local token_rule = lexer:join_tokens() + for i = 1, #lexer._CHILDREN do + local child = lexer._CHILDREN[i] + if child._CHILDREN then self:add_lexer(child) end + local rules = child._EMBEDDEDRULES[lexer._NAME] + local rules_token_rule = self['__'..child._NAME] or rules.token_rule + self[child._NAME] = (-rules.end_rule * rules_token_rule)^0 * + rules.end_rule^-1 * lpeg_V(lexer._NAME) + local embedded_child = '_'..child._NAME + self[embedded_child] = rules.start_rule * + (-rules.end_rule * rules_token_rule)^0 * + rules.end_rule^-1 + token_rule = lpeg_V(embedded_child) + token_rule + end + self['__'..lexer._NAME] = token_rule -- can contain embedded lexer rules + self[lexer._NAME] = token_rule^0 + end +}} + +-- (Re)constructs `lexer._GRAMMAR`. +-- @param initial_rule The name of the rule to start lexing with. The default +-- value is `lexer._NAME`. Multilang lexers use this to start with a child +-- rule if necessary. +local function build_grammar(lexer, initial_rule) + if not lexer._RULES then return end + if lexer._CHILDREN then + if not initial_rule then initial_rule = lexer._NAME end + local grammar = setmetatable({initial_rule}, grammar_mt) + grammar:add_lexer(lexer) + lexer._INITIALRULE = initial_rule + lexer._GRAMMAR = lpeg_Ct(lpeg_P(grammar)) + else + lexer._GRAMMAR = lpeg_Ct(lexer:join_tokens()^0) + end +end + +--- +-- Embeds child lexer *child* in parent lexer *lexer* using patterns +-- *start_rule* and *end_rule*, which signal the beginning and end of the +-- embedded lexer, respectively. +-- @param lexer The parent lexer. +-- @param child The child lexer. +-- @param start_rule The pattern that signals the beginning of the embedded +-- lexer. +-- @param end_rule The pattern that signals the end of the embedded lexer. 
+-- @usage html:embed(css, css_start_rule, css_end_rule) +-- @usage html:embed(lexer, php_start_rule, php_end_rule) -- from php lexer +-- @name embed +function M.embed(lexer, child, start_rule, end_rule) + if lexer._lexer then lexer = lexer._lexer end -- proxy; get true parent + -- Add child rules. + if not child._EMBEDDEDRULES then child._EMBEDDEDRULES = {} end + if not child._RULES then error('Cannot embed lexer with no rules') end + child._EMBEDDEDRULES[lexer._NAME] = { + ['start_rule'] = start_rule, + token_rule = child:join_tokens(), + ['end_rule'] = end_rule + } + if not lexer._CHILDREN then lexer._CHILDREN = {} end + local children = lexer._CHILDREN + children[#children + 1] = child + -- Add child styles. + for token, style in pairs(child._EXTRASTYLES) do + lexer:add_style(token, style) + end + -- Add child fold symbols. + if child._FOLDPOINTS then + for token_name, symbols in pairs(child._FOLDPOINTS) do + if token_name ~= '_SYMBOLS' then + for symbol, v in pairs(symbols) do + lexer:add_fold_point(token_name, symbol, v) + end + end + end + end + lexer:build_grammar() + child._lexer = lexer -- use parent's tokens if child is embedding itself +end + +--- +-- Lexes a chunk of text *text* (that has an initial style number of +-- *init_style*) using lexer *lexer*, returning a table of token names and +-- positions. +-- @param lexer The lexer to lex text with. +-- @param text The text in the buffer to lex. +-- @param init_style The current style. Multiple-language lexers use this to +-- determine which language to start lexing in. +-- @return table of token names and positions. +-- @name lex +function M.lex(lexer, text, init_style) + if not lexer._GRAMMAR then return {M.DEFAULT, #text + 1} end + if not lexer._LEXBYLINE then + -- For multilang lexers, build a new grammar whose initial_rule is the + -- current language. 
+ if lexer._CHILDREN then + for style, style_num in pairs(lexer._TOKENSTYLES) do + if style_num == init_style then + local lexer_name = style:match('^(.+)_whitespace') or lexer._NAME + if lexer._INITIALRULE ~= lexer_name then + lexer:build_grammar(lexer_name) + end + break + end + end + end + return lpeg_match(lexer._GRAMMAR, text) + else + local tokens = {} + local function append(tokens, line_tokens, offset) + for i = 1, #line_tokens, 2 do + tokens[#tokens + 1] = line_tokens[i] + tokens[#tokens + 1] = line_tokens[i + 1] + offset + end + end + local offset = 0 + local grammar = lexer._GRAMMAR + for line in text:gmatch('[^\r\n]*\r?\n?') do + local line_tokens = lpeg_match(grammar, line) + if line_tokens then append(tokens, line_tokens, offset) end + offset = offset + #line + -- Use the default style to the end of the line if none was specified. + if tokens[#tokens] ~= offset then + tokens[#tokens + 1], tokens[#tokens + 2] = 'default', offset + 1 + end + end + return tokens + end +end + +--- +-- Determines fold points in a chunk of text *text* using lexer *lexer*, +-- returning a table of fold levels associated with line numbers. +-- *text* starts at position *start_pos* on line number *start_line* with a +-- beginning fold level of *start_level* in the buffer. +-- @param lexer The lexer to fold text with. +-- @param text The text in the buffer to fold. +-- @param start_pos The position in the buffer *text* starts at, starting at +-- zero. +-- @param start_line The line number *text* starts on. +-- @param start_level The fold level *text* starts on. +-- @return table of fold levels associated with line numbers. 
+-- @name fold +function M.fold(lexer, text, start_pos, start_line, start_level) + local folds = {} + if text == '' then return folds end + local fold = M.property_int['fold'] > 0 + local FOLD_BASE = M.FOLD_BASE + local FOLD_HEADER, FOLD_BLANK = M.FOLD_HEADER, M.FOLD_BLANK + if fold and lexer._FOLDPOINTS then + local lines = {} + for p, l in (text..'\n'):gmatch('()(.-)\r?\n') do + lines[#lines + 1] = {p, l} + end + local fold_zero_sum_lines = M.property_int['fold.on.zero.sum.lines'] > 0 + local fold_points = lexer._FOLDPOINTS + local fold_point_symbols = fold_points._SYMBOLS + local style_at, fold_level = M.style_at, M.fold_level + local line_num, prev_level = start_line, start_level + local current_level = prev_level + for i = 1, #lines do + local pos, line = lines[i][1], lines[i][2] + if line ~= '' then + if lexer._CASEINSENSITIVEFOLDPOINTS then line = line:lower() end + local level_decreased = false + for j = 1, #fold_point_symbols do + local symbol = fold_point_symbols[j] + local word = not symbol:find('[^%w_]') + local s, e = line:find(symbol, 1, true) + while s and e do + --if not word or line:find('^%f[%w_]'..symbol..'%f[^%w_]', s) then + if not word or not ((s > 1 and line:find('^[%w_]', s - 1)) or + line:find('^[%w_]', e + 1)) then + local symbols = fold_points[style_at[start_pos + pos + s - 1]] + local level = symbols and symbols[symbol] + if type(level) == 'function' then + level = level(text, pos, line, s, symbol) + end + if type(level) == 'number' then + current_level = current_level + level + if level < 0 and current_level < prev_level then + -- Potential zero-sum line. If the level were to go back up on + -- the same line, the line may be marked as a fold header. 
+ level_decreased = true + end + end + end + s = line:find(fold_point_symbols[j], s + 1, true) + end + end + folds[line_num] = prev_level + if current_level > prev_level then + folds[line_num] = prev_level + FOLD_HEADER + elseif level_decreased and current_level == prev_level and + fold_zero_sum_lines then + if line_num > start_line then + folds[line_num] = prev_level - 1 + FOLD_HEADER + else + -- Typing within a zero-sum line. + local level = fold_level[line_num - 1] - 1 + if level > FOLD_HEADER then level = level - FOLD_HEADER end + if level > FOLD_BLANK then level = level - FOLD_BLANK end + folds[line_num] = level + FOLD_HEADER + current_level = current_level + 1 + end + end + if current_level < FOLD_BASE then current_level = FOLD_BASE end + prev_level = current_level + else + folds[line_num] = prev_level + FOLD_BLANK + end + line_num = line_num + 1 + end + elseif fold and (lexer._FOLDBYINDENTATION or + M.property_int['fold.by.indentation'] > 0) then + -- Indentation based folding. + -- Calculate indentation per line. + local indentation = {} + for indent, line in (text..'\n'):gmatch('([\t ]*)([^\r\n]*)\r?\n') do + indentation[#indentation + 1] = line ~= '' and #indent + end + -- Find the first non-blank line before start_line. If the current line is + -- indented, make that previous line a header and update the levels of any + -- blank lines inbetween. If the current line is blank, match the level of + -- the previous non-blank line. 
+local current_level = start_level
+    for i = start_line - 1, 0, -1 do
+      local level = M.fold_level[i]
+      if level >= FOLD_HEADER then level = level - FOLD_HEADER end
+      if level < FOLD_BLANK then
+        local indent = M.indent_amount[i]
+        if indentation[1] and indentation[1] > indent then
+          folds[i] = FOLD_BASE + indent + FOLD_HEADER
+          for j = i + 1, start_line - 1 do
+            folds[j] = start_level + FOLD_BLANK
+          end
+        elseif not indentation[1] then
+          current_level = FOLD_BASE + indent
+        end
+        break
+      end
+    end
+    -- Iterate over lines, setting fold numbers and fold flags.
+    for i = 1, #indentation do
+      if indentation[i] then
+        current_level = FOLD_BASE + indentation[i]
+        folds[start_line + i - 1] = current_level
+        for j = i + 1, #indentation do
+          if indentation[j] then
+            if FOLD_BASE + indentation[j] > current_level then
+              folds[start_line + i - 1] = current_level + FOLD_HEADER
+              current_level = FOLD_BASE + indentation[j] -- for any blanks below
+            end
+            break
+          end
+        end
+      else
+        folds[start_line + i - 1] = current_level + FOLD_BLANK
+      end
+    end
+  else
+    -- No folding, reset fold levels if necessary.
+    local current_line = start_line
+    for _ in text:gmatch('\r?\n') do
+      folds[current_line] = start_level
+      current_line = current_line + 1
+    end
+  end
+  return folds
+end
+
+---
+-- Creates and returns a new lexer with the given name.
+-- @param name The lexer's name.
+-- @param opts Table of lexer options. Options currently supported:
+-- * `lex_by_line`: Whether or not the lexer only processes whole lines of
+-- text (instead of arbitrary chunks of text) at a time.
+-- Line lexers cannot look ahead to subsequent lines.
+-- The default value is `false`.
+-- * `fold_by_indentation`: Whether or not the lexer does not define any fold
+-- points and that fold points should be calculated based on changes in line
+-- indentation.
+-- The default value is `false`.
+-- * `case_insensitive_fold_points`: Whether or not fold points added via
+-- `lexer:add_fold_point()` ignore case.
+-- The default value is `false`. +-- * `inherit`: Lexer to inherit from. +-- The default value is `nil`. +-- @usage l.new('rhtml', {inherit = l.load('html')}) +-- @name new +function M.new(name, opts) + local lexer = { + _NAME = assert(name, 'lexer name expected'), + _LEXBYLINE = opts and opts['lex_by_line'], + _FOLDBYINDENTATION = opts and opts['fold_by_indentation'], + _CASEINSENSITIVEFOLDPOINTS = opts and opts['case_insensitive_fold_points'], + _lexer = opts and opts['inherit'] + } + + -- Create the initial maps for token names to style numbers and styles. + local token_styles = {} + for i = 1, #default do token_styles[default[i]] = i - 1 end + for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end + lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default + lexer._EXTRASTYLES = {} + + return setmetatable(lexer, {__index = { + add_rule = M.add_rule, modify_rule = M.modify_rule, add_style = M.add_style, + add_fold_point = M.add_fold_point, join_tokens = join_tokens, + build_grammar = build_grammar, embed = M.embed, lex = M.lex, fold = M.fold + }}) +end + +-- Legacy support for older lexers. +-- Processes the `lexer._rules`, `lexer._tokenstyles`, and `lexer._foldsymbols` +-- tables. +-- Since legacy lexers may be processed up to twice, ensure their default styles +-- and rules are not processed more than once. 
+local function process_legacy_lexer(lexer) + local function warn(msg) --[[io.stderr:write(msg, "\n")]] end + if not lexer._LEGACY then + lexer._LEGACY = true + warn("lexers as tables are deprecated; use 'lexer.new()'") + local token_styles = {} + for i = 1, #default do token_styles[default[i]] = i - 1 end + for i = 1, #predefined do token_styles[predefined[i]] = i + 31 end + lexer._TOKENSTYLES, lexer._numstyles = token_styles, #default + lexer._EXTRASTYLES = {} + setmetatable(lexer, getmetatable(M.new(''))) + if lexer._rules then + warn("lexer '_rules' table is deprecated; use 'add_rule()'") + for i = 1, #lexer._rules do + lexer:add_rule(lexer._rules[i][1], lexer._rules[i][2]) + end + end + end + if lexer._tokenstyles then + warn("lexer '_tokenstyles' table is deprecated; use 'add_style()'") + for token, style in pairs(lexer._tokenstyles) do + -- If this legacy lexer is being processed a second time, only add styles + -- added since the first processing. + if not lexer._TOKENSTYLES[token] then lexer:add_style(token, style) end + end + end + if lexer._foldsymbols then + warn("lexer '_foldsymbols' table is deprecated; use 'add_fold_point()'") + for token_name, symbols in pairs(lexer._foldsymbols) do + if type(symbols) == 'table' and token_name ~= '_patterns' then + for symbol, v in pairs(symbols) do + lexer:add_fold_point(token_name, symbol, v) + end + end + end + if lexer._foldsymbols._case_insensitive then + lexer._CASEINSENSITIVEFOLDPOINTS = true + end + end +end + +local lexers = {} -- cache of loaded lexers +--- +-- Initializes or loads and returns the lexer of string name *name*. +-- Scintilla calls this function in order to load a lexer. Parent lexers also +-- call this function in order to load child lexers and vice-versa. The user +-- calls this function in order to load a lexer when using Scintillua as a Lua +-- library. +-- @param name The name of the lexing language. +-- @param alt_name The alternate name of the lexing language. 
This is useful for
+-- embedding the same child lexer with multiple sets of start and end tokens.
+-- @param cache Flag indicating whether or not to load lexers from the cache.
+-- This should only be `true` when initially loading a lexer (e.g. not from
+-- within another lexer for embedding purposes).
+-- The default value is `false`.
+-- @return lexer object
+-- @name load
+function M.load(name, alt_name, cache)
+  if cache and lexers[alt_name or name] then return lexers[alt_name or name] end
+
+  -- When using Scintillua as a stand-alone module, the `property` and
+  -- `property_int` tables do not exist (they are not useful). Create them in
+  -- order to prevent errors from occurring.
+  if not M.property then
+    M.property, M.property_int = {}, setmetatable({}, {
+      __index = function(t, k) return tonumber(M.property[k]) or 0 end,
+      __newindex = function() error('read-only property') end
+    })
+  end
+
+  -- Load the language lexer with its rules, styles, etc.
+  -- However, replace the default `WHITESPACE` style name with a unique
+  -- whitespace style name (and then automatically add it afterwards), since
+  -- embedded lexing relies on these unique whitespace style names. Note that
+  -- loading embedded lexers changes `WHITESPACE` again, so when adding it
+  -- later, do not reference the potentially incorrect value.
+  M.WHITESPACE = (alt_name or name)..'_whitespace'
+  local lexer = dofile(assert(package.searchpath(name, M.path)))
+  assert(lexer, string.format("'%s.lua' did not return a lexer", name))
+  if alt_name then lexer._NAME = alt_name end
+  if not getmetatable(lexer) or lexer._LEGACY then
+    -- A legacy lexer may need to be processed a second time in order to pick up
+    -- any `_tokenstyles` or `_foldsymbols` added after `l.embed_lexer()`.
+    process_legacy_lexer(lexer)
+    if lexer._lexer and lexer._lexer._LEGACY then
+      process_legacy_lexer(lexer._lexer) -- mainly for `_foldsymbols` edits
+    end
+  end
+  lexer:add_style((alt_name or name)..'_whitespace', M.STYLE_WHITESPACE)
+
+  -- If the lexer is a proxy or a child that embedded itself, set the parent to
+  -- be the main lexer.
+  if lexer._lexer then lexer = lexer._lexer end
+
+  lexers[alt_name or name] = lexer
+  return lexer
+end
+
+-- The following are utility functions lexers will have access to.
+
+-- Common patterns.
+M.any = lpeg_P(1)
+M.ascii = lpeg_R('\000\127')
+M.extend = lpeg_R('\000\255')
+M.alpha = lpeg_R('AZ', 'az')
+M.digit = lpeg_R('09')
+M.alnum = lpeg_R('AZ', 'az', '09')
+M.lower = lpeg_R('az')
+M.upper = lpeg_R('AZ')
+M.xdigit = lpeg_R('09', 'AF', 'af')
+M.cntrl = lpeg_R('\000\031')
+M.graph = lpeg_R('!~')
+M.print = lpeg_R(' ~')
+M.punct = lpeg_R('!/', ':@', '[\'', '{~')
+M.space = lpeg_S('\t\v\f\n\r ')
+
+M.newline = lpeg_S('\r\n\f')^1
+M.nonnewline = 1 - M.newline
+M.nonnewline_esc = 1 - (M.newline + '\\') + '\\' * M.any
+
+M.dec_num = M.digit^1
+M.hex_num = '0' * lpeg_S('xX') * M.xdigit^1
+M.oct_num = '0' * lpeg_R('07')^1
+M.integer = lpeg_S('+-')^-1 * (M.hex_num + M.oct_num + M.dec_num)
+M.float = lpeg_S('+-')^-1 *
+          ((M.digit^0 * '.' * M.digit^1 + M.digit^1 * '.' * M.digit^0) *
+           (lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1)^-1 +
+           (M.digit^1 * lpeg_S('eE') * lpeg_S('+-')^-1 * M.digit^1))
+
+M.word = (M.alpha + '_') * (M.alnum + '_')^0
+
+---
+-- Creates and returns a token pattern with token name *name* and pattern
+-- *patt*.
+-- If *name* is not a predefined token name, its style must be defined in the
+-- lexer's `_tokenstyles` table.
+-- @param name The name of token. If this name is not a predefined token name,
+-- then a style needs to be associated with it in the lexer's `_tokenstyles`
+-- table.
+-- @param patt The LPeg pattern associated with the token.
+-- @return pattern +-- @usage local ws = token(l.WHITESPACE, l.space^1) +-- @usage local annotation = token('annotation', '@' * l.word) +-- @name token +function M.token(name, patt) + return lpeg_Cc(name) * patt * lpeg_Cp() +end + +--- +-- Creates and returns a pattern that matches a range of text bounded by +-- *chars* characters. +-- This is a convenience function for matching more complicated delimited ranges +-- like strings with escape characters and balanced parentheses. *single_line* +-- indicates whether or not the range must be on a single line, *no_escape* +-- indicates whether or not to ignore '\' as an escape character, and *balanced* +-- indicates whether or not to handle balanced ranges like parentheses and +-- requires *chars* to be composed of two characters. +-- @param chars The character(s) that bound the matched range. +-- @param single_line Optional flag indicating whether or not the range must be +-- on a single line. +-- @param no_escape Optional flag indicating whether or not the range end +-- character may be escaped by a '\\' character. +-- @param balanced Optional flag indicating whether or not to match a balanced +-- range, like the "%b" Lua pattern. This flag only applies if *chars* +-- consists of two different characters (e.g. "()"). 
+-- @return pattern +-- @usage local dq_str_escapes = l.delimited_range('"') +-- @usage local dq_str_noescapes = l.delimited_range('"', false, true) +-- @usage local unbalanced_parens = l.delimited_range('()') +-- @usage local balanced_parens = l.delimited_range('()', false, false, true) +-- @see nested_pair +-- @name delimited_range +function M.delimited_range(chars, single_line, no_escape, balanced) + local s = chars:sub(1, 1) + local e = #chars == 2 and chars:sub(2, 2) or s + local range + local b = balanced and s or '' + local n = single_line and '\n' or '' + if no_escape then + local invalid = lpeg_S(e..n..b) + range = M.any - invalid + else + local invalid = lpeg_S(e..n..b) + '\\' + range = M.any - invalid + '\\' * M.any + end + if balanced and s ~= e then + return lpeg_P{s * (range + lpeg_V(1))^0 * e} + else + return s * range^0 * lpeg_P(e)^-1 + end +end + +--- +-- Creates and returns a pattern that matches pattern *patt* only at the +-- beginning of a line. +-- @param patt The LPeg pattern to match on the beginning of a line. +-- @return pattern +-- @usage local preproc = token(l.PREPROCESSOR, l.starts_line('#') * +-- l.nonnewline^0) +-- @name starts_line +function M.starts_line(patt) + return lpeg_Cmt(lpeg_C(patt), function(input, index, match, ...) + local pos = index - #match + if pos == 1 then return index, ... end + local char = input:sub(pos - 1, pos - 1) + if char == '\n' or char == '\r' or char == '\f' then return index, ... end + end) +end + +--- +-- Creates and returns a pattern that verifies that string set *s* contains the +-- first non-whitespace character behind the current match position. +-- @param s String character set like one passed to `lpeg.S()`. 
+-- @return pattern +-- @usage local regex = l.last_char_includes('+-*!%^&|=,([{') * +-- l.delimited_range('/') +-- @name last_char_includes +function M.last_char_includes(s) + s = '['..s:gsub('[-%%%[]', '%%%1')..']' + return lpeg_P(function(input, index) + if index == 1 then return index end + local i = index + while input:sub(i - 1, i - 1):match('[ \t\r\n\f]') do i = i - 1 end + if input:sub(i - 1, i - 1):match(s) then return index end + end) +end + +--- +-- Returns a pattern that matches a balanced range of text that starts with +-- string *start_chars* and ends with string *end_chars*. +-- With single-character delimiters, this function is identical to +-- `delimited_range(start_chars..end_chars, false, true, true)`. +-- @param start_chars The string starting a nested sequence. +-- @param end_chars The string ending a nested sequence. +-- @return pattern +-- @usage local nested_comment = l.nested_pair('/*', '*/') +-- @see delimited_range +-- @name nested_pair +function M.nested_pair(start_chars, end_chars) + local s, e = start_chars, lpeg_P(end_chars)^-1 + return lpeg_P{s * (M.any - s - end_chars + lpeg_V(1))^0 * e} +end + +--- +-- Creates and returns a pattern that matches any single word in string *words*. +-- *case_insensitive* indicates whether or not to ignore case when matching +-- words. +-- This is a convenience function for simplifying a set of ordered choice word +-- patterns. +-- @param words A string list of words separated by spaces. +-- @param case_insensitive Optional boolean flag indicating whether or not the +-- word match is case-insensitive. The default value is `false`. +-- @param word_chars Unused legacy parameter. 
+-- @return pattern +-- @usage local keyword = token(l.KEYWORD, word_match[[foo bar baz]]) +-- @usage local keyword = token(l.KEYWORD, word_match([[foo-bar foo-baz +-- bar-foo bar-baz baz-foo baz-bar]], true)) +-- @name word_match +function M.word_match(words, case_insensitive, word_chars) + local word_list = {} + if type(words) == 'table' then + -- Legacy `word_match(word_list, word_chars, case_insensitive)` form. + words = table.concat(words, ' ') + word_chars, case_insensitive = case_insensitive, word_chars + end + for word in words:gmatch('%S+') do + word_list[case_insensitive and word:lower() or word] = true + for char in word:gmatch('[^%w_]') do + if not (word_chars or ''):find(char, 1, true) then + word_chars = (word_chars or '')..char + end + end + end + local chars = M.alnum + '_' + if (word_chars or '') ~= '' then chars = chars + lpeg_S(word_chars) end + return lpeg_Cmt(chars^1, function(input, index, word) + if case_insensitive then word = word:lower() end + return word_list[word] and index or nil + end) +end + +-- Deprecated legacy function. Use `parent:embed()` instead. +-- Embeds child lexer *child* in parent lexer *parent* using patterns +-- *start_rule* and *end_rule*, which signal the beginning and end of the +-- embedded lexer, respectively. +-- @param parent The parent lexer. +-- @param child The child lexer. +-- @param start_rule The pattern that signals the beginning of the embedded +-- lexer. +-- @param end_rule The pattern that signals the end of the embedded lexer. 
+-- @usage l.embed_lexer(M, css, css_start_rule, css_end_rule) +-- @usage l.embed_lexer(html, M, php_start_rule, php_end_rule) +-- @usage l.embed_lexer(html, ruby, ruby_start_rule, ruby_end_rule) +-- @see embed +-- @name embed_lexer +function M.embed_lexer(parent, child, start_rule, end_rule) + if not getmetatable(parent) then process_legacy_lexer(parent) end + if not getmetatable(child) then process_legacy_lexer(child) end + parent:embed(child, start_rule, end_rule) +end + +-- Determines if the previous line is a comment. +-- This is used for determining if the current comment line is a fold point. +-- @param prefix The prefix string defining a comment. +-- @param text The text passed to a fold function. +-- @param pos The pos passed to a fold function. +-- @param line The line passed to a fold function. +-- @param s The s passed to a fold function. +local function prev_line_is_comment(prefix, text, pos, line, s) + local start = line:find('%S') + if start < s and not line:find(prefix, start, true) then return false end + local p = pos - 1 + if text:sub(p, p) == '\n' then + p = p - 1 + if text:sub(p, p) == '\r' then p = p - 1 end + if text:sub(p, p) ~= '\n' then + while p > 1 and text:sub(p - 1, p - 1) ~= '\n' do p = p - 1 end + while text:sub(p, p):find('^[\t ]$') do p = p + 1 end + return text:sub(p, p + #prefix - 1) == prefix + end + end + return false +end + +-- Determines if the next line is a comment. +-- This is used for determining if the current comment line is a fold point. +-- @param prefix The prefix string defining a comment. +-- @param text The text passed to a fold function. +-- @param pos The pos passed to a fold function. +-- @param line The line passed to a fold function. +-- @param s The s passed to a fold function. 
+local function next_line_is_comment(prefix, text, pos, line, s) + local p = text:find('\n', pos + s) + if p then + p = p + 1 + while text:sub(p, p):find('^[\t ]$') do p = p + 1 end + return text:sub(p, p + #prefix - 1) == prefix + end + return false +end + +--- +-- Returns a fold function (to be passed to `lexer:add_fold_point()`) that folds +-- consecutive line comments that start with string *prefix*. +-- @param prefix The prefix string defining a line comment. +-- @usage lexer:add_fold_point(l.COMMENT, '--', l.fold_line_comments('--')) +-- @usage lexer:add_fold_point(l.COMMENT, '//', l.fold_line_comments('//')) +-- @name fold_line_comments +function M.fold_line_comments(prefix) + local property_int = M.property_int + return function(text, pos, line, s) + if property_int['fold.line.comments'] == 0 then return 0 end + if s > 1 and line:match('^%s*()') < s then return 0 end + local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s) + local next_line_comment = next_line_is_comment(prefix, text, pos, line, s) + if not prev_line_comment and next_line_comment then return 1 end + if prev_line_comment and not next_line_comment then return -1 end + return 0 + end +end + +M.property_expanded = setmetatable({}, { + -- Returns the string property value associated with string property *key*, + -- replacing any "$()" and "%()" expressions with the values of their keys. + __index = function(t, key) + return M.property[key]:gsub('[$%%]%b()', function(key) + return t[key:sub(3, -2)] + end) + end, + __newindex = function() error('read-only property') end +}) + +--[[ The functions and fields below were defined in C. + +--- +-- Returns the line number of the line that contains position *pos*, which +-- starts from 1. +-- @param pos The position to get the line number of. 
+-- @return number +local function line_from_position(pos) end +]] + +return M diff --git a/lexlua/lilypond.lua b/lexlua/lilypond.lua new file mode 100644 index 000000000..6f68e8249 --- /dev/null +++ b/lexlua/lilypond.lua @@ -0,0 +1,31 @@ +-- Copyright 2006-2018 Robert Gieseke. See License.txt. +-- Lilypond LPeg lexer. +-- TODO Embed Scheme; Notes?, Numbers? + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('lilypond') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords, commands. +lex:add_rule('keyword', token(lexer.KEYWORD, '\\' * lexer.word)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range('"', false, true))) + +-- Comments. +-- TODO: block comment. +lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S("{}'~<>|"))) + +return lex diff --git a/lexlua/lisp.lua b/lexlua/lisp.lua new file mode 100644 index 000000000..bf74fc15c --- /dev/null +++ b/lexlua/lisp.lua @@ -0,0 +1,65 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Lisp LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('lisp') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + defclass defconstant defgeneric define-compiler-macro define-condition + define-method-combination define-modify-macro define-setf-expander + define-symbol-macro defmacro defmethod defpackage defparameter defsetf + defstruct deftype defun defvar + abort assert block break case catch ccase cerror cond ctypecase declaim + declare do do* do-all-symbols do-external-symbols do-symbols dolist dotimes + ecase error etypecase eval-when flet handler-bind handler-case if + ignore-errors in-package labels lambda let let* locally loop macrolet + multiple-value-bind proclaim prog prog* prog1 prog2 progn progv provide + require restart-bind restart-case restart-name return return-from signal + symbol-macrolet tagbody the throw typecase unless unwind-protect when + with-accessors with-compilation-unit with-condition-restarts + with-hash-table-iterator with-input-from-string with-open-file + with-open-stream with-output-to-string with-package-iterator + with-simple-restart with-slots with-standard-io-syntax + t nil +]])) + +local word = lexer.alpha * (lexer.alnum + '_' + '-')^0 + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, "'" * word + + lexer.delimited_range('"'))) + +-- Comments. +local line_comment = ';' * lexer.nonnewline^0 +local block_comment = '#|' * (lexer.any - '|#')^0 * P('|#')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * + (S('./') * lexer.digit^1)^-1)) + +-- Entities. +lex:add_rule('entity', token('entity', '&' * word)) +lex:add_style('entity', lexer.STYLE_VARIABLE) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('<>=*/+-`@%()'))) + +-- Fold points. 
+lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#|', '|#') +lex:add_fold_point(lexer.COMMENT, ';', lexer.fold_line_comments(';')) + +return lex diff --git a/lexlua/litcoffee.lua b/lexlua/litcoffee.lua new file mode 100644 index 000000000..d122ae2db --- /dev/null +++ b/lexlua/litcoffee.lua @@ -0,0 +1,22 @@ +-- Copyright 2006-2018 Robert Gieseke. See License.txt. +-- Literate CoffeeScript LPeg lexer. +-- http://coffeescript.org/#literate + +local lexer = require('lexer') +local token = lexer.token +local P, S = lpeg.P, lpeg.S + +local lex = lexer.new('litcoffee', {inherit = lexer.load('markdown')}) + +-- Embedded CoffeeScript. +local coffeescript = lexer.load('coffeescript') +local coffee_start_rule = token(lexer.STYLE_EMBEDDED, (P(' ')^4 + P('\t'))) +local coffee_end_rule = token(lexer.STYLE_EMBEDDED, lexer.newline) +lex:embed(coffeescript, coffee_start_rule, coffee_end_rule) + +-- Use 'markdown_whitespace' instead of lexer.WHITESPACE since the latter would +-- expand to 'litcoffee_whitespace'. +lex:modify_rule('whitespace', token('markdown_whitespace', S(' \t')^1 + + S('\r\n')^1)) + +return lex diff --git a/lexlua/logtalk.lua b/lexlua/logtalk.lua new file mode 100644 index 000000000..c79429426 --- /dev/null +++ b/lexlua/logtalk.lua @@ -0,0 +1,35 @@ +-- Copyright © 2017-2018 Michael T. Richter <ttmrichter@gmail.com>. See License.txt. +-- Logtalk LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('logtalk', {inherit = lexer.load('prolog')}) + +lex:modify_rule('keyword', token(lexer.KEYWORD, word_match[[ + -- Logtalk "keywords" generated from Vim syntax highlighting file with Prolog + -- keywords stripped since were building up on the Prolog lexer. 
+ abolish_category abolish_events abolish_object abolish_protocol after alias as + before built_in calls category category_property coinductive complements + complements_object conforms_to_protocol create create_category create_object + create_protocol create_logtalk_flag current current_category current_event + current_logtalk_flag current_object current_protocol define_events encoding + end_category end_class end_object end_protocol extends extends_category + extends_object extends_protocol forward implements implements_protocol imports + imports_category include info instantiates instantiates_class is + logtalk_compile logtalk_library_path logtalk_load logtalk_load_context + logtalk_make meta_non_terminal mode object object_property parameter private + protected protocol_property self sender set_logtalk_flag specializes + specializes_class synchronized this threaded threaded_call threaded_engine + threaded_engine_create threaded_engine_destroy threaded_engine_fetch + threaded_engine_next threaded_engine_next_reified threaded_engine_post + threaded_engine_self threaded_engine_yield threaded_exit threaded_ignore + threaded_notify threaded_once threaded_peek threaded_wait uses + -- info/1 and info/2 predicates have their own keywords manually extracted + -- from documentation. + comment argnames arguments author version date parameters parnames copyright + license remarks see_also +]]) + lex:get_rule('keyword')) + +return lex diff --git a/lexlua/lua.lua b/lexlua/lua.lua new file mode 100644 index 000000000..136b618da --- /dev/null +++ b/lexlua/lua.lua @@ -0,0 +1,159 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Lua LPeg lexer. +-- Original written by Peter Odding, 2007/04/04. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('lua') + +-- Whitespace. 
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + and break do else elseif end false for function if in local nil not or repeat + return then true until while + -- Added in 5.2. + goto +]])) + +-- Functions and deprecated functions. +local func = token(lexer.FUNCTION, word_match[[ + assert collectgarbage dofile error getmetatable ipairs load loadfile next + pairs pcall print rawequal rawget rawset require select setmetatable tonumber + tostring type xpcall + -- Added in 5.2. + rawlen +]]) +local deprecated_func = token('deprecated_function', word_match[[ + -- Deprecated in 5.2. + getfenv loadstring module setfenv unpack +]]) +lex:add_rule('function', func + deprecated_func) +lex:add_style('deprecated_function', lexer.STYLE_FUNCTION..',italics') + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + _G _VERSION + -- Added in 5.2. + _ENV +]])) + +-- Libraries and deprecated libraries. +local library = token('library', word_match[[ + -- Coroutine. + coroutine coroutine.create coroutine.resume coroutine.running coroutine.status + coroutine.wrap coroutine.yield + -- Coroutine added in 5.3. + coroutine.isyieldable + -- Module. + package package.cpath package.loaded package.loadlib package.path + package.preload + -- Module added in 5.2. + package.config package.searchers package.searchpath + -- UTF-8 added in 5.3. + utf8 utf8.char utf8.charpattern utf8.codepoint utf8.codes utf8.len utf8.offset + -- String. + string string.byte string.char string.dump string.find string.format + string.gmatch string.gsub string.len string.lower string.match string.rep + string.reverse string.sub string.upper + -- String added in 5.3. + string.pack string.packsize string.unpack + -- Table. + table table.concat table.insert table.remove table.sort + -- Table added in 5.2. + table.pack table.unpack + -- Table added in 5.3. + table.move + -- Math. 
+ math math.abs math.acos math.asin math.atan math.ceil math.cos math.deg + math.exp math.floor math.fmod math.huge math.log math.max math.min math.modf + math.pi math.rad math.random math.randomseed math.sin math.sqrt math.tan + -- Math added in 5.3. + math.maxinteger math.mininteger math.tointeger math.type math.ult + -- IO. + io io.close io.flush io.input io.lines io.open io.output io.popen io.read + io.stderr io.stdin io.stdout io.tmpfile io.type io.write + -- OS. + os os.clock os.date os.difftime os.execute os.exit os.getenv os.remove + os.rename os.setlocale os.time os.tmpname + -- Debug. + debug debug.debug debug.gethook debug.getinfo debug.getlocal + debug.getmetatable debug.getregistry debug.getupvalue debug.sethook + debug.setlocal debug.setmetatable debug.setupvalue debug.traceback + -- Debug added in 5.2. + debug.getuservalue debug.setuservalue debug.upvalueid debug.upvaluejoin +]]) +local deprecated_library = token('deprecated_library', word_match[[ + -- Module deprecated in 5.2. + package.loaders package.seeall + -- Table deprecated in 5.2. + table.maxn + -- Math deprecated in 5.2. + math.log10 + -- Math deprecated in 5.3. + math.atan2 math.cosh math.frexp math.ldexp math.pow math.sinh math.tanh + -- Bit32 deprecated in 5.3. + bit32 bit32.arshift bit32.band bit32.bnot bit32.bor bit32.btest bit32.extract + bit32.lrotate bit32.lshift bit32.replace bit32.rrotate bit32.rshift bit32.xor + -- Debug deprecated in 5.2. + debug.getfenv debug.setfenv +]]) +lex:add_rule('library', library + deprecated_library) +lex:add_style('library', lexer.STYLE_TYPE) +lex:add_style('deprecated_library', lexer.STYLE_TYPE..',italics') + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[', + function(input, index, eq) + local _, e = input:find(']'..eq..']', index, true) + return (e or #input) + 1 + end) + +-- Strings. 
+lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"')) + + token('longstring', longstring)) +lex:add_style('longstring', lexer.STYLE_STRING) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '--' * (longstring + + lexer.nonnewline^0))) + +-- Numbers. +local lua_integer = P('-')^-1 * (lexer.hex_num + lexer.dec_num) +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lua_integer)) + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, '::' * lexer.word * '::')) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, '..' + + S('+-*/%^#=<>&|~;:,.{}[]()'))) + +-- Fold points. +local function fold_longcomment(text, pos, line, s, symbol) + if symbol == '[' then + if line:find('^%[=*%[', s) then return 1 end + elseif symbol == ']' then + if line:find('^%]=*%]', s) then return -1 end + end + return 0 +end +lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +lex:add_fold_point(lexer.KEYWORD, 'function', 'end') +lex:add_fold_point(lexer.KEYWORD, 'repeat', 'until') +lex:add_fold_point(lexer.COMMENT, '[', fold_longcomment) +lex:add_fold_point(lexer.COMMENT, ']', fold_longcomment) +lex:add_fold_point(lexer.COMMENT, '--', lexer.fold_line_comments('--')) +lex:add_fold_point('longstring', '[', ']') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') + +return lex diff --git a/lexlua/makefile.lua b/lexlua/makefile.lua new file mode 100644 index 000000000..061424615 --- /dev/null +++ b/lexlua/makefile.lua @@ -0,0 +1,90 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Makefile LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('makefile', {lex_by_line = true}) + +-- Whitespace. 
+local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, P('!')^-1 * word_match([[ + -- GNU Make conditionals. + ifeq ifneq ifdef ifndef else endif + -- Other conditionals. + if elseif elseifdef elseifndef + -- Directives and other keywords. + define endef export include override private undefine unexport vpath +]], true))) + +-- Targets. +local special_target = token(lexer.CONSTANT, word_match[[ + .PHONY .SUFFIXES .DEFAULT .PRECIOUS .INTERMEDIATE .SECONDARY .SECONDEXPANSION + .DELETE_ON_ERROR .IGNORE .LOW_RESOLUTION_TIME .SILENT .EXPORT_ALL_VARIABLES + .NOTPARALLEL .ONESHELL .POSIX +]]) +local normal_target = token('target', (lexer.any - lexer.space - S(':#='))^1) +lex:add_rule('target', lexer.starts_line((special_target + normal_target) * + ws^0 * #(':' * -P('=')))) +lex:add_style('target', lexer.STYLE_LABEL) + +-- Variables. +local word_char = lexer.any - lexer.space - S(':#=(){}') +local assign = S(':+?')^-1 * '=' +local expanded_var = '$' * ('(' * word_char^1 * ')' + '{' * word_char^1 * '}') +local auto_var = '$' * S('@%<?^+|*') +local special_var = word_match[[ + MAKEFILE_LIST .DEFAULT_GOAL MAKE_RESTARTS .RECIPEPREFIX .VARIABLES .FEATURES + .INCLUDE_DIRS GPATH MAKECMDGOALS MAKESHELL SHELL VPATH +]] * #(ws^0 * assign) +local implicit_var = word_match[[ + -- Some common variables. + AR AS CC CXX CPP FC M2C PC CO GET LEX YACC LINT MAKEINFO TEX TEXI2DVI WEAVE + CWEAVE TANGLE CTANGLE RM + -- Some common flag variables. + ARFLAGS ASFLAGS CFLAGS CXXFLAGS COFLAGS CPPFLAGS FFLAGS GFLAGS LDFLAGS LFLAGS + YFLAGS PFLAGS RFLAGS LINTFLAGS + -- Other. + DESTDIR MAKE MAKEFLAGS MAKEOVERRIDES MFLAGS +]] * #(ws^0 * assign) +local computed_var = token(lexer.OPERATOR, '$' * S('({')) * + token(lexer.FUNCTION, word_match[[ + -- Functions for String Substitution and Analysis. 
+ subst patsubst strip findstring filter filter-out sort word wordlist words + firstword lastword + -- Functions for File Names. + dir notdir suffix basename addsuffix addprefix join wildcard realpath abspath + -- Functions for Conditionals. + if or and + -- Miscellaneous Functions. + foreach call value eval origin flavor shell + -- Functions That Control Make. + error warning info +]]) +local variable = token(lexer.VARIABLE, expanded_var + auto_var + special_var + + implicit_var) + computed_var +lex:add_rule('variable', variable) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, assign + S(':$(){}'))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, word_char^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Embedded Bash. +local bash = lexer.load('bash') +bash:modify_rule('variable', token(lexer.VARIABLE, '$$' * word_char^1) + + bash:get_rule('variable') + variable) +local bash_start_rule = token(lexer.WHITESPACE, P('\t')) + + token(lexer.OPERATOR, P(';')) +local bash_end_rule = token(lexer.WHITESPACE, P('\n')) +lex:embed(bash, bash_start_rule, bash_end_rule) + +return lex diff --git a/lexlua/man.lua b/lexlua/man.lua new file mode 100644 index 000000000..c5f8d5a51 --- /dev/null +++ b/lexlua/man.lua @@ -0,0 +1,29 @@ +-- Copyright 2015-2018 David B. Lamkins <david@lamkins.net>. See License.txt. +-- man/roff LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('man') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Markup. +lex:add_rule('rule1', token(lexer.STRING, '.' * ('B' * P('R')^-1 + + 'I' * P('PR')^-1) * + lexer.nonnewline^0)) +lex:add_rule('rule2', token(lexer.NUMBER, '.' 
* S('ST') * 'H' * + lexer.nonnewline^0)) +lex:add_rule('rule3', token(lexer.KEYWORD, P('.br') + '.DS' + '.RS' + '.RE' + + '.PD')) +lex:add_rule('rule4', token(lexer.LABEL, '.' * (S('ST') * 'H' + '.TP'))) +lex:add_rule('rule5', token(lexer.VARIABLE, '.B' * P('R')^-1 + + '.I' * S('PR')^-1 + + '.PP')) +lex:add_rule('rule6', token(lexer.TYPE, '\\f' * S('BIPR'))) +lex:add_rule('rule7', token(lexer.PREPROCESSOR, lexer.starts_line('.') * + lexer.alpha^1)) + +return lex diff --git a/lexlua/markdown.lua b/lexlua/markdown.lua new file mode 100644 index 000000000..2622a11d7 --- /dev/null +++ b/lexlua/markdown.lua @@ -0,0 +1,102 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Markdown LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('markdown') + +-- Block elements. +lex:add_rule('header', + token('h6', lexer.starts_line('######') * lexer.nonnewline^0) + + token('h5', lexer.starts_line('#####') * lexer.nonnewline^0) + + token('h4', lexer.starts_line('####') * lexer.nonnewline^0) + + token('h3', lexer.starts_line('###') * lexer.nonnewline^0) + + token('h2', lexer.starts_line('##') * lexer.nonnewline^0) + + token('h1', lexer.starts_line('#') * lexer.nonnewline^0)) +local font_size = lexer.property_int['fontsize'] > 0 and + lexer.property_int['fontsize'] or 10 +local hstyle = 'fore:$(color.red)' +lex:add_style('h6', hstyle) +lex:add_style('h5', hstyle..',size:'..(font_size + 1)) +lex:add_style('h4', hstyle..',size:'..(font_size + 2)) +lex:add_style('h3', hstyle..',size:'..(font_size + 3)) +lex:add_style('h2', hstyle..',size:'..(font_size + 4)) +lex:add_style('h1', hstyle..',size:'..(font_size + 5)) + +lex:add_rule('blockquote', + token(lexer.STRING, + lpeg.Cmt(lexer.starts_line(S(' \t')^0 * '>'), + function(input, index) + local _, e = input:find('\n[ \t]*\r?\n', index) + return (e or #input) + 1 + end))) + 
+lex:add_rule('blockcode', + token('code', lexer.starts_line(P(' ')^4 + P('\t')) * -P('<') * + lexer.nonnewline^0 * lexer.newline^-1)) +lex:add_style('code', lexer.STYLE_EMBEDDED..',eolfilled') + +lex:add_rule('hr', + token('hr', + lpeg.Cmt(lexer.starts_line(S(' \t')^0 * lpeg.C(S('*-_'))), + function(input, index, c) + local line = input:match('[^\n]*', index) + line = line:gsub('[ \t]', '') + if line:find('[^'..c..']') or #line < 2 then + return nil + end + return (input:find('\n', index) or #input) + 1 + end))) +lex:add_style('hr', 'back:$(color.black),eolfilled') + +lex:add_rule('list', token('list', lexer.starts_line(S(' \t')^0 * (S('*+-') + + R('09')^1 * '.')) * + S(' \t'))) +lex:add_style('list', lexer.STYLE_CONSTANT) + +-- Whitespace. +local ws = token(lexer.WHITESPACE, S(' \t')^1 + S('\v\r\n')^1) +lex:add_rule('whitespace', ws) + +-- Span elements. +lex:add_rule('escape', token(lexer.DEFAULT, P('\\') * 1)) + +lex:add_rule('link_label', + token('link_label', lexer.delimited_range('[]') * ':') * ws * + token('link_url', (lexer.any - lexer.space)^1) * + (ws * token(lexer.STRING, lexer.delimited_range('"', false, true) + + lexer.delimited_range("'", false, true) + + lexer.delimited_range('()')))^-1) +lex:add_style('link_label', lexer.STYLE_LABEL) +lex:add_style('link_url', 'underlined') + +lex:add_rule('link', + token('link', P('!')^-1 * lexer.delimited_range('[]') * + (P('(') * (lexer.any - S(') \t'))^0 * + (S(' \t')^1 * + lexer.delimited_range('"', false, true))^-1 * ')' + + S(' \t')^0 * lexer.delimited_range('[]')) + + P('http://') * (lexer.any - lexer.space)^1)) +lex:add_style('link', 'underlined') + +lex:add_rule('strong', token('strong', P('**') * (lexer.any - '**')^0 * + P('**')^-1 + + P('__') * (lexer.any - '__')^0 * + P('__')^-1)) +lex:add_style('strong', 'bold') +lex:add_rule('em', token('em', lexer.delimited_range('*', true) + + lexer.delimited_range('_', true))) +lex:add_style('em', 'italics') +lex:add_rule('code', token('code', P('``') * (lexer.any 
- '``')^0 * P('``')^-1 + + lexer.delimited_range('`', true, true))) + +-- Embedded HTML. +local html = lexer.load('html') +local start_rule = lexer.starts_line(S(' \t')^0) * #P('<') * + html:get_rule('element') +local end_rule = token(lexer.DEFAULT, P('\n')) -- TODO: lexer.WHITESPACE errors +lex:embed(html, start_rule, end_rule) + +return lex diff --git a/lexlua/matlab.lua b/lexlua/matlab.lua new file mode 100644 index 000000000..2c576556f --- /dev/null +++ b/lexlua/matlab.lua @@ -0,0 +1,86 @@ +-- Copyright 2006-2018 Martin Morawetz. See License.txt. +-- Matlab LPeg lexer. +-- Based off of lexer code by Mitchell. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('matlab') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + break case catch continue do else elseif end end_try_catch end_unwind_protect + endfor endif endswitch endwhile for function endfunction global if otherwise + persistent replot return static switch try until unwind_protect + unwind_protect_cleanup varargin varargout while +]], true))) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abs any argvatan2 axes axis ceil cla clear clf columns cos delete diff disp + doc double drawnow exp figure find fix floor fprintf gca gcf get grid help + hist hold isempty isnull length load log log10 loglog max mean median min mod + ndims numel num2str ones pause plot printf quit rand randn rectangle rem + repmat reshape round rows save semilogx semilogy set sign sin size sizeof + size_equal sort sprintf squeeze sqrt std strcmp subplot sum tan tic title toc + uicontrol who xlabel ylabel zeros +]])) + +-- Constants. 
+lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + EDITOR I IMAGEPATH INFO_FILE J LOADPATH OCTAVE_VERSION PAGER PS1 PS2 PS4 PWD +]])) + +-- Variable. +lex:add_rule('variable', token(lexer.VARIABLE, word_match[[ + ans automatic_replot default_return_value do_fortran_indexing + define_all_return_values empty_list_elements_ok eps false gnuplot_binary + ignore_function_time_stamp implicit_str_to_num_ok Inf inf NaN nan + ok_to_lose_imaginary_part output_max_field_width output_precision + page_screen_output pi prefer_column_vectors prefer_zero_one_indexing + print_answer_id_name print_empty_dimensions realmax realmin + resize_on_range_error return_last_computed_value save_precision + silent_functions split_long_rows suppress_verbose_help_message + treat_neg_dim_as_zero true warn_assign_as_truth_value + warn_comma_in_global_decl warn_divide_by_zero warn_function_name_clash + whitespace_in_literal_matrix +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"') + + lexer.delimited_range('`'))) + +-- Comments. +local line_comment = (P('%') + '#') * lexer.nonnewline^0 +local block_comment = '%{' * (lexer.any - '%}')^0 * P('%}')^-1 +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer + + lexer.dec_num + lexer.hex_num + + lexer.oct_num)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, + S('!%^&*()[]{}-=+/\\|:;.,?<>~`´'))) + +-- Fold points. 
+lex:add_fold_point(lexer.KEYWORD, 'if', 'end') +lex:add_fold_point(lexer.KEYWORD, 'for', 'end') +lex:add_fold_point(lexer.KEYWORD, 'while', 'end') +lex:add_fold_point(lexer.KEYWORD, 'switch', 'end') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.COMMENT, '%{', '%}') +lex:add_fold_point(lexer.COMMENT, '%', lexer.fold_line_comments('%')) +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/moonscript.lua b/lexlua/moonscript.lua new file mode 100644 index 000000000..49a98d25f --- /dev/null +++ b/lexlua/moonscript.lua @@ -0,0 +1,141 @@ +-- Copyright 2016-2018 Alejandro Baez (https://keybase.io/baez). See License.txt. +-- Moonscript LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, S, R = lpeg.P, lpeg.S, lpeg.R + +local lex = lexer.new('moonscript', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('whitspace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + -- Lua. + and break do else elseif false for if in local nil not or return then true + while + -- Moonscript. + continue class export extends from import super switch unless using when with +]])) + +-- Error words. +lex:add_rule('error', token(lexer.ERROR, word_match[[function end]])) + +-- Self reference. +lex:add_rule('self_ref', token('self_ref', '@' * lexer.word + 'self')) +lex:add_style('self_ref', lexer.STYLE_LABEL) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + assert collectgarbage dofile error getmetatable ipairs load loadfile next + pairs pcall print rawequal rawget rawset require select setmetatable tonumber + tostring type xpcall + -- Added in 5.2. + rawlen +]])) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + _G _VERSION + -- Added in 5.2. + _ENV +]])) + +-- Libraries. 
+lex:add_rule('library', token('library', word_match[[ + -- Coroutine. + coroutine coroutine.create coroutine.resume coroutine.running coroutine.status + coroutine.wrap coroutine.yield + -- Coroutine added in 5.3. + coroutine.isyieldable + -- Module. + package package.cpath package.loaded package.loadlib package.path + package.preload + -- Module added in 5.2. + package.config package.searchers package.searchpath + -- UTF-8 added in 5.3. + utf8 utf8.char utf8.charpattern utf8.codepoint utf8.codes utf8.len utf8.offset + -- String. + string string.byte string.char string.dump string.find string.format + string.gmatch string.gsub string.len string.lower string.match string.rep + string.reverse string.sub string.upper + -- String added in 5.3. + string.pack string.packsize string.unpack + -- Table. + table table.concat table.insert table.remove table.sort + -- Table added in 5.2. + table.pack table.unpack + -- Table added in 5.3. + table.move + -- Math. + math math.abs math.acos math.asin math.atan math.ceil math.cos math.deg + math.exp math.floor math.fmod math.huge math.log math.max math.min math.modf + math.pi math.rad math.random math.randomseed math.sin math.sqrt math.tan + -- Math added in 5.3. + math.maxinteger math.mininteger math.tointeger math.type math.ult + -- IO. + io io.close io.flush io.input io.lines io.open io.output io.popen io.read + io.stderr io.stdin io.stdout io.tmpfile io.type io.write + -- OS. + os os.clock os.date os.difftime os.execute os.exit os.getenv os.remove + os.rename os.setlocale os.time os.tmpname + -- Debug. + debug debug.debug debug.gethook debug.getinfo debug.getlocal + debug.getmetatable debug.getregistry debug.getupvalue debug.sethook + debug.setlocal debug.setmetatable debug.setupvalue debug.traceback + -- Debug added in 5.2. + debug.getuservalue debug.setuservalue debug.upvalueid debug.upvaluejoin + + --- MoonScript 0.3.1 standard library. + -- Printing functions. + p + -- Table functions. 
+ run_with_scope defaultbl extend copy + -- Class/object functions. + is_object bind_methods mixin mixin_object mixin_table + -- Misc functions. + fold + -- Debug functions. + debug.upvalue +]])) +lex:add_style('library', lexer.STYLE_TYPE) + +-- Identifiers. +local identifier = token(lexer.IDENTIFIER, lexer.word) +local proper_ident = token('proper_ident', R('AZ') * lexer.word) +local tbl_key = token('tbl_key', lexer.word * ':' + ':' * lexer.word ) +lex:add_rule('identifier', tbl_key + proper_ident + identifier) +lex:add_style('proper_ident', lexer.STYLE_CLASS) +lex:add_style('tbl_key', lexer.STYLE_REGEX) + +local longstring = lpeg.Cmt('[' * lpeg.C(P('=')^0) * '[', + function(input, index, eq) + local _, e = input:find(']'..eq..']', index, true) + return (e or #input) + 1 + end) + +-- Strings. +local sq_str = lexer.delimited_range("'", false, true) +local dq_str = lexer.delimited_range('"', false, true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str) + + token('longstring', longstring)) +lex:add_style('longstring', lexer.STYLE_STRING) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '--' * (longstring + + lexer.nonnewline^0))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Function definition. +lex:add_rule('fndef', token('fndef', P('->') + '=>')) +lex:add_style('fndef', lexer.STYLE_PREPROCESSOR) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-*!\\/%^#=<>;:,.'))) +lex:add_rule('symbol', token('symbol', S('(){}[]'))) +lex:add_style('symbol', lexer.STYLE_EMBEDDED) + +return lex diff --git a/lexlua/mumps.lua b/lexlua/mumps.lua new file mode 100644 index 000000000..8a7d7d8f1 --- /dev/null +++ b/lexlua/mumps.lua @@ -0,0 +1,112 @@ +-- Copyright 2015-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- MUMPS (M) LPeg lexer. 
+ +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'mumps'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, ';' * l.nonnewline_esc^0) + +-- Strings. +local string = token(l.STRING, l.delimited_range('"', true)) + +-- Numbers. +local number = token(l.NUMBER, l.float + l.integer) -- TODO: float? + +-- Keywords. +local keyword = token(l.KEYWORD, word_match({ + -- Abbreviations. + 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'q', + 'r', 's', 'u', 'v', 'w', 'x', + -- Full. + 'break', 'close', 'do', 'else', 'for', 'goto', 'halt', 'hang', 'if', 'job', + 'kill', 'lock', 'merge', 'new', 'open', 'quit', 'read', 'set', 'use', 'view', + 'write', 'xecute', + -- Cache- or GTM-specific. + 'catch', 'continue', 'elseif', 'tcommit', 'throw', 'trollback', 'try', + 'tstart', 'while', +}, nil, true)) + +-- Functions. +local func = token(l.FUNCTION, '$' * word_match({ + -- Abbreviations. + 'a', 'c', 'd', 'e', 'f', 'fn', 'g', 'j', 'l', 'n', 'na', 'o', 'p', 'q', 'ql', + 'qs', 'r', 're', 's', 'st', 't', 'tr', 'v', + -- Full. + 'ascii', 'char', 'data', 'extract', 'find', 'fnumber', 'get', 'justify', + 'length', 'next', 'name', 'order', 'piece', 'query', 'qlength', 'qsubscript', + 'random', 'reverse', 'select', 'stack', 'text', 'translate', 'view', + -- Z function abbreviations. + 'zd', 'zdh', 'zdt', 'zdth', 'zh', 'zt', 'zth', 'zu', 'zp', + -- Z functions. + 'zabs', 'zarccos', 'zarcsin', 'zarctan', 'zcos', 'zcot', 'zcsc', 'zdate', + 'zdateh', 'zdatetime', 'zdatetimeh', 'zexp', 'zhex', 'zln', 'zlog', 'zpower', + 'zsec', 'zsin', 'zsqr', 'ztan', 'ztime', 'ztimeh', 'zutil', 'zf', 'zprevious', + -- Cache- or GTM-specific. 
+ 'bit', 'bitcount', 'bitfind', 'bitlogic', 'case', 'classmethod', 'classname', + 'decimal', 'double', 'factor', 'i', 'increment', 'inumber', 'isobject', + 'isvaliddouble', 'isvalidnum', 'li', 'list', 'lb', 'listbuild', 'ld', + 'listdata', 'lf', 'listfind', 'lfs', 'listfromstring', 'lg', 'listget', 'll', + 'listlength', 'listnext', 'ls', 'listsame', 'lts', 'listtostring', 'lv', + 'listvalid', 'locate', 'match', 'method', 'nc', 'nconvert', 'normalize', + 'now', 'num', 'number', 'parameter', 'prefetchoff', 'prefetchon', 'property', + 'replace', 'sc', 'sconvert', 'sortbegin', 'sortend', 'wa', 'wascii', 'wc', + 'wchar', 'we', 'wextract', 'wf', 'wfind', 'wiswide', 'wl', 'wlength', 'wre', + 'wreverse', 'xecute' +}, nil, true)) + +-- Variables. +local variable = token(l.VARIABLE, '$' * l.word_match({ + -- Abbreviations. + 'ec', 'es', 'et', 'h', 'i', 'j', 'k', 'p', 'q', 's', 'st', 't', 'tl', + -- Full. + 'device', 'ecode', 'estack', 'etrap', 'halt', 'horolog', 'io', 'job', + 'namespace', 'principal', 'quit', 'roles', 'storage', 'stack', 'system', + 'test', 'this', 'tlevel', 'username', 'x', 'y', + -- Z variable abbreviations. + 'za', 'zb', 'zc', 'ze', 'zh', 'zi', 'zj', 'zm', 'zn', 'zo', 'zp', 'zr', 'zs', + 'zt', 'zts', 'ztz', 'zv', + -- Z variables. + 'zchild', 'zeof', 'zerror', 'zhorolog', 'zio', 'zjob', 'zmode', 'zname', + 'znspace', 'zorder', 'zparent', 'zpi', 'zpos', 'zreference', 'zstorage', + 'ztimestamp', 'ztimezone', 'ztrap', 'zversion', +}, nil, true)) + +-- Function entity. +local entity = token(l.LABEL, l.starts_line(('%' + l.alpha) * l.alnum^0)) + +-- Support functions. +local support_function = '$$' * ('%' + l.alpha) * l.alnum^0 * + (('%' + l.alpha) * l.alnum^0)^-1 + +-- Identifiers. +local identifier = token(l.IDENTIFIER, l.alpha * l.alnum^0) + +-- Operators. 
+local operator = token(l.OPERATOR, S('+-/*<>!=_@#&|?:\\\',()[]')) + +M._rules = { + {'whitespace', ws}, + {'keyword', keyword}, + {'variable', variable}, + {'identifier', identifier}, + {'string', string}, + {'comment', comment}, + {'number', number}, + {'operator', operator}, +} + +M._foldsymbols = { + _patterns = {'%l+', '[{}]', '/%*', '%*/', '//'}, + [l.PREPROCESSOR] = {['if'] = 1, ifdef = 1, ifndef = 1, endif = -1}, + [l.OPERATOR] = {['{'] = 1, ['}'] = -1}, + [l.COMMENT] = {['/*'] = 1, ['*/'] = -1, ['//'] = l.fold_line_comments('//')} +} + +return M diff --git a/lexlua/myrddin.lua b/lexlua/myrddin.lua new file mode 100644 index 000000000..abe5b601a --- /dev/null +++ b/lexlua/myrddin.lua @@ -0,0 +1,54 @@ +-- Copyright 2017-2018 Michael Forney. See License.txt +-- Myrddin LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lex = lexer.new('myrddin') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + break const continue elif else extern false for generic goto if impl in match + pkg pkglocal sizeof struct trait true type union use var while +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + void bool char byte int uint int8 uint8 int16 uint16 int32 uint32 int64 uint64 + flt32 flt64 +]] + '@' * lexer.word)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = P{ + V'part' * P'*/'^-1, + part = '/*' * (V'full' + (lexer.any - '/*' - '*/'))^0, + full = V'part' * '*/', +} +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Numbers. 
+local digit = lexer.digit + '_' +local bdigit = R'01' + '_' +local xdigit = lexer.xdigit + '_' +local odigit = R'07' + '_' +local integer = '0x' * xdigit^1 + '0o' * odigit^1 + '0b' * bdigit^1 + digit^1 +local float = digit^1 * (('.' * digit^1) * (S'eE' * S'+-'^-1 * digit^1)^-1 + + ('.' * digit^1)^-1 * S'eE' * S'+-'^-1 * digit^1) +lex:add_rule('number', token(lexer.NUMBER, float + integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S'`#_+-/*%<>~!=^&|~:;,.()[]{}')) + +return lex diff --git a/lexlua/nemerle.lua b/lexlua/nemerle.lua new file mode 100644 index 000000000..9941c1a95 --- /dev/null +++ b/lexlua/nemerle.lua @@ -0,0 +1,66 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Nemerle LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('nemerle') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + _ abstract and array as base catch class def do else extends extern finally + foreach for fun if implements in interface internal lock macro match module + mutable namespace new out override params private protected public ref repeat + sealed static struct syntax this throw try type typeof unless until using + variant virtual when where while + -- Values. + null true false +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + bool byte char decimal double float int list long object sbyte short string + uint ulong ushort void +]])) + +-- Strings. +local sq_str = P('L')^-1 * lexer.delimited_range("'", true) +local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. 
+local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Preprocessor. +local preproc_word = word_match[[ + define elif else endif endregion error if ifdef ifndef line pragma region + undef using warning +]] +lex:add_rule('preproc', token(lexer.PREPROCESSOR, lexer.starts_line('#') * + S('\t ')^0 * preproc_word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.PREPROCESSOR, 'region', 'endregion') +lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/nim.lua b/lexlua/nim.lua new file mode 100644 index 000000000..aea719a1b --- /dev/null +++ b/lexlua/nim.lua @@ -0,0 +1,101 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Nim LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('nim', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + addr and as asm atomic bind block break case cast const continue converter + discard distinct div do elif else end enum except export finally for from + generic if import in include interface is isnot iterator lambda let macro + method mixin mod nil not notin object of or out proc ptr raise ref return + shared shl static template try tuple type var when while with without xor + yield +]], true))) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match([[ + -- Procs. + defined definedInScope new unsafeNew internalNew reset high low sizeof succ + pred inc dec newSeq len incl excl card ord chr ze ze64 toU8 toU16 toU32 abs + min max contains cmp setLen newString newStringOfCap add compileOption quit + shallowCopy del delete insert repr toFloat toBiggestFloat toInt toBiggestInt + addQuitProc substr zeroMem copyMem moveMem equalMem swap getRefcount clamp + isNil find contains pop each map GC_ref GC_unref echo debugEcho getTypeInfo + Open repopen Close EndOfFile readChar FlushFile readAll readFile writeFile + write readLine writeln getFileSize ReadBytes ReadChars readBuffer writeBytes + writeChars writeBuffer setFilePos getFilePos fileHandle cstringArrayToSeq + allocCStringArray deallocCStringArray atomicInc atomicDec compareAndSwap + setControlCHook writeStackTrace getStackTrace alloc alloc0 dealloc realloc + getFreeMem getTotalMem getOccupiedMem allocShared allocShared0 deallocShared + reallocShared IsOnStack GC_addCycleRoot GC_disable GC_enable GC_setStrategy + GC_enableMarkAndSweep GC_disableMarkAndSweep GC_fullCollect GC_getStatistics + nimDestroyRange getCurrentException getCurrentExceptionMsg onRaise likely + unlikely rawProc rawEnv finished slurp staticRead gorge staticExec rand + astToStr InstatiationInfo raiseAssert shallow compiles safeAdd locals + -- Iterators. + countdown countup items pairs fields fieldPairs lines + -- Templates. 
+ accumulateResult newException CurrentSourcePath assert doAssert onFailedAssert + eval + -- Threads. + running joinThread joinThreads createThread threadId myThreadId + -- Channels. + send recv peek ready +]], true))) + +-- Types. +lex:add_rule('type', token(lexer.TYPE , word_match([[ + int int8 int16 int32 int64 uint uint8 uint16 uint32 uint64 float float32 + float64 bool char string cstring pointer Ordinal auto any TSignedInt + TUnsignedInt TInteger TOrdinal TReal TNumber range array openarray varargs + seq set TSlice TThread TChannel + -- Meta Types. + expr stmt typeDesc void +]], true))) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + on off isMainModule CompileDate CompileTime NimVersion NimMajor NimMinor + NimPatch cpuEndian hostOS hostCPU appType QuitSuccess QuitFailure inf neginf + nan +]])) + +-- Strings. +local sq_str = lexer.delimited_range("'", true) +local dq_str = lexer.delimited_range('"', true) +local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local raw_dq_str = 'r' * lexer.delimited_range('"', false, true) +lex:add_rule('string', token(lexer.STRING, triple_dq_str + sq_str + dq_str + + raw_dq_str)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) + +-- Numbers. +local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 +local hex = '0' * S('xX') * lexer.xdigit^1 * ('_' * lexer.xdigit^1)^0 +local bin = '0' * S('bB') * S('01')^1 * ('_' * S('01')^1)^0 +local oct = '0o' * R('07')^1 +local integer = S('+-')^-1 * (bin + hex + oct + dec) * + ("'" * S('iIuUfF') * (P('8') + '16' + '32' + '64'))^-1 +local float = lexer.digit^1 * ('_' * lexer.digit^1)^0 * + ('.' * ('_' * lexer.digit)^0)^-1 * S('eE') * S('+-')^-1 * + lexer.digit^1 * ('_' * lexer.digit^1)^0 +lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) + +-- Operators. 
+lex:add_rule('operator', token(lexer.OPERATOR,
+                               S('=+-*/<>@$~&%|!?^.:\\`()[]{},;')))
+
+return lex
diff --git a/lexlua/nsis.lua b/lexlua/nsis.lua
new file mode 100644
index 000000000..0387791bc
--- /dev/null
+++ b/lexlua/nsis.lua
@@ -0,0 +1,146 @@
+-- Copyright 2006-2018 Robert Gieseke. See License.txt.
+-- NSIS LPeg lexer.
+-- Based on NSIS 2.46 docs: http://nsis.sourceforge.net/Docs/.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local lex = lexer.new('nsis')
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Comments (4.1).
+local line_comment = (P(';') + '#') * lexer.nonnewline^0
+local block_comment = '/*' * (lexer.any - '*/')^0 * '*/'
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") +
+                                           lexer.delimited_range('"') +
+                                           lexer.delimited_range('`')))
+
+-- Constants (4.2.3).
+lex:add_rule('constant', token(lexer.CONSTANT, word_match[[
+  $PROGRAMFILES $PROGRAMFILES32 $PROGRAMFILES64 $COMMONFILES $COMMONFILES32
+  $COMMONFILES64 $DESKTOP $EXEDIR $EXEFILE $EXEPATH ${NSISDIR} $WINDIR $SYSDIR
+  $TEMP $STARTMENU $SMPROGRAMS $SMSTARTUP $QUICKLAUNCH $DOCUMENTS $SENDTO $RECENT
+  $FAVORITES $MUSIC $PICTURES $VIDEOS $NETHOOD $FONTS $TEMPLATES $APPDATA
+  $LOCALAPPDATA $PRINTHOOD $INTERNET_CACHE $COOKIES $HISTORY $PROFILE
+  $ADMINTOOLS $RESOURCES $RESOURCES_LOCALIZED $CDBURN_AREA $HWNDPARENT
+  $PLUGINSDIR
+]]))
+-- TODO? Constants used in strings: $$ $\r $\n $\t
+
+-- Variables (4.2).
+lex:add_rule('variable', token(lexer.VARIABLE, word_match[[
+  $0 $1 $2 $3 $4 $5 $6 $7 $8 $9 $R0 $R1 $R2 $R3 $R4 $R5 $R6 $R7 $R8 $R9
+  $INSTDIR $OUTDIR $CMDLINE $LANGUAGE Var /GLOBAL
+]]) + '$' * lexer.word)
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+  -- Pages (4.5).
+  Page UninstPage PageEx PageEnd PageExEnd
+  -- Section commands (4.6).
+  AddSize Section SectionEnd SectionIn SectionGroup SectionGroupEnd
+  -- Functions (4.7).
+  Function FunctionEnd
+  -- Callbacks (4.7.2).
+  .onGUIInit .onInit .onInstFailed .onInstSuccess .onGUIEnd .onMouseOverSection
+  .onRebootFailed .onSelChange .onUserAbort .onVerifyInstDir un.onGUIInit
+  un.onInit un.onUninstFailed un.onUninstSuccess un.onGUIEnd un.onRebootFailed
+  un.onSelChange un.onUserAbort
+  -- General Attributes (4.8.1).
+  AddBrandingImage AllowRootDirInstall AutoCloseWindow BGFont BGGradient
+  BrandingText /TRIMLEFT /TRIMRIGHT /TRIMCENTER Caption ChangeUI CheckBitmap
+  CompletedText ComponentText CRCCheck DetailsButtonText DirText DirVar
+  DirVerify FileErrorText Icon InstallButtonText InstallColors InstallDir
+  InstallDirRegKey InstProgressFlags InstType LicenseBkColor LicenseData
+  LicenseForceSelection LicenseText MiscButtonText Name OutFile
+  RequestExecutionLevel SetFont ShowInstDetails ShowUninstDetails SilentInstall
+  SilentUnInstall SpaceTexts SubCaption UninstallButtonText UninstallCaption
+  UninstallIcon UninstallSubCaption UninstallText WindowIcon XPStyle admin auto
+  bottom checkbox false force height hide highest leave left nevershow none
+  normal off on radiobuttons right show silent silentlog top true user width
+  -- Compiler Flags (4.8.2).
+  AllowSkipFiles FileBufSize SetCompress SetCompressor /SOLID /FINAL zlib bzip2
+  lzma SetCompressorDictSize SetDatablockOptimize SetDateSave SetOverwrite
+  ifnewer ifdiff lastused try
+  -- Version Information (4.8.3).
+  VIAddVersionKey VIProductVersion /LANG ProductName Comments CompanyName
+  LegalCopyright FileDescription FileVersion ProductVersion InternalName
+  LegalTrademarks OriginalFilename PrivateBuild SpecialBuild
+  -- Basic Instructions (4.9.1).
+  Delete /REBOOTOK Exec ExecShell ExecWait File /nonfatal Rename ReserveFile
+  RMDir SetOutPath
+  -- Registry INI File Instructions (4.9.2).
+ DeleteINISec DeleteINIStr DeleteRegKey /ifempty DeleteRegValue EnumRegKey + EnumRegValue ExpandEnvStrings FlushINI ReadEnvStr ReadINIStr ReadRegDWORD + ReadRegStr WriteINIStr WriteRegBin WriteRegDWORD WriteRegStr WriteRegExpandStr + HKCR HKEY_CLASSES_ROOT HKLM HKEY_LOCAL_MACHINE HKCU HKEY_CURRENT_USER HKU + HKEY_USERS HKCC HKEY_CURRENT_CONFIG HKDD HKEY_DYN_DATA HKPD + HKEY_PERFORMANCE_DATA SHCTX SHELL_CONTEXT + -- General Purpose Instructions (4.9.3). + CallInstDLL CopyFiles /SILENT /FILESONLY CreateDirectory CreateShortCut + GetDLLVersion GetDLLVersionLocal GetFileTime GetFileTimeLocal GetFullPathName + /SHORT GetTempFileName SearchPath SetFileAttributes RegDLL UnRegDLL + -- Flow Control Instructions (4.9.4). + Abort Call ClearErrors GetCurrentAddress GetFunctionAddress GetLabelAddress + Goto IfAbort IfErrors IfFileExists IfRebootFlag IfSilent IntCmp IntCmpU + MessageBox MB_OK MB_OKCANCEL MB_ABORTRETRYIGNORE MB_RETRYCANCEL MB_YESNO + MB_YESNOCANCEL MB_ICONEXCLAMATION MB_ICONINFORMATION MB_ICONQUESTION + MB_ICONSTOP MB_USERICON MB_TOPMOST MB_SETFOREGROUND MB_RIGHT MB_RTLREADING + MB_DEFBUTTON1 MB_DEFBUTTON2 MB_DEFBUTTON3 MB_DEFBUTTON4 IDABORT IDCANCEL + IDIGNORE IDNO IDOK IDRETRY IDYES Return Quit SetErrors StrCmp StrCmpS + -- File Instructions (4.9.5). + FileClose FileOpen FileRead FileReadByte FileSeek FileWrite FileWriteByte + FindClose FindFirst FindNext + -- Uninstaller Instructions (4.9.6). + WriteUninstaller + -- Miscellaneous Instructions (4.9.7). + GetErrorLevel GetInstDirError InitPluginsDir Nop SetErrorLevel SetRegView + SetShellVarContext all current Sleep + -- String Manipulation Instructions (4.9.8). + StrCpy StrLen + -- Stack Support (4.9.9). + Exch Pop Push + -- Integer Support (4.9.10). + IntFmt IntOp + -- Reboot Instructions (4.9.11). + Reboot SetRebootFlag + -- Install Logging Instructions (4.9.12). + LogSet LogText + -- Section Management (4.9.13). 
+  SectionSetFlags SectionGetFlags SectionSetText SectionGetText
+  SectionSetInstTypes SectionGetInstTypes SectionSetSize SectionGetSize
+  SetCurInstType GetCurInstType InstTypeSetText InstTypeGetText
+  -- User Interface Instructions (4.9.14).
+  BringToFront CreateFont DetailPrint EnableWindow FindWindow GetDlgItem
+  HideWindow IsWindow LockWindow SendMessage SetAutoClose SetBrandingImage
+  SetDetailsView SetDetailsPrint listonly textonly both SetCtlColors /BRANDING
+  SetSilent ShowWindow
+  -- Multiple Languages Instructions (4.9.15).
+  LoadLanguageFile LangString LicenseLangString
+  -- Compile time commands (5).
+  !include !addincludedir !addplugindir !appendfile !cd !delfile !echo !error
+  !execute !packhdr !system !tempfile !warning !verbose {__FILE__} {__LINE__}
+  {__DATE__} {__TIME__} {__TIMESTAMP__} {NSIS_VERSION} !define !undef !ifdef
+  !ifndef !if !ifmacrodef !ifmacrondef !else !endif !insertmacro !macro
+  !macroend !searchparse !searchreplace
+]]))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER, lexer.integer))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%|&^~!<>')))
+
+-- Labels (4.3).
+lex:add_rule('label', token(lexer.LABEL, lexer.word * ':'))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+return lex
diff --git a/lexlua/null.lua b/lexlua/null.lua
new file mode 100644
index 000000000..8854d0433
--- /dev/null
+++ b/lexlua/null.lua
@@ -0,0 +1,4 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- Null LPeg lexer.
+
+return require('lexer').new('null')
diff --git a/lexlua/objective_c.lua b/lexlua/objective_c.lua
new file mode 100644
index 000000000..3d2f02ebb
--- /dev/null
+++ b/lexlua/objective_c.lua
@@ -0,0 +1,71 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- Objective C LPeg lexer.
+ +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('objective_c') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + -- From C. + asm auto break case const continue default do else extern false for goto if + inline register return sizeof static switch true typedef void volatile while + restrict _Bool _Complex _Pragma _Imaginary + -- Objective C. + oneway in out inout bycopy byref self super + -- Preprocessor directives. + @interface @implementation @protocol @end @private @protected @public @class + @selector @encode @defs @synchronized @try @throw @catch @finally + -- Constants. + TRUE FALSE YES NO NULL nil Nil METHOD_NULL +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + apply_t id Class MetaClass Object Protocol retval_t SEL STR IMP BOOL + TypedStream +]])) + +-- Strings. +local sq_str = P('L')^-1 * lexer.delimited_range("'", true) +local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Preprocessor. +local preproc_word = word_match[[ + define elif else endif error if ifdef ifndef import include line pragma undef + warning +]] +lex:add_rule('preprocessor', #lexer.starts_line('#') * + token(lexer.PREPROCESSOR, '#' * S('\t ')^0 * + preproc_word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) + +-- Fold symbols. 
+lex:add_fold_point(lexer.PREPROCESSOR, 'region', 'endregion') +lex:add_fold_point(lexer.PREPROCESSOR, 'if', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifdef', 'endif') +lex:add_fold_point(lexer.PREPROCESSOR, 'ifndef', 'endif') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/pascal.lua b/lexlua/pascal.lua new file mode 100644 index 000000000..495175f4c --- /dev/null +++ b/lexlua/pascal.lua @@ -0,0 +1,62 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Pascal LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('pascal') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + and array as at asm begin case class const constructor destructor + dispinterface div do downto else end except exports file final finalization + finally for function goto if implementation in inherited initialization inline + interface is label mod not object of on or out packed procedure program + property raise record repeat resourcestring set sealed shl shr static string + then threadvar to try type unit unsafe until uses var while with xor + absolute abstract assembler automated cdecl contains default deprecated dispid + dynamic export external far forward implements index library local message + name namespaces near nodefault overload override package pascal platform + private protected public published read readonly register reintroduce requires + resident safecall stdcall stored varargs virtual write writeln writeonly + false nil self true +]], true))) + +-- Functions. 
+lex:add_rule('function', token(lexer.FUNCTION, word_match([[ + chr ord succ pred abs round trunc sqr sqrt arctan cos sin exp ln odd eof eoln +]], true))) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match([[ + shortint byte char smallint integer word longint cardinal boolean bytebool + wordbool longbool real single double extended comp currency pointer +]], true))) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + S('uUrR')^-1 * + lexer.delimited_range("'", true, true))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local bblock_comment = '{' * (lexer.any - '}')^0 * P('}')^-1 +local pblock_comment = '(*' * (lexer.any - '*)')^0 * P('*)')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + bblock_comment + + pblock_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('LlDdFf')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('.,;^@:=<>+-/*()[]'))) + +return lex diff --git a/lexlua/perl.lua b/lexlua/perl.lua new file mode 100644 index 000000000..2dfc70152 --- /dev/null +++ b/lexlua/perl.lua @@ -0,0 +1,142 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Perl LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lex = lexer.new('perl') + +-- Whitespace. +lex:add_rule('perl', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + STDIN STDOUT STDERR BEGIN END CHECK INIT + require use + break continue do each else elsif foreach for if last local my next our + package return sub unless until while __FILE__ __LINE__ __PACKAGE__ + and or not eq ne lt gt le ge +]])) + +-- Markers. 
+lex:add_rule('marker', token(lexer.COMMENT, word_match[[__DATA__ __END__]] * + lexer.any^0)) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abs accept alarm atan2 bind binmode bless caller chdir chmod chomp chop chown + chr chroot closedir close connect cos crypt dbmclose dbmopen defined delete + die dump each endgrent endhostent endnetent endprotoent endpwent endservent + eof eval exec exists exit exp fcntl fileno flock fork format formline getc + getgrent getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin + getnetbyaddr getnetbyname getnetent getpeername getpgrp getppid getpriority + getprotobyname getprotobynumber getprotoent getpwent getpwnam getpwuid + getservbyname getservbyport getservent getsockname getsockopt glob gmtime goto + grep hex import index int ioctl join keys kill lcfirst lc length link listen + localtime log lstat map mkdir msgctl msgget msgrcv msgsnd new oct opendir open + ord pack pipe pop pos printf print prototype push quotemeta rand readdir read + readlink recv redo ref rename reset reverse rewinddir rindex rmdir scalar + seekdir seek select semctl semget semop send setgrent sethostent setnetent + setpgrp setpriority setprotoent setpwent setservent setsockopt shift shmctl + shmget shmread shmwrite shutdown sin sleep socket socketpair sort splice split + sprintf sqrt srand stat study substr symlink syscall sysread sysseek system + syswrite telldir tell tied tie time times truncate ucfirst uc umask undef + unlink unpack unshift untie utime values vec wait waitpid wantarray warn write +]])) + +local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}', ['<'] = '>'} +local literal_delimitted = P(function(input, index) -- for single delimiter sets + local delimiter = input:sub(index, index) + if not delimiter:find('%w') then -- only non alpha-numerics + local match_pos, patt + if delimiter_matches[delimiter] then + -- Handle nested delimiter/matches in strings. 
+ local s, e = delimiter, delimiter_matches[delimiter] + patt = lexer.delimited_range(s..e, false, false, true) + else + patt = lexer.delimited_range(delimiter) + end + match_pos = lpeg.match(patt, input, index) + return match_pos or #input + 1 + end +end) +local literal_delimitted2 = P(function(input, index) -- for 2 delimiter sets + local delimiter = input:sub(index, index) + -- Only consider non-alpha-numerics and non-spaces as delimiters. The + -- non-spaces are used to ignore operators like "-s". + if not delimiter:find('[%w ]') then + local match_pos, patt + if delimiter_matches[delimiter] then + -- Handle nested delimiter/matches in strings. + local s, e = delimiter, delimiter_matches[delimiter] + patt = lexer.delimited_range(s..e, false, false, true) + else + patt = lexer.delimited_range(delimiter) + end + first_match_pos = lpeg.match(patt, input, index) + final_match_pos = lpeg.match(patt, input, first_match_pos - 1) + if not final_match_pos then -- using (), [], {}, or <> notation + final_match_pos = lpeg.match(lexer.space^0 * patt, input, first_match_pos) + end + return final_match_pos or #input + 1 + end +end) + +-- Strings. 
+local sq_str = lexer.delimited_range("'") +local dq_str = lexer.delimited_range('"') +local cmd_str = lexer.delimited_range('`') +local heredoc = '<<' * P(function(input, index) + local s, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f;]+', index) + if s == index and delimiter then + local end_heredoc = '[\n\r\f]+' + local _, e = input:find(end_heredoc..delimiter, e) + return e and e + 1 or #input + 1 + end +end) +local lit_str = 'q' * P('q')^-1 * literal_delimitted +local lit_array = 'qw' * literal_delimitted +local lit_cmd = 'qx' * literal_delimitted +local lit_tr = (P('tr') + 'y') * literal_delimitted2 * S('cds')^0 +local regex_str = #P('/') * lexer.last_char_includes('-<>+*!~\\=%&|^?:;([{') * + lexer.delimited_range('/', true) * S('imosx')^0 +local lit_regex = 'qr' * literal_delimitted * S('imosx')^0 +local lit_match = 'm' * literal_delimitted * S('cgimosx')^0 +local lit_sub = 's' * literal_delimitted2 * S('ecgimosx')^0 +lex:add_rule('string', + token(lexer.STRING, sq_str + dq_str + cmd_str + heredoc + lit_str + + lit_array + lit_cmd + lit_tr) + + token(lexer.REGEX, regex_str + lit_regex + lit_match + lit_sub)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '#' * lexer.nonnewline_esc^0 +local block_comment = lexer.starts_line('=') * lexer.alpha * + (lexer.any - lexer.newline * '=cut')^0 * + (lexer.newline * '=cut')^-1 +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Variables. +local special_var = '$' * ('^' * S('ADEFHILMOPSTWX')^-1 + + S('\\"[]\'&`+*.,;=%~?@<>(|/!-') + + ':' * (lexer.any - ':') + + P('$') * -lexer.word + + lexer.digit^1) +local plain_var = ('$#' + S('$@%')) * P('$')^0 * lexer.word + '$#' +lex:add_rule('variable', token(lexer.VARIABLE, special_var + plain_var)) + +-- Operators. 
+lex:add_rule('operator', token(lexer.OPERATOR, S('-<>+*!~\\=/%&|^.?:;()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/php.lua b/lexlua/php.lua new file mode 100644 index 000000000..b989755c4 --- /dev/null +++ b/lexlua/php.lua @@ -0,0 +1,75 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- PHP LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lex = lexer.new('php') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + and array as bool boolean break case cfunction class const continue declare + default die directory do double echo else elseif empty enddeclare endfor + endforeach endif endswitch endwhile eval exit extends false float for foreach + function global if include include_once int integer isset list new null object + old_function or parent print real require require_once resource return static + stdclass string switch true unset use var while xor + __class__ __file__ __function__ __line__ __sleep __wakeup +]])) + +local word = (lexer.alpha + '_' + R('\127\255')) * + (lexer.alnum + '_' + R('\127\255'))^0 + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, '$' * word)) + +-- Strings. 
+local sq_str = lexer.delimited_range("'") +local dq_str = lexer.delimited_range('"') +local bt_str = lexer.delimited_range('`') +local heredoc = '<<<' * P(function(input, index) + local _, e, delimiter = input:find('([%a_][%w_]*)[\n\r\f]+', index) + if delimiter then + local _, e = input:find('[\n\r\f]+'..delimiter, e) + return e and e + 1 + end +end) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + bt_str + heredoc)) +-- TODO: interpolated code. + +-- Comments. +local line_comment = (P('//') + '#') * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!@%^*&()-+=|/?.,;:<>[]{}'))) + +-- Embedded in HTML. +local html = lexer.load('html') + +-- Embedded PHP. +local php_start_rule = token('php_tag', '<?' * ('php' * lexer.space)^-1) +local php_end_rule = token('php_tag', '?>') +html:embed(lex, php_start_rule, php_end_rule) +lex:add_style('php_tag', lexer.STYLE_EMBEDDED) + +-- Fold points. +lex:add_fold_point('php_tag', '<?', '?>') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.OPERATOR, '(', ')') + +return lex diff --git a/lexlua/pico8.lua b/lexlua/pico8.lua new file mode 100644 index 000000000..9e70802c3 --- /dev/null +++ b/lexlua/pico8.lua @@ -0,0 +1,39 @@ +-- Copyright 2016-2018 Alejandro Baez (https://keybase.io/baez). See License.txt. +-- PICO-8 lexer. 
+-- http://www.lexaloffle.com/pico-8.php + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('pico8') + +-- Whitespace +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + __lua__ __gfx__ __gff__ __map__ __sfx__ __music__ +]])) + +-- Identifiers +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments +lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0)) + +-- Numbers +lex:add_rule('number', token(lexer.NUMBER, lexer.integer)) + +-- Operators +lex:add_rule('operator', token(lexer.OPERATOR, S('_'))) + +-- Embed Lua into PICO-8. +local lua = lexer.load('lua') + +local lua_start_rule = token('pico8_tag', '__lua__') +local lua_end_rule = token('pico8_tag', '__gfx__' ) +lex:embed(lua, lua_start_rule, lua_end_rule) +lex:add_style('pico8_tag', lexer.STYLE_EMBEDDED) + +return lex diff --git a/lexlua/pike.lua b/lexlua/pike.lua new file mode 100644 index 000000000..77caed4c9 --- /dev/null +++ b/lexlua/pike.lua @@ -0,0 +1,56 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Pike LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('pike') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + break case catch continue default do else for foreach gauge if lambda return + sscanf switch while import inherit + -- Type modifiers. + constant extern final inline local nomask optional private protected public + static variant +]])) + +-- Types. 
+lex:add_rule('type', token(lexer.TYPE, word_match[[ + array class float function int mapping mixed multiset object program string + void +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true) + + '#' * lexer.delimited_range('"'))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '//' * lexer.nonnewline_esc^0 + + lexer.nested_pair('/*', '*/'))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('lLdDfF')^-1)) + +-- Preprocessors. +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') * + lexer.nonnewline^0)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('<>=!+-/*%&|^~@`.,:;()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/pkgbuild.lua b/lexlua/pkgbuild.lua new file mode 100644 index 000000000..bc5e9a672 --- /dev/null +++ b/lexlua/pkgbuild.lua @@ -0,0 +1,79 @@ +-- Copyright 2006-2018 gwash. See License.txt. +-- Archlinux PKGBUILD LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('pkgbuild') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Strings. 
+local sq_str = lexer.delimited_range("'", false, true) +local dq_str = lexer.delimited_range('"') +local ex_str = lexer.delimited_range('`') +local heredoc = '<<' * P(function(input, index) + local s, e, _, delimiter = + input:find('(["\']?)([%a_][%w_]*)%1[\n\r\f;]+', index) + if s == index and delimiter then + local _, e = input:find('[\n\r\f]+'..delimiter, e) + return e and e + 1 or #input + 1 + end +end) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + ex_str + heredoc)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + patch cd make patch mkdir cp sed install rm if then elif else fi case in esac + while for do done continue local return git svn co clone gconf-merge-schema + msg echo ln + -- Operators. + -a -b -c -d -e -f -g -h -k -p -r -s -t -u -w -x -O -G -L -S -N -nt -ot -ef -o + -z -n -eq -ne -lt -le -gt -ge -Np -i +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + build check package pkgver prepare +]] * '()')) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + -- We do *not* list pkgver srcdir and startdir here. + -- These are defined by makepkg but user should not alter them. + arch backup changelog checkdepends conflicts depends epoch groups install + license makedepends md5sums noextract optdepends options pkgbase pkgdesc + pkgname pkgrel pkgver provides replaces sha1sums sha256sums sha384sums + sha512sums source url validpgpkeys +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, + '$' * (S('!#?*@$') + + lexer.delimited_range('()', true, true) + + lexer.delimited_range('[]', true, true) + + lexer.delimited_range('{}', true, true) + + lexer.delimited_range('`', true, true) + + lexer.digit^1 + + lexer.word))) + +-- Operators. 
+lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^~.,:;?()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/powershell.lua b/lexlua/powershell.lua new file mode 100644 index 000000000..17bf988fc --- /dev/null +++ b/lexlua/powershell.lua @@ -0,0 +1,63 @@ +-- Copyright 2015-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- PowerShell LPeg lexer. +-- Contributed by Jeff Stone. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('powershell') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + Begin Break Continue Do Else End Exit For ForEach ForEach-Object Get-Date + Get-Random If Param Pause Powershell Process Read-Host Return Switch While + Write-Host +]], true))) + +-- Comparison Operators. +lex:add_rule('comparison', token(lexer.KEYWORD, '-' * word_match([[ + and as band bor contains eq ge gt is isnot le like lt match ne nomatch not + notcontains notlike or replace +]], true))) + +-- Parameters. +lex:add_rule('parameter', token(lexer.KEYWORD, '-' * word_match([[ + Confirm Debug ErrorAction ErrorVariable OutBuffer OutVariable Verbose WhatIf +]], true))) + +-- Properties. +lex:add_rule('property', token(lexer.KEYWORD, '.' * word_match([[ + day dayofweek dayofyear hour millisecond minute month second timeofday year +]], true))) + +-- Types. +lex:add_rule('type', token(lexer.KEYWORD, '[' * word_match([[ + array boolean byte char datetime decimal double hashtable int long single + string xml +]], true) * ']')) + +-- Variables. 
+lex:add_rule('variable', token(lexer.VARIABLE, + '$' * (lexer.digit^1 + lexer.word + + lexer.delimited_range('{}', true, true)))) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/*^&|~.,:;?()[]{}%`'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') + +return lex diff --git a/lexlua/prolog.lua b/lexlua/prolog.lua new file mode 100644 index 000000000..72a7d1927 --- /dev/null +++ b/lexlua/prolog.lua @@ -0,0 +1,129 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Prolog LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('prolog') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + -- Directives by manual scanning of SWI-Prolog source code + abolish arithmetic_function at_halt create_prolog_flag discontiguous dynamic + elif else endif format_predicate if initialization lazy_list_iterator listing + load_extensions meta_predicate mode module module_transparent multifile op + persistent pop_operators pred predicate_options prolog_load_context public + push_operators record redefine_system_predicate reexport set_prolog_flag + setting thread_local type use_foreign_library use_module volatile + -- Built-in predicates generated in SWI-Prolog via current_predictate/1. 
+ abolish abort absolute_file_name access_file acyclic_term add_import_module + append apply arg assert asserta assertz at_end_of_stream at_halt atom + atom_chars atom_codes atom_concat atomic atomic_concat atomic_list_concat + atomics_to_string atom_length atom_number atom_prefix atom_string atom_to_term + attach_packs attvar autoload_path bagof between b_getval blob break b_set_dict + b_setval byte_count call callable call_cleanup call_continuation call_dcg + call_residue_vars call_shared_object_function call_with_depth_limit + call_with_inference_limit cancel_halt catch character_count char_code + char_conversion char_type clause clause_property close close_shared_object + code_type collation_key compare compile_aux_clauses compile_predicates + compiling compound compound_name_arguments compound_name_arity consult + context_module copy_predicate_clauses copy_stream_data copy_term copy_term_nat + create_prolog_flag current_arithmetic_function current_atom current_blob + current_char_conversion current_engine current_flag current_format_predicate + current_functor current_input current_key current_locale current_module + current_op current_output current_predicate current_prolog_flag + current_resource current_signal current_trie cwd cyclic_term date_time_stamp + dcg_translate_rule debugging default_module del_attr del_attrs del_dict + delete_directory delete_file delete_import_module deterministic dict_create + dict_pairs directory_files divmod downcase_atom duplicate_term dwim_match + dwim_predicate engine_create engine_destroy engine_fetch engine_next + engine_next_reified engine_post engine_self engine_yield ensure_loaded erase + exception exists_directory exists_file expand_answer expand_file_name + expand_file_search_path expand_goal expand_query expand_term export + extern_indirect fail false fast_read fast_term_serialized fast_write + file_base_name file_directory_name file_name_extension file_search_path + fill_buffer findall findnsols flag float 
flush_output forall format + format_predicate format_time freeze frozen functor garbage_collect + garbage_collect_atoms garbage_collect_clauses gc_file_search_cache get0 get + get_attr get_attrs get_byte get_char get_code get_dict getenv get_flag + get_single_char get_string_code get_time goal_expansion ground halt ignore + import import_module instance integer intern_indirect is_absolute_file_name + is_dict is_engine is_list is_stream is_thread keysort known_licenses leash + length library_directory license line_count line_position load_files + locale_create locale_destroy locale_property make_directory make_library_index + memberchk message_hook message_property message_queue_create + message_queue_destroy message_queue_property message_to_string module + module_property msort mutex_create mutex_destroy mutex_lock mutex_property + mutex_statistics mutex_trylock mutex_unlock mutex_unlock_all name nb_current + nb_delete nb_getval nb_linkarg nb_link_dict nb_linkval nb_setarg nb_set_dict + nb_setval nl nonvar noprofile noprotocol normalize_space nospy nospyall not + notrace nth_clause nth_integer_root_and_remainder number number_chars + number_codes number_string numbervars once on_signal op open open_null_stream + open_resource open_shared_object open_string open_xterm peek_byte peek_char + peek_code peek_string phrase plus portray predicate_option_mode + predicate_option_type predicate_property print print_message + print_message_lines print_toplevel_variables profiler prolog + prolog_choice_attribute prolog_current_choice prolog_current_frame + prolog_cut_to prolog_debug prolog_event_hook prolog_file_type + prolog_frame_attribute prolog_list_goal prolog_load_context prolog_load_file + prolog_nodebug prolog_skip_frame prolog_skip_level prolog_stack_property + prolog_to_os_filename prompt1 prompt protocol protocola protocolling put + put_attr put_attrs put_byte put_char put_code put_dict pwd qcompile + random_property rational read read_clause read_history read_link 
+ read_pending_chars read_pending_codes read_string read_term + read_term_from_atom recorda recorded recordz redefine_system_predicate + reexport reload_library_index rename_file repeat require reset reset_profiler + residual_goals resource retract retractall same_file same_term see seeing seek + seen select_dict setarg set_end_of_stream setenv set_flag set_input set_locale + setlocale set_module setof set_output set_prolog_flag set_prolog_IO + set_prolog_stack set_random set_stream set_stream_position + setup_call_catcher_cleanup setup_call_cleanup shell shift size_file skip sleep + sort source_file source_file_property source_location split_string spy + stamp_date_time statistics stream_pair stream_position_data stream_property + string string_chars string_code string_codes string_concat string_length + string_lower string_upper strip_module style_check sub_atom sub_atom_icasechk + sub_string subsumes_term succ swiplrc tab tell telling term_attvars + term_expansion term_hash term_string term_to_atom term_variables + text_to_string thread_at_exit thread_create thread_detach thread_exit + thread_get_message thread_join thread_message_hook thread_peek_message + thread_property thread_self thread_send_message thread_setconcurrency + thread_signal thread_statistics throw time_file tmp_file tmp_file_stream told + trace tracing trie_destroy trie_gen trie_insert trie_insert_new trie_lookup + trie_new trie_property trie_term trim_stacks true ttyflush tty_get_capability + tty_goto tty_put tty_size unifiable unify_with_occurs_check unload_file + unsetenv upcase_atom use_module var variant_hash variant_sha1 var_number + var_property verbose_expansion version visible wait_for_input wildcard_match + with_mutex with_output_to working_directory write write_canonical write_length + writeln writeq write_term + -- Built-in functions generated in SWI-Prolog via current_arithmetic_function/1. 
+ xor rem rdiv mod div abs acos acosh asin asinh atan2 atan atanh ceil ceiling + copysign cos cosh cputime e epsilon erf erfc eval exp float + float_fractional_part float_integer_part floor gcd getbit inf integer lgamma + log10 log lsb max min msb nan pi popcount powm random random_float rational + rationalize round sign sin sinh sqrt tan tanh truncate +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +local line_comment = '%' * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.digit^1 * + ('.' * lexer.digit^1)^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('-!+\\|=:;&<>()[]{}'))) + +return lex diff --git a/lexlua/props.lua b/lexlua/props.lua new file mode 100644 index 000000000..634a54031 --- /dev/null +++ b/lexlua/props.lua @@ -0,0 +1,33 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Props LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('props', {lex_by_line = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Colors. +lex:add_rule('color', token('color', '#' * lexer.xdigit * lexer.xdigit * + lexer.xdigit * lexer.xdigit * + lexer.xdigit * lexer.xdigit)) +lex:add_style('color', lexer.STYLE_NUMBER) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Equals. +lex:add_rule('equals', token(lexer.OPERATOR, '=')) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Variables. 
+lex:add_rule('variable', token(lexer.VARIABLE, '$(' * (lexer.any - ')')^1 * + ')')) + +return lex diff --git a/lexlua/protobuf.lua b/lexlua/protobuf.lua new file mode 100644 index 000000000..5891955ea --- /dev/null +++ b/lexlua/protobuf.lua @@ -0,0 +1,45 @@ +-- Copyright 2016-2018 David B. Lamkins <david@lamkins.net>. See License.txt. +-- Protocol Buffer IDL LPeg lexer. +-- <https://developers.google.com/protocol-buffers/> + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('protobuf') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + contained syntax import option package message group oneof optional required + repeated default extend extensions to max reserved service rpc returns +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 + float double bool string bytes enum true false +]])) + +-- Strings. +local sq_str = P('L')^-1 * lexer.delimited_range("'", true) +local dq_str = P('L')^-1 * lexer.delimited_range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('<>=|;,.()[]{}'))) + +return lex diff --git a/lexlua/ps.lua b/lexlua/ps.lua new file mode 100644 index 000000000..a6a211dd7 --- /dev/null +++ b/lexlua/ps.lua @@ -0,0 +1,47 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. 
See License.txt. +-- Postscript LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('ps') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + pop exch dup copy roll clear count mark cleartomark counttomark exec if ifelse + for repeat loop exit stop stopped countexecstack execstack quit start + true false NULL +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + add div idiv mod mul sub abs ned ceiling floor round truncate sqrt atan cos + sin exp ln log rand srand rrand +]])) + +-- Identifiers. +local word = (lexer.alpha + '-') * (lexer.alnum + '-')^0 +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Strings. +local arrow_string = lexer.delimited_range('<>') +local nested_string = lexer.delimited_range('()', false, false, true) +lex:add_rule('string', token(lexer.STRING, arrow_string + nested_string)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, '/' * word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('[]{}'))) + +return lex diff --git a/lexlua/ps.lua.orig b/lexlua/ps.lua.orig new file mode 100644 index 000000000..c6a98faa9 --- /dev/null +++ b/lexlua/ps.lua.orig @@ -0,0 +1,167 @@ +-- Copyright 2017 Marcio Baraco <marciorps@gmail.com>. See LICENSE. +-- Postscript LPeg lexer. + +local l = require('lexer') +local token, word_match = l.token, l.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'ps'} + +-- Whitespace. +local ws = token(l.WHITESPACE, l.space^1) + +-- Comments. +local comment = token(l.COMMENT, '%' * l.nonnewline^0) + +-- Strings. 
+local nested_string = l.delimited_range('()', false, false, true) +local hex_string = P('<') * (l.xdigit + l.space)^0 * P('>')^-1 +local enc_string = P('<~') * (R('!u') + l.space)^0 * P('~>') +local str = token(l.STRING, nested_string + hex_string + enc_string) + +-- Numbers. +local frac = (P('.') * l.digit^1) +local expo = (S('eE') * S('+-')^-1 * l.digit^1) +local decm = S('+-')^-1 * l.digit ^ 1 * frac^-1 * expo^-1 +local radx = l.digit^-2 * '#' * l.alnum^1 +-- TODO: Accept only chars that fit radix, ie [01] for 2#, hex for 16# and so. +local number = token(l.NUMBER, decm + radx) + +-- PostScript allows almost all characters in names. +local word = (l.graph - S('()<>[]{}/%'))^1 +-- Names. +local identifier = token(l.IDENTIFIER, word) +-- Deferred Names. +local label = token(l.LABEL, '/' * word) +-- Immediately Evaluated Names. +local preproc = token(l.PREPROCESSOR, '//' * word) + +-- Object constructors. +local operator = token(l.OPERATOR, S('[]{}=') + P('<<') + P('>>') + P('==')) + +-- Operators: +-- + l.KEYWORD for basic ops +-- + l.FUNCTION for graphic ops +-- + l.CLASS for weird ps ops +local keyword = token(l.KEYWORD, word_match{ + -- Control operators. + 'exec', 'eexec', 'if', 'ifelse', 'for', 'repeat', 'loop', 'exit', 'stop', + 'stopped', 'countexecstack', 'execstack', 'quit', 'start', + -- Stack manipulation operators. + 'pop', 'exch', 'dup', 'copy', 'index', 'roll', 'clear', 'count', 'mark', + 'cleartomark', 'counttomark', + -- Array and operators. + 'array', 'string', 'length', 'get', 'put', 'getinterval', 'putinterval', + 'aload', 'astore', 'packedarray', 'setpacking', 'currentpacking', 'forall', + 'anchorsearch', 'search', 'token', + -- Dictionary operators. + 'dict', 'maxlength', 'begin', 'end', 'def', 'undef', 'load', 'store', 'known', + 'where', 'currentdict', 'errordict', 'systemdict', 'userdict', 'globaldict', + 'shareddict', 'statusdict', 'countdictstack', 'cleardictstack', 'dictstack', + -- Type, attribute and conversion operators. 
+ 'type', 'cvlit', 'cvx', 'cvi', 'cvn', 'cvrs', 'cvs', 'cvr', 'xcheck', + 'executeonly', 'noaccess', 'readonly', 'rcheck', 'wcheck', + -- Arithmetic and math operators. + 'add', 'div', 'idiv', 'mod', 'mul', 'sub', 'abs', 'neg', 'ceiling', 'floor', + 'round', 'truncate', 'sqrt', 'atan', 'cos', 'sin', 'exp', 'ln', 'log', 'rand', + 'srand', 'rrand', + -- Relational, boolean and bitwise operators. + 'eq', 'ne', 'ge', 'gt', 'le', 'lt', 'and', 'not', 'or', 'xor', 'true', + 'false', 'bitshift', + -- Coordinate system and matrix operators. + 'matrix', 'initmatrix', 'identmatrix', 'defaultmatrix', 'currentmatrix', + 'setmatrix', 'translate', 'scale', 'rotate', 'concat', 'concatmatrix', + 'transform', 'dtransform', 'itransform', 'idtransform', 'invertmatrix', +}) +local func = token(l.FUNCTION, word_match{ + -- Path construction operators. + 'newpath', 'currentpoint', 'moveto', 'rmoveto', 'lineto', 'rlineto', 'arc', + 'arcn', 'arct', 'arcto', 'curveto', 'rcurveto', 'closepath', 'flattenpath', + 'reversepath', 'strokepath', 'ustrokepath', 'charpath', 'uappend', 'clippath', + 'setbbox', 'pathbbox', 'pathforall', 'upath', 'ucache', 'initclip', 'clip', + 'eoclip', 'rectclip', + -- Glyph and font operators. + 'definefont', 'composefont', 'undefinefont', 'findfont', 'scalefont', + 'makefont', 'setfont', 'rootfont', 'currentfont', 'selectfont', 'show', + 'ashow', 'widthshow', 'awidthshow', 'xshow', 'yshow', 'xyshow', 'glyphshow', + 'stringwidth', 'cshow', 'kshow', 'findencoding', 'FontDirectory', + 'GlobalFontDirectory', 'SharedFontDirectory', 'StandardEncoding', + 'ISOLatin1Encoding', 'setcachedevice', 'setcachedevice2', 'setcharwidth', + -- CID Font operators. 
+ 'addglyph', 'beginbfchar', 'beginbfrange', 'begincidchar', 'begincidrange', + 'begincmap', 'begincodespacerange', 'beginnotdefchar', 'beginnotdefrange', + 'beginrearrangedfont', 'beginusematrix', 'endbfchar', 'endbfrange', + 'endcidchar', 'endcidrange', 'endcmap', 'endcodespacerange', 'endnotdefchar', + 'endnotdefrange', 'endrearrangedfont', 'endusermatrix', 'removeall', + 'removeglyphs', 'StartData', 'usecmap', 'usefont', + -- Painting operations. + 'erasepage', 'stroke', 'fill', 'eofill', 'rectstroke', 'rectfill', 'ustroke', + 'ufill', 'ueofill', 'shfill', 'image', 'imagemask', 'colorimage', + -- Insideness testing operators. + 'infill', 'ineofill', 'inufill', 'inueofill', 'instroke', 'inustroke', + -- Form and pattern operators. + 'makepattern', 'setpattern', 'execform', + -- Graphics state operators. + 'gsave', 'grestore', 'clipsave', 'cliprestore', 'grestoreall', 'initgraphics', + 'gstate', 'setgstate', 'currentgstate', 'setlinewidth', 'currentlinewidth', + 'setlinecap', 'currentlinecap', 'setlinejoin', 'currentlinejoin', + 'setmiterlimit', 'currentmiterlimit', 'setstrokeadjust', + 'currentstrokeadjust', 'setdash', 'currentdash', 'setcolorspace', + 'currentcolorspace', 'setcolor', 'setgray', 'currentgray', 'sethsbcolor', + 'currenthsbcolor', 'setrgbcolor', 'currentrgbcolor', 'setcmykcolor', + 'currentcmykcolor', 'sethalftone', 'currenthalftone', 'setscreen', + 'currentscreen', 'setcolorscreen', 'currentcolorscreen', 'settransfer', + 'currenttransfer', 'setcolortransfer', 'currentcolortransfer', + 'setblackgeneration', 'currentblackgeneration', 'setundercolorremoval', + 'currentundercolorremoval', 'setcolorrendering', 'currentcolorrendering', + 'setflat', 'currentflat', 'setoverprint', 'currentoverprint', 'setsmoothness', + 'currentsmoothness', 'currentcolor', + -- Device setup operators. 
+ 'showpage', 'copypage', 'setpagedevice', 'currentpagedevice', 'nulldevice', + 'currenttrapparams', 'settrapparams', 'settrapzone', +}) +local misc = token(l.CLASS, word_match{ + -- Miscellaneous operators + 'defineresource', 'undefineresource', 'findresource', 'findcolorrendering', + 'resourcestatus', 'resourceforall', 'GetHalftoneName', 'GetPageDeviceName', + 'GetSubstituteCRD', 'save', 'restore', 'setglobal', 'setshared', + 'currentglobal', 'gcheck', 'scheck', 'startjob', 'defineuserobject', + 'execuserobject', 'undefineuserobject', 'UserObjects', 'bind', 'null', + 'version', 'realtime', 'usertime', 'languagelevel', 'product', 'revision', + 'serialnumber', 'executive', 'echo', 'prompt', 'setsystemparams', + 'currentsystemparams', 'setuserparams', 'currentuserparams', 'setdevparams', + 'currentdevparams', 'vmreclaim', 'setvmthreshold', 'vmstatus', 'cachestatus', + 'setcachelimit', 'setcacheparams', 'currentcacheparams', 'setucacheparams', + 'ucachestatus', 'currentshared', 'exitserver', 'serverdict', + -- File operators + 'file', 'filter', 'closefile', 'read', 'write', 'readhexstring', + 'writehexstring', 'readstring', 'writestring', 'readline', 'bytesavailable', + 'flush', 'flushfile', 'resetfile', 'status', 'run', 'currentfile', + 'deletefile', 'renamefile', 'filenameforall', 'setfileposition', + 'fileposition', 'print', 'stack', 'pstack', 'printobject', 'writeobject', + 'setobjectformat', 'currentobjectformat', + -- Errors. 
+ 'configurationerror', 'dictfull', 'dictstackoverflow', 'dictstackunderflow', + 'execstackoverflow', 'handleerror', 'interrupt', 'invalidaccess', + 'invalidexit', 'invalidfileaccess', 'invalidfont', 'invalidrestore', + 'ioerror', 'limitcheck', 'nocurrentpoint', 'rangecheck', 'stackoverflow', + 'stackunderflow', 'syntaxerror', 'timeout', 'typecheck', 'undefined', + 'undefinedfilename', 'undefinedresource', 'undefinedresult', 'unmatchedmark', + 'unregistered', 'VMerror', +}) + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'number', number}, + {'preprocessor', preproc}, + {'label', label}, + {'keyword', keyword}, + {'function', func}, + {'class', misc}, + {'operator', operator}, + {'string', str}, + {'identifier', identifier}, +} + +return M diff --git a/lexlua/pure.lua b/lexlua/pure.lua new file mode 100644 index 000000000..87f456eaf --- /dev/null +++ b/lexlua/pure.lua @@ -0,0 +1,50 @@ +-- Copyright 2015-2018 David B. Lamkins <david@lamkins.net>. See License.txt. +-- pure LPeg lexer, see http://purelang.bitbucket.org/ + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('pure') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Pragmas. +local hashbang = lexer.starts_line('#!') * (lexer.nonnewline - '//')^0 +lex:add_rule('pragma', token(lexer.PREPROCESSOR, hashbang)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + namespace with end using interface extern let const def type public private + nonfix outfix infix infixl infixr prefix postfix if otherwise when case of + then else +]])) + +-- Numbers. 
+local bin = '0' * S('Bb') * S('01')^1 +local hex = '0' * S('Xx') * (R('09') + R('af') + R('AF'))^1 +local dec = R('09')^1 +local int = (bin + hex + dec) * P('L')^-1 +local rad = P('.') - '..' +local exp = (S('Ee') * S('+-')^-1 * int)^-1 +local flt = int * (rad * dec)^-1 * exp + int^-1 * rad * dec * exp +lex:add_rule('number', token(lexer.NUMBER, flt + int)) + +-- Operators. +local punct = S('+-/*%<>~!=^&|?~:;,.()[]{}@#$`\\\'') +local dots = P('..') +lex:add_rule('operator', token(lexer.OPERATOR, dots + punct)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true))) + +return lex diff --git a/lexlua/python.lua b/lexlua/python.lua new file mode 100644 index 000000000..c3e76ac7e --- /dev/null +++ b/lexlua/python.lua @@ -0,0 +1,104 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Python LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('python', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + and as assert break class continue def del elif else except exec finally for + from global if import in is lambda nonlocal not or pass print raise return try + while with yield + -- Descriptors/attr access. + __get__ __set__ __delete__ __slots__ + -- Class. + __new__ __init__ __del__ __repr__ __str__ __cmp__ __index__ __lt__ __le__ + __gt__ __ge__ __eq__ __ne__ __hash__ __nonzero__ __getattr__ __getattribute__ + __setattr__ __delattr__ __call__ + -- Operator. 
+ __add__ __sub__ __mul__ __div__ __floordiv__ __mod__ __divmod__ __pow__ + __and__ __xor__ __or__ __lshift__ __rshift__ __nonzero__ __neg__ __pos__ + __abs__ __invert__ __iadd__ __isub__ __imul__ __idiv__ __ifloordiv__ __imod__ + __ipow__ __iand__ __ixor__ __ior__ __ilshift__ __irshift__ + -- Conversions. + __int__ __long__ __float__ __complex__ __oct__ __hex__ __coerce__ + -- Containers. + __len__ __getitem__ __missing__ __setitem__ __delitem__ __contains__ __iter__ + __getslice__ __setslice__ __delslice__ + -- Module and class attribs. + __doc__ __name__ __dict__ __file__ __path__ __module__ __bases__ __class__ + __self__ + -- Stdlib/sys. + __builtin__ __future__ __main__ __import__ __stdin__ __stdout__ __stderr__ + -- Other. + __debug__ __doc__ __import__ __name__ +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abs all any apply basestring bool buffer callable chr classmethod cmp coerce + compile complex copyright credits delattr dict dir divmod enumerate eval + execfile exit file filter float frozenset getattr globals hasattr hash help + hex id input int intern isinstance issubclass iter len license list locals + long map max min object oct open ord pow property quit range raw_input reduce + reload repr reversed round set setattr slice sorted staticmethod str sum super + tuple type unichr unicode vars xrange zip +]])) + +-- Constants. 
+lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + ArithmeticError AssertionError AttributeError BaseException DeprecationWarning + EOFError Ellipsis EnvironmentError Exception False FloatingPointError + FutureWarning GeneratorExit IOError ImportError ImportWarning IndentationError + IndexError KeyError KeyboardInterrupt LookupError MemoryError NameError None + NotImplemented NotImplementedError OSError OverflowError + PendingDeprecationWarning ReferenceError RuntimeError RuntimeWarning + StandardError StopIteration SyntaxError SyntaxWarning SystemError SystemExit + TabError True TypeError UnboundLocalError UnicodeDecodeError + UnicodeEncodeError UnicodeError UnicodeTranslateError UnicodeWarning + UserWarning ValueError Warning ZeroDivisionError +]])) + +-- Self. +lex:add_rule('self', token('self', P('self'))) +lex:add_style('self', lexer.STYLE_TYPE) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline_esc^0)) + +-- Strings. +local sq_str = P('u')^-1 * lexer.delimited_range("'", true) +local dq_str = P('U')^-1 * lexer.delimited_range('"', true) +local triple_sq_str = "'''" * (lexer.any - "'''")^0 * P("'''")^-1 +local triple_dq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +-- TODO: raw_strs cannot end in single \. +local raw_sq_str = P('u')^-1 * 'r' * lexer.delimited_range("'", false, true) +local raw_dq_str = P('U')^-1 * 'R' * lexer.delimited_range('"', false, true) +lex:add_rule('string', token(lexer.STRING, triple_sq_str + triple_dq_str + + sq_str + dq_str + raw_sq_str + + raw_dq_str)) + +-- Numbers. +local dec = lexer.digit^1 * S('Ll')^-1 +local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0 +local oct = '0' * R('07')^1 * S('Ll')^-1 +local integer = S('+-')^-1 * (bin + lexer.hex_num + oct + dec) +lex:add_rule('number', token(lexer.NUMBER, lexer.float + integer)) + +-- Decorators. 
+lex:add_rule('decorator', token('decorator', '@' * lexer.nonnewline^0)) +lex:add_style('decorator', lexer.STYLE_PREPROCESSOR) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~`'))) + +return lex diff --git a/lexlua/rails.lua b/lexlua/rails.lua new file mode 100644 index 000000000..8d324acd7 --- /dev/null +++ b/lexlua/rails.lua @@ -0,0 +1,54 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Ruby on Rails LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('rails', {inherit = lexer.load('ruby')}) + +-- Whitespace +lex:modify_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Functions. + +-- ActionPack. +lex:modify_rule('function', token(lexer.FUNCTION, word_match[[ + before_filter skip_before_filter skip_after_filter after_filter around_filter + filter filter_parameter_logging layout require_dependency render render_action + render_text render_file render_template render_nothing render_component + render_without_layout rescue_from url_for redirect_to redirect_to_path + redirect_to_url respond_to helper helper_method model service observer + serialize scaffold verify hide_action +]]) + + +-- View helpers. 
+token(lexer.FUNCTION, word_match[[ + check_box content_for error_messages_for form_for fields_for file_field + hidden_field image_submit_tag label link_to password_field radio_button submit + text_field text_area +]]) + + +-- ActiveRecord +token(lexer.FUNCTION, word_match[[ + after_create after_destroy after_save after_update after_validation + after_validation_on_create after_validation_on_update before_create + before_destroy before_save before_update before_validation + before_validation_on_create before_validation_on_update composed_of belongs_to + has_one has_many has_and_belongs_to_many validate validates validate_on_create + validates_numericality_of validate_on_update validates_acceptance_of + validates_associated validates_confirmation_of validates_each + validates_format_of validates_inclusion_of validates_exclusion_of + validates_length_of validates_presence_of validates_size_of + validates_uniqueness_of + attr_protected attr_accessible attr_readonly accepts_nested_attributes_for + default_scope scope +]]) + + +-- ActiveSupport +token(lexer.FUNCTION, word_match[[ + alias_method_chain alias_attribute delegate cattr_accessor mattr_accessor + returning memoize +]]) + lex:get_rule('function')) + +return lex diff --git a/lexlua/rc.lua b/lexlua/rc.lua new file mode 100644 index 000000000..868ddbc49 --- /dev/null +++ b/lexlua/rc.lua @@ -0,0 +1,54 @@ +-- Copyright 2017-2018 Michael Forney. See License.txt. +-- rc LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('rc') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + for in while if not switch fn builtin cd eval exec exit flag rfork shift + ulimit umask wait whatis . ~ +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. 
+local str = lexer.delimited_range("'", false, true)
+local heredoc = '<<' * P(function(input, index)
+  local s, e, _, delimiter = input:find('[ \t]*(["\']?)([%w!"%%+,-./:?@_~]+)%1',
+                                        index)
+  if s == index and delimiter then
+    -- Escape Lua pattern magic characters before searching for the end tag.
+    delimiter = delimiter:gsub('[%%+-.?]', '%%%1')
+    local _, e = input:find('[\n\r]'..delimiter..'[\n\r]', e)
+    return e and e + 1 or #input + 1
+  end
+end)
+lex:add_rule('string', token(lexer.STRING, str + heredoc))
+
+-- Comments.
+lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0))
+
+-- Numbers. Match float before integer so '1.5' lexes as one number token.
+lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer))
+
+-- Variables.
+lex:add_rule('variable', token(lexer.VARIABLE, '$' * S('"#')^-1 *
+                                               ('*' + lexer.digit^1 +
+                                                lexer.word)))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('@`=!<>*&^|;?()[]{}') +
+                                               '\\\n'))
+
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#'))
+
+return lex
diff --git a/lexlua/rebol.lua b/lexlua/rebol.lua
new file mode 100644
index 000000000..a6fc68e93
--- /dev/null
+++ b/lexlua/rebol.lua
@@ -0,0 +1,98 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- Rebol LPeg lexer.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local lex = lexer.new('rebol')
+
+-- Whitespace.
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+
+-- Comments.
+local line_comment = ';' * lexer.nonnewline^0
+local block_comment = 'comment' * P(' ')^-1 *
+                      lexer.delimited_range('{}', false, true)
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abs absolute add and~ at back change clear complement copy cp divide fifth + find first fourth head insert last make max maximum min minimum multiply + negate next or~ pick poke power random remainder remove second select skip + sort subtract tail third to trim xor~ + alias all any arccosine arcsine arctangent bind break browse call + caret-to-offset catch checksum close comment compose compress cosine debase + decompress dehex detab dh-compute-key dh-generate-key dh-make-key difference + disarm do dsa-generate-key dsa-make-key dsa-make-signature + dsa-verify-signature either else enbase entab exclude exit exp foreach form + free get get-modes halt hide if in intersect load log-10 log-2 log-e loop + lowercase maximum-of minimum-of mold not now offset-to-caret open parse prin + print protect q query quit read read-io recycle reduce repeat return reverse + rsa-encrypt rsa-generate-key rsa-make-key save secure set set-modes show sine + size-text square-root tangent textinfo throw to-hex to-local-file + to-rebol-file trace try union unique unprotect unset until update uppercase + use wait while write write-io + basic-syntax-header crlf font-fixed font-sans-serif font-serif list-words + outstr val value + about alert alter append array ask boot-prefs build-tag center-face change-dir + charset choose clean-path clear-fields confine confirm context cvs-date + cvs-version decode-cgi decode-url deflag-face delete demo desktop dirize + dispatch do-boot do-events do-face do-face-alt does dump-face dump-pane echo + editor emailer emit extract find-by-type find-key-face find-window flag-face + flash focus for forall forever forskip func function get-net-info get-style + has help hide-popup import-email inform input insert-event-func join launch + launch-thru layout license list-dir load-image load-prefs load-thru make-dir + make-face net-error open-events parse-email-addrs parse-header + parse-header-date parse-xml 
path-thru probe protect-system read-net read-thru + reboot reform rejoin remold remove-event-func rename repend replace request + request-color request-date request-download request-file request-list + request-pass request-text resend save-prefs save-user scroll-para send + set-font set-net set-para set-style set-user set-user-name show-popup source + split-path stylize switch throw-on-error to-binary to-bitset to-block to-char + to-date to-decimal to-email to-event to-file to-get-word to-hash to-idate + to-image to-integer to-issue to-list to-lit-path to-lit-word to-logic to-money + to-none to-pair to-paren to-path to-refinement to-set-path to-set-word + to-string to-tag to-time to-tuple to-url to-word unfocus uninstall unview + upgrade Usage vbug view view-install view-prefs what what-dir write-user + return at space pad across below origin guide tabs indent style styles size + sense backcolor do none + action? any-block? any-function? any-string? any-type? any-word? binary? + bitset? block? char? datatype? date? decimal? email? empty? equal? error? + even? event? file? function? get-word? greater-or-equal? greater? hash? head? + image? index? integer? issue? length? lesser-or-equal? lesser? library? list? + lit-path? lit-word? logic? money? native? negative? none? not-equal? number? + object? odd? op? pair? paren? path? port? positive? refinement? routine? same? + series? set-path? set-word? strict-equal? strict-not-equal? string? struct? + tag? tail? time? tuple? unset? url? word? zero? connected? crypt-strength? + exists-key? input? script? type? value? ? ?? dir? exists-thru? exists? + flag-face? found? in-window? info? inside? link-app? link? modified? offset? + outside? screen-offset? size? span? view? viewed? win-offset? within? + action! any-block! any-function! any-string! any-type! any-word! binary! + bitset! block! char! datatype! date! decimal! email! error! event! file! + function! get-word! hash! image! integer! issue! library! list! lit-path! 
+ lit-word! logic! money! native! none! number! object! op! pair! paren! path! + port! refinement! routine! series! set-path! set-word! string! struct! symbol! + tag! time! tuple! unset! url! word! + true false self +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + '-') * + (lexer.alnum + '-')^0)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"', true) + + lexer.delimited_range('{}') + + "'" * lexer.word)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=<>+/*:()[]'))) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '{', '}') +lex:add_fold_point(lexer.COMMENT, ';', lexer.fold_line_comments(';')) +lex:add_fold_point(lexer.OPERATOR, '[', ']') + +return lex diff --git a/lexlua/rest.lua b/lexlua/rest.lua new file mode 100644 index 000000000..b1af7c562 --- /dev/null +++ b/lexlua/rest.lua @@ -0,0 +1,259 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- reStructuredText LPeg lexer. + +local l = require('lexer') +local token, word_match, starts_line = l.token, l.word_match, l.starts_line +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'rest'} + +-- Whitespace. +local ws = token(l.WHITESPACE, S(' \t')^1 + l.newline^1) +local any_indent = S(' \t')^0 + +-- Section titles (2 or more characters). 
+local adornment_chars = lpeg.C(S('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')) +local adornment = lpeg.C(adornment_chars^2 * any_indent) * (l.newline + -1) +local overline = lpeg.Cmt(starts_line(adornment), function(input, index, adm, c) + if not adm:find('^%'..c..'+%s*$') then return nil end + local rest = input:sub(index) + local lines = 1 + for line, e in rest:gmatch('([^\r\n]+)()') do + if lines > 1 and line:match('^(%'..c..'+)%s*$') == adm then + return index + e - 1 + end + if lines > 3 or #line > #adm then return nil end + lines = lines + 1 + end + return #input + 1 +end) +local underline = lpeg.Cmt(starts_line(adornment), function(_, index, adm, c) + local pos = adm:match('^%'..c..'+()%s*$') + return pos and index - #adm + pos - 1 or nil +end) +-- Token needs to be a predefined one in order for folder to work. +local title = token(l.CONSTANT, overline + underline) + +-- Lists. +local bullet_list = S('*+-') -- TODO: '•‣⁃', as lpeg does not support UTF-8 +local enum_list = P('(')^-1 * + (l.digit^1 + S('ivxlcmIVXLCM')^1 + l.alnum + '#') * S('.)') +local field_list = ':' * (l.any - ':')^1 * P(':')^-1 +local option_word = l.alnum * (l.alnum + '-')^0 +local option = S('-/') * option_word * (' ' * option_word)^-1 + + '--' * option_word * ('=' * option_word)^-1 +local option_list = option * (',' * l.space^1 * option)^-1 +local list = #(l.space^0 * (S('*+-:/') + enum_list)) * + starts_line(token('list', l.space^0 * (option_list + bullet_list + + enum_list + field_list) * + l.space)) + +-- Literal block. 
+local block = P('::') * (l.newline + -1) * function(input, index) + local rest = input:sub(index) + local level, quote = #rest:match('^([ \t]*)') + for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do + local no_indent = (indent - pos < level and line ~= ' ' or level == 0) + local quoted = no_indent and line:find(quote or '^%s*%W') + if quoted and not quote then quote = '^%s*%'..line:match('^%s*(%W)') end + if no_indent and not quoted and pos > 1 then return index + pos - 1 end + end + return #input + 1 +end +local literal_block = token('literal_block', block) + +-- Line block. +local line_block_char = token(l.OPERATOR, starts_line(any_indent * '|')) + +local word = l.alpha * (l.alnum + S('-.+'))^0 + +-- Explicit markup blocks. +local prefix = any_indent * '.. ' +local footnote_label = '[' * (l.digit^1 + '#' * word^-1 + '*') * ']' +local footnote = token('footnote_block', prefix * footnote_label * l.space) +local citation_label = '[' * word * ']' +local citation = token('citation_block', prefix * citation_label * l.space) +local link = token('link_block', prefix * '_' * + (l.delimited_range('`') + (P('\\') * 1 + + l.nonnewline - ':')^1) * ':' * l.space) +local markup_block = #prefix * starts_line(footnote + citation + link) + +-- Directives. 
+local directive_type = word_match({ + -- Admonitions + 'attention', 'caution', 'danger', 'error', 'hint', 'important', 'note', 'tip', + 'warning', 'admonition', + -- Images + 'image', 'figure', + -- Body elements + 'topic', 'sidebar', 'line-block', 'parsed-literal', 'code', 'math', 'rubric', + 'epigraph', 'highlights', 'pull-quote', 'compound', 'container', + -- Table + 'table', 'csv-table', 'list-table', + -- Document parts + 'contents', 'sectnum', 'section-autonumbering', 'header', 'footer', + -- References + 'target-notes', 'footnotes', 'citations', + -- HTML-specific + 'meta', + -- Directives for substitution definitions + 'replace', 'unicode', 'date', + -- Miscellaneous + 'include', 'raw', 'class', 'role', 'default-role', 'title', + 'restructuredtext-test-directive', +}, '-') +local known_directive = token('directive', + prefix * directive_type * '::' * l.space) +local sphinx_directive_type = word_match({ + -- The TOC tree. + 'toctree', + -- Paragraph-level markup. + 'note', 'warning', 'versionadded', 'versionchanged', 'deprecated', 'seealso', + 'rubric', 'centered', 'hlist', 'glossary', 'productionlist', + -- Showing code examples. + 'highlight', 'literalinclude', + -- Miscellaneous + 'sectionauthor', 'index', 'only', 'tabularcolumns' +}, '-') +local sphinx_directive = token('sphinx_directive', + prefix * sphinx_directive_type * '::' * l.space) +local unknown_directive = token('unknown_directive', + prefix * word * '::' * l.space) +local directive = #prefix * starts_line(known_directive + sphinx_directive + + unknown_directive) + +-- Sphinx code block. 
+local indented_block = function(input, index) + local rest = input:sub(index) + local level = #rest:match('^([ \t]*)') + for pos, indent, line in rest:gmatch('()[ \t]*()([^\r\n]+)') do + if indent - pos < level and line ~= ' ' or level == 0 and pos > 1 then + return index + pos - 1 + end + end + return #input + 1 +end +local code_block = prefix * 'code-block::' * S(' \t')^1 * l.nonnewline^0 * + (l.newline + -1) * indented_block +local sphinx_block = #prefix * token('code_block', starts_line(code_block)) + +-- Substitution definitions. +local substitution = #prefix * + token('substitution', + starts_line(prefix * l.delimited_range('|') * + l.space^1 * word * '::' * l.space)) + +-- Comments. +local line_comment = prefix * l.nonnewline^0 +local bprefix = any_indent * '..' +local block_comment = bprefix * l.newline * indented_block +local comment = #bprefix * + token(l.COMMENT, starts_line(line_comment + block_comment)) + +-- Inline markup. +local em = token('em', l.delimited_range('*')) +local strong = token('strong', ('**' * (l.any - '**')^0 * P('**')^-1)) +local role = token('role', ':' * word * ':' * (word * ':')^-1) +local interpreted = role^-1 * token('interpreted', l.delimited_range('`')) * + role^-1 +local inline_literal = token('inline_literal', + '``' * (l.any - '``')^0 * P('``')^-1) +local link_ref = token('link', + (word + l.delimited_range('`')) * '_' * P('_')^-1 + + '_' * l.delimited_range('`')) +local footnote_ref = token('footnote', footnote_label * '_') +local citation_ref = token('citation', citation_label * '_') +local substitution_ref = token('substitution', l.delimited_range('|', true) * + ('_' * P('_')^-1)^-1) +local link = token('link', l.alpha * (l.alnum + S('-.'))^1 * ':' * + (l.alnum + S('/.+-%@'))^1) +local inline_markup = (strong + em + inline_literal + link_ref + interpreted + + footnote_ref + citation_ref + substitution_ref + link) * + -l.alnum + +-- Other. 
+local non_space = token(l.DEFAULT, l.alnum * (l.any - l.space)^0) +local escape = token(l.DEFAULT, '\\' * l.any) + +M._rules = { + {'literal_block', literal_block}, + {'list', list}, + {'markup_block', markup_block}, + {'code_block', sphinx_block}, + {'directive', directive}, + {'substitution', substitution}, + {'comment', comment}, + {'title', title}, + {'line_block_char', line_block_char}, + {'whitespace', ws}, + {'inline_markup', inline_markup}, + {'non_space', non_space}, + {'escape', escape} +} + +M._tokenstyles = { + list = l.STYLE_TYPE, + literal_block = l.STYLE_EMBEDDED..',eolfilled', + footnote_block = l.STYLE_LABEL, + citation_block = l.STYLE_LABEL, + link_block = l.STYLE_LABEL, + directive = l.STYLE_KEYWORD, + sphinx_directive = l.STYLE_KEYWORD..',bold', + unknown_directive = l.STYLE_KEYWORD..',italics', + code_block = l.STYLE_EMBEDDED..',eolfilled', + substitution = l.STYLE_VARIABLE, + strong = 'bold', + em = 'italics', + role = l.STYLE_CLASS, + interpreted = l.STYLE_STRING, + inline_literal = l.STYLE_EMBEDDED, + link = 'underlined', + footnote = 'underlined', + citation = 'underlined', +} + +local sphinx_levels = { + ['#'] = 0, ['*'] = 1, ['='] = 2, ['-'] = 3, ['^'] = 4, ['"'] = 5 +} + +-- Section-based folding. 
+M._fold = function(text, start_pos, start_line, start_level) + local folds, line_starts = {}, {} + for pos in (text..'\n'):gmatch('().-\r?\n') do + line_starts[#line_starts + 1] = pos + end + local style_at, CONSTANT, level = l.style_at, l.CONSTANT, start_level + local sphinx = l.property_int['fold.by.sphinx.convention'] > 0 + local FOLD_BASE = l.FOLD_BASE + local FOLD_HEADER, FOLD_BLANK = l.FOLD_HEADER, l.FOLD_BLANK + for i = 1, #line_starts do + local pos, next_pos = line_starts[i], line_starts[i + 1] + local c = text:sub(pos, pos) + local line_num = start_line + i - 1 + folds[line_num] = level + if style_at[start_pos + pos] == CONSTANT and c:find('^[^%w%s]') then + local sphinx_level = FOLD_BASE + (sphinx_levels[c] or #sphinx_levels) + level = not sphinx and level - 1 or sphinx_level + if level < FOLD_BASE then level = FOLD_BASE end + folds[line_num - 1], folds[line_num] = level, level + FOLD_HEADER + level = (not sphinx and level or sphinx_level) + 1 + elseif c == '\r' or c == '\n' then + folds[line_num] = level + FOLD_BLANK + end + end + return folds +end + +l.property['fold.by.sphinx.convention'] = '0' + +--[[ Embedded languages. +local bash = l.load('bash') +local bash_indent_level +local start_rule = #(prefix * 'code-block' * '::' * l.space^1 * 'bash' * + (l.newline + -1)) * sphinx_directive * + token('bash_begin', P(function(input, index) + bash_indent_level = #input:match('^([ \t]*)', index) + return index + end))]] + +return M diff --git a/lexlua/rexx.lua b/lexlua/rexx.lua new file mode 100644 index 000000000..8542bca8c --- /dev/null +++ b/lexlua/rexx.lua @@ -0,0 +1,76 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Rexx LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('rexx') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[
+  address arg by call class do drop else end exit expose forever forward guard
+  if interpret iterate leave method nop numeric otherwise parse procedure pull
+  push queue raise reply requires return routine result rc say select self sigl
+  signal super then to trace use when while until
+]], true)))
+
+-- Functions. Built-ins are case-insensitive, hence word_match(..., true).
+lex:add_rule('function', token(lexer.FUNCTION, word_match([[
+  abbrev abs address arg beep bitand bitor bitxor b2x center changestr charin
+  charout chars compare condition copies countstr c2d c2x datatype date delstr
+  delword digits directory d2c d2x errortext filespec form format fuzz insert
+  lastpos left length linein lineout lines max min overlay pos queued random
+  reverse right sign sourceline space stream strip substr subword symbol time
+  trace translate trunc value var verify word wordindex wordlength wordpos words
+  xrange x2b x2c x2d
+  rxfuncadd rxfuncdrop rxfuncquery rxmessagebox rxwinexec
+  sysaddrexxmacro sysbootdrive sysclearrexxmacrospace syscloseeventsem
+  sysclosemutexsem syscls syscreateeventsem syscreatemutexsem syscurpos
+  syscurstate sysdriveinfo sysdrivemap sysdropfuncs sysdroprexxmacro
+  sysdumpvariables sysfiledelete sysfilesearch sysfilesystemtype sysfiletree
+  sysfromunicode systounicode sysgeterrortext sysgetfiledatetime sysgetkey
+  sysini sysloadfuncs sysloadrexxmacrospace sysmkdir sysopeneventsem
+  sysopenmutexsem sysposteventsem syspulseeventsem sysqueryprocess
+  sysqueryrexxmacro sysreleasemutexsem sysreorderrexxmacro sysrequestmutexsem
+  sysreseteventsem sysrmdir syssaverexxmacrospace syssearchpath
+  syssetfiledatetime syssetpriority syssleep sysstemcopy sysstemdelete
+  syssteminsert sysstemsort sysswitchsession syssystemdirectory systempfilename
+  systextscreenread systextscreensize sysutilversion sysversion sysvolumelabel
+  syswaiteventsem syswaitnamedpipe syswindecryptfile syswinencryptfile syswinver
+]], true)))
+
+-- Identifiers.
+local word = lexer.alpha * (lexer.alnum + S('@#$\\.!?_'))^0 +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Strings. +local sq_str = lexer.delimited_range("'", true, true) +local dq_str = lexer.delimited_range('"', true, true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline_esc^0 + + lexer.nested_pair('/*', '*/'))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Preprocessor. +lex:add_rule('preprocessor', token(lexer.PREPROCESSOR, lexer.starts_line('#') * + lexer.nonnewline^0)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=!<>+-/\\*%&|^~.,:;(){}'))) + +-- Fold points +lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +lex:add_fold_point(lexer.KEYWORD, 'select', 'return') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '--', lexer.fold_line_comments('--')) +--lex:add_fold_point(lexer.OPERATOR, ':', ?) + +return lex diff --git a/lexlua/rhtml.lua b/lexlua/rhtml.lua new file mode 100644 index 000000000..ff76f2479 --- /dev/null +++ b/lexlua/rhtml.lua @@ -0,0 +1,20 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- RHTML LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('rhtml', {inherit = lexer.load('html')}) + +-- Embedded Ruby. +local ruby = lexer.load('rails') +local ruby_start_rule = token('rhtml_tag', '<%' * P('=')^-1) +local ruby_end_rule = token('rhtml_tag', '%>') +lex:embed(ruby, ruby_start_rule, ruby_end_rule) +lex:add_style('rhtml_tag', lexer.STYLE_EMBEDDED) + +-- Fold points. 
+lex:add_fold_point('rhtml_tag', '<%', '%>') + +return lex diff --git a/lexlua/rstats.lua b/lexlua/rstats.lua new file mode 100644 index 000000000..873854d6b --- /dev/null +++ b/lexlua/rstats.lua @@ -0,0 +1,42 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- R LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('rstats') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + break else for if in next repeat return switch try while + Inf NA NaN NULL FALSE TRUE +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + array character complex data.frame double factor function integer list logical + matrix numeric vector +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) + + lexer.delimited_range('"', true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + P('i')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('<->+*/^=.,:;|$()[]{}'))) + +return lex diff --git a/lexlua/ruby.lua b/lexlua/ruby.lua new file mode 100644 index 000000000..f8e346fb5 --- /dev/null +++ b/lexlua/ruby.lua @@ -0,0 +1,132 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Ruby LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('ruby') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + BEGIN END alias and begin break case class def defined? do else elsif end + ensure false for if in module next nil not or redo rescue retry return self + super then true undef unless until when while yield __FILE__ __LINE__ +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + at_exit autoload binding caller catch chop chop! chomp chomp! eval exec exit + exit! fail fork format gets global_variables gsub gsub! iterator? lambda load + local_variables loop open p print printf proc putc puts raise rand readline + readlines require select sleep split sprintf srand sub sub! syscall system + test trace_var trap untrace_var +]]) * -S('.:|')) + +local word_char = lexer.alnum + S('_!?') + +-- Identifiers. +local word = (lexer.alpha + '_') * word_char^0 +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Comments. +local line_comment = '#' * lexer.nonnewline_esc^0 +local block_comment = lexer.starts_line('=begin') * + (lexer.any - lexer.newline * '=end')^0 * + (lexer.newline * '=end')^-1 +lex:add_rule('comment', token(lexer.COMMENT, block_comment + line_comment)) + +local delimiter_matches = {['('] = ')', ['['] = ']', ['{'] = '}'} +local literal_delimitted = P(function(input, index) + local delimiter = input:sub(index, index) + if not delimiter:find('[%w\r\n\f\t ]') then -- only non alpha-numerics + local match_pos, patt + if delimiter_matches[delimiter] then + -- Handle nested delimiter/matches in strings. + local s, e = delimiter, delimiter_matches[delimiter] + patt = lexer.delimited_range(s..e, false, false, true) + else + patt = lexer.delimited_range(delimiter) + end + match_pos = lpeg.match(patt, input, index) + return match_pos or #input + 1 + end +end) + +-- Strings. 
+local cmd_str = lexer.delimited_range('`') +local lit_cmd = '%x' * literal_delimitted +local lit_array = '%w' * literal_delimitted +local sq_str = lexer.delimited_range("'") +local dq_str = lexer.delimited_range('"') +local lit_str = '%' * S('qQ')^-1 * literal_delimitted +local heredoc = '<<' * P(function(input, index) + local s, e, indented, _, delimiter = + input:find('(%-?)(["`]?)([%a_][%w_]*)%2[\n\r\f;]+', index) + if s == index and delimiter then + local end_heredoc = (#indented > 0 and '[\n\r\f]+ *' or '[\n\r\f]+') + local _, e = input:find(end_heredoc..delimiter, e) + return e and e + 1 or #input + 1 + end +end) +-- TODO: regex_str fails with `obj.method /patt/` syntax. +local regex_str = #P('/') * lexer.last_char_includes('!%^&*([{-=+|:;,?<>~') * + lexer.delimited_range('/', true, false) * S('iomx')^0 +local lit_regex = '%r' * literal_delimitted * S('iomx')^0 +lex:add_rule('string', token(lexer.STRING, (sq_str + dq_str + lit_str + + heredoc + cmd_str + lit_cmd + + lit_array) * S('f')^-1) + + token(lexer.REGEX, regex_str + lit_regex)) + +-- Numbers. +local dec = lexer.digit^1 * ('_' * lexer.digit^1)^0 * S('ri')^-1 +local bin = '0b' * S('01')^1 * ('_' * S('01')^1)^0 +local integer = S('+-')^-1 * (bin + lexer.hex_num + lexer.oct_num + dec) +-- TODO: meta, control, etc. for numeric_literal. +local numeric_literal = '?' * (lexer.any - lexer.space) * -word_char +lex:add_rule('number', token(lexer.NUMBER, lexer.float * S('ri')^-1 + integer + + numeric_literal)) + +-- Variables. +local global_var = '$' * (word + S('!@L+`\'=~/\\,.;<>_*"$?:') + lexer.digit + + '-' * S('0FadiIKlpvw')) +local class_var = '@@' * word +local inst_var = '@' * word +lex:add_rule('variable', token(lexer.VARIABLE, global_var + class_var + + inst_var)) + +-- Symbols. 
+lex:add_rule('symbol', token('symbol', ':' * P(function(input, index) + if input:sub(index - 2, index - 2) ~= ':' then return index end +end) * (word_char^1 + sq_str + dq_str))) +lex:add_style('symbol', lexer.STYLE_CONSTANT) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('!%^&*()[]{}-=+/|:;.,?<>~'))) + +-- Fold points. +local function disambiguate(text, pos, line, s) + return line:sub(1, s - 1):match('^%s*$') and + not text:sub(1, pos - 1):match('\\[ \t]*\r?\n$') and 1 or 0 +end +lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') +lex:add_fold_point(lexer.KEYWORD, 'class', 'end') +lex:add_fold_point(lexer.KEYWORD, 'def', 'end') +lex:add_fold_point(lexer.KEYWORD, 'do', 'end') +lex:add_fold_point(lexer.KEYWORD, 'for', 'end') +lex:add_fold_point(lexer.KEYWORD, 'module', 'end') +lex:add_fold_point(lexer.KEYWORD, 'case', 'end') +lex:add_fold_point(lexer.KEYWORD, 'if', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'while', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'unless', disambiguate) +lex:add_fold_point(lexer.KEYWORD, 'until', disambiguate) +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '[', ']') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '=begin', '=end') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/rust.lua b/lexlua/rust.lua new file mode 100644 index 000000000..16172620f --- /dev/null +++ b/lexlua/rust.lua @@ -0,0 +1,68 @@ +-- Copyright 2015-2018 Alejandro Baez (https://keybase.io/baez). See License.txt. +-- Rust LPeg lexer. + +local lexer = require("lexer") +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('rust') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+  abstract alignof as become box break const continue crate do else enum extern
+  false final fn for if impl in let loop macro match mod move mut offsetof
+  override priv proc pub pure ref return Self self sizeof static struct super
+  trait true type typeof unsafe unsized use virtual where while yield
+]]))
+
+-- Functions (macro-style invocations ending in '!').
+lex:add_rule('function', token(lexer.FUNCTION, lexer.word^1 * S("!")))
+
+-- Library types (CamelCase identifiers).
+lex:add_rule('library', token(lexer.LABEL, lexer.upper *
+                                           (lexer.lower + lexer.dec_num)^1))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match[[
+  () bool isize usize char str u8 u16 u32 u64 i8 i16 i32 i64 f32 f64
+]]))
+
+-- Strings.
+-- NOTE(review): an unused sq_str pattern was removed as dead code; a lone '
+-- in Rust may start a char literal or a lifetime, so it is not matched here
+-- (TODO: add a dedicated char-literal rule that disambiguates lifetimes).
+local dq_str = P('L')^-1 * lexer.delimited_range('"')
+local raw_str = '#"' * (lexer.any - '#')^0 * P('#')^-1
+lex:add_rule('string', token(lexer.STRING, dq_str + raw_str))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Comments.
+local line_comment = '//' * lexer.nonnewline_esc^0
+local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+lex:add_rule('number', token(lexer.NUMBER,
+                             lexer.float +
+                             P('0b')^-1 * (lexer.dec_num + "_")^1 +
+                             lexer.integer))
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR,
+                               S('+-/*%<>!=`^~@&|?#~:;,.()[]{}')))
+
+-- Attributes.
+lex:add_rule('preprocessor', token(lexer.PREPROCESSOR,
+                                   "#[" * (lexer.nonnewline - ']')^0 *
+                                   P("]")^-1))
+
+-- Fold points.
+lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '{', '}') + +return lex diff --git a/lexlua/sass.lua b/lexlua/sass.lua new file mode 100644 index 000000000..5a1bd944b --- /dev/null +++ b/lexlua/sass.lua @@ -0,0 +1,24 @@ +-- Copyright 2006-2018 Robert Gieseke. See License.txt. +-- Sass CSS preprocessor LPeg lexer. +-- http://sass-lang.com + +local lexer = require('lexer') +local token = lexer.token +local P, S = lpeg.P, lpeg.S + +local lex = lexer.new('sass', {inherit = lexer.load('css')}) + +-- Line comments. +lex:add_rule('line_comment', token(lexer.COMMENT, '//' * lexer.nonnewline^0)) + +-- Variables. +lex:add_rule('variable', token(lexer.VARIABLE, '$' * (lexer.alnum + S('_-'))^1)) + +-- Mixins. +lex:add_rule('mixin', token('mixin', P('@') * lexer.word)) +lex:add_style('mixin', lexer.STYLE_FUNCTION) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/scala.lua b/lexlua/scala.lua new file mode 100644 index 000000000..c7119c321 --- /dev/null +++ b/lexlua/scala.lua @@ -0,0 +1,61 @@ +-- Copyright 2006-2018 JMS. See License.txt. +-- Scala LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('scala') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Classes. +lex:add_rule('class', token(lexer.KEYWORD, P('class')) * ws^1 * + token(lexer.CLASS, lexer.word)) + +-- Keywords. 
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + abstract case catch class def do else extends false final finally for forSome + if implicit import lazy match new null object override package private + protected return sealed super this throw trait try true type val var while + with yield +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + Array Boolean Buffer Byte Char Collection Double Float Int Iterator LinkedList + List Long Map None Option Set Short SortedMap SortedSet String TreeMap TreeSet +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('(')) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local symbol = "'" * lexer.word +local dq_str = lexer.delimited_range('"', true) +local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +lex:add_rule('string', token(lexer.STRING, tq_str + symbol + dq_str)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('LlFfDd')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/scheme.lua b/lexlua/scheme.lua new file mode 100644 index 000000000..2050b26c9 --- /dev/null +++ b/lexlua/scheme.lua @@ -0,0 +1,80 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Scheme LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('scheme') + +-- Whitespace. 
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + and begin case cond cond-expand define define-macro delay do else fluid-let if + lambda let let* letrec or quasiquote quote set! +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + abs acos angle append apply asin assoc assq assv atan car cdr caar cadr cdar + cddr caaar caadr cadar caddr cdaar cdadr cddar cdddr + call-with-current-continuation call-with-input-file call-with-output-file + call-with-values call/cc catch ceiling char->integer char-downcase char-upcase + close-input-port close-output-port cons cos current-input-port + current-output-port delete-file display dynamic-wind eval exit exact->inexact + exp expt file-or-directory-modify-seconds floor force for-each gcd gensym + get-output-string getenv imag-part integer->char lcm length list list->string + list->vector list-ref list-tail load log magnitude make-polar make-rectangular + make-string make-vector map max member memq memv min modulo newline nil not + number->string open-input-file open-input-string open-output-file + open-output-string peek-char quotient read read-char read-line real-part + remainder reverse reverse! round set-car! set-cdr! sin sqrt string + string->list string->number string->symbol string-append string-copy + string-fill! string-length string-ref string-set! substring symbol->string + system tan truncate values vector vector->list vector-fill! vector-length + vector-ref vector-set! with-input-from-file with-output-to-file write + write-char + boolean? char-alphabetic? char-ci<=? char-ci<? char-ci=? char-ci>=? char-ci>? + char-lower-case? char-numeric? char-ready? char-upper-case? char-whitespace? + char<=? char<? char=? char>=? char>? char? complex? eof-object? eq? equal? + eqv? even? exact? file-exists? inexact? input-port? integer? list? negative? + null? number? odd? output-port? pair? port? 
positive? procedure? rational? + real? string-ci<=? string-ci<? string-ci=? string-ci>=? string-ci>? string<=? + string<? string=? string>=? string>? string? symbol? vector? zero? + #t #f +]])) + +local word = (lexer.alpha + S('-!?')) * (lexer.alnum + S('-!?'))^0 + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, word)) + +-- Strings. +local literal = (P("'") + '#' * S('\\bdox')) * lexer.word +local dq_str = lexer.delimited_range('"') +lex:add_rule('string', token(lexer.STRING, literal + dq_str)) + +-- Comments. +local line_comment = ';' * lexer.nonnewline^0 +local block_comment = '#|' * (lexer.any - '|#')^0 * P('|#')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, P('-')^-1 * lexer.digit^1 * + (S('./') * lexer.digit^1)^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('<>=*/+-`@%:()'))) + +-- Entity. +lex:add_rule('entity', token('entity', '&' * word)) +lex:add_style('entity', lexer.STYLE_VARIABLE) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.COMMENT, '#|', '|#') +lex:add_fold_point(lexer.COMMENT, ';', lexer.fold_line_comments(';')) + +return lex diff --git a/lexlua/smalltalk.lua b/lexlua/smalltalk.lua new file mode 100644 index 000000000..3cf0d2656 --- /dev/null +++ b/lexlua/smalltalk.lua @@ -0,0 +1,46 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Smalltalk LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('smalltalk') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + true false nil self super isNil not Smalltalk Transcript +]])) + +-- Types. 
+lex:add_rule('type', token(lexer.TYPE, word_match[[ + Date Time Boolean True False Character String Array Symbol Integer Object +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + '$' * lexer.word)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, + lexer.delimited_range('"', false, true))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(':=_<>+-/*!()[]'))) + +-- Labels. +lex:add_rule('label', token(lexer.LABEL, '#' * lexer.word)) + +-- Fold points. +lex:add_fold_point(lexer.OPERATOR, '[', ']') + +return lex diff --git a/lexlua/sml.lua b/lexlua/sml.lua new file mode 100644 index 000000000..4b8faf625 --- /dev/null +++ b/lexlua/sml.lua @@ -0,0 +1,113 @@ +-- Copyright 2017-2018 Murray Calavera. See License.txt. +-- Standard ML LPeg lexer. + +local lexer = require('lexer') +local token = lexer.token + +function mlword(words) + return lexer.word_match(words, "'") +end + +local ws = token(lexer.WHITESPACE, lexer.space^1) + +-- single line comments are valid in successor ml +local cl = '(*)' * lexer.nonnewline^0 +local comment = token(lexer.COMMENT, cl + lexer.nested_pair('(*', '*)')) + +local string = token(lexer.STRING, lpeg.P('#')^-1 * + lexer.delimited_range('"', true)) + +local function num(digit) + return digit * (digit^0 * lpeg.P('_'))^0 * digit^1 + digit +end + +local int = num(lexer.digit) +local frac = lpeg.P('.') * int +local minus = lpeg.P('~')^-1 +local exp = lpeg.S('eE') * minus * int +local real = int * frac^-1 * exp + int * frac * exp^-1 +local hex = num(lexer.xdigit) +local bin = num(lpeg.S('01')) + +local number = token(lexer.NUMBER, + lpeg.P('0w') * int + + (lpeg.P('0wx') + lpeg.P('0xw')) * hex + + (lpeg.P('0wb') + lpeg.P('0bw')) * bin + + minus * lpeg.P('0x') * hex + + minus * lpeg.P('0b') * bin + + minus * 
real + + minus * int +) + +local keyword = token(lexer.KEYWORD, mlword{ + 'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else', 'end', + 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix', 'infixr', 'let', + 'local', 'nonfix', 'of', 'op', 'orelse', 'raise', 'rec', 'then', + 'type', 'val', 'with', 'withtype', 'while', + + 'eqtype', 'functor', 'include', 'sharing', 'sig', 'signature', + 'struct', 'structure' +}) + +-- includes valid symbols for identifiers +local operator = token(lexer.OPERATOR, + lpeg.S('!*/+-^:@=<>()[]{},;._|#%&$?~`\\')) + +local type = token(lexer.TYPE, mlword{ + 'int', 'real', 'word', 'bool', 'char', 'string', 'unit', + 'array', 'exn', 'list', 'option', 'order', 'ref', 'substring', 'vector' +}) + +-- `real`, `vector` and `substring` are a problem +local func = token(lexer.FUNCTION, mlword{ + 'app', 'before', 'ceil', 'chr', 'concat', 'exnMessage', 'exnName', + 'explode', 'floor', 'foldl', 'foldr', 'getOpt', 'hd', 'ignore', + 'implode', 'isSome', 'length', 'map', 'not', 'null', 'ord', 'print', + 'real', 'rev', 'round', 'size', 'str', 'substring', 'tl', 'trunc', + 'valOf', 'vector', + 'o', 'abs', 'mod', 'div' +}) + +-- non-symbolic identifiers only +local id = (lexer.alnum + "'" + '_')^0 +local aid = lexer.alpha * id +local longid = (aid * lpeg.P('.'))^0 * aid +local identifier = token(lexer.IDENTIFIER, lexer.lower * id) +local typevar = token(lexer.VARIABLE, "'" * id) +local c = mlword{'true', 'false', 'nil'} +local const = token(lexer.CONSTANT, lexer.upper * id + c) +local structure = token(lexer.CLASS, aid * lpeg.P('.')) + +local open + = token(lexer.KEYWORD, mlword{'open', 'structure', 'functor'}) + * ws * token(lexer.CLASS, longid) + +local struct_dec + = token(lexer.KEYWORD, lpeg.P('structure')) * ws + * token(lexer.CLASS, aid) * ws + * token(lexer.OPERATOR, lpeg.P('=')) * ws + +local struct_new = struct_dec * token(lexer.KEYWORD, lpeg.P('struct')) +local struct_alias = struct_dec * token(lexer.CLASS, longid) + +local M = 
{_NAME = 'sml'} + +M._rules = { + {'whitespace', ws}, + {'comment', comment}, + {'number', number}, + {'struct_new', struct_new}, + {'struct_alias', struct_alias}, + {'structure', structure}, + {'open', open}, + {'type', type}, + {'keyword', keyword}, + {'function', func}, + {'string', string}, + {'operator', operator}, + {'typevar', typevar}, + {'constant', const}, + {'identifier', identifier}, +} + +return M diff --git a/lexlua/snobol4.lua b/lexlua/snobol4.lua new file mode 100644 index 000000000..9c8682bc2 --- /dev/null +++ b/lexlua/snobol4.lua @@ -0,0 +1,65 @@ +-- Copyright 2013-2018 Michael T. Richter. See License.txt. +-- SNOBOL4 lexer. +-- This lexer works with classic SNOBOL4 as well as the CSNOBOL4 extensions. + +local lexer = require 'lexer' +local token, word_match = lexer.token, lexer.word_match +local B, P, R, S, V = lpeg.B, lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local M = { _NAME = 'snobol4' } + +-- Helper patterns. +local dotted_id = lexer.word * (P'.' * lexer.word)^0 + +local dq_str = lexer.delimited_range('"', true, true) +local sq_str = lexer.delimited_range("'", true, true) + +local branch = B(lexer.space * P':(') * dotted_id * #P')' +local sbranch = B(lexer.space * P':' * S'SF' * '(') * dotted_id * #P')' +local sbranchx = B(P')' * S'SF' * P'(') * dotted_id * #P')' + +-- Token definitions. 
+local bif = token(lexer.FUNCTION, word_match({ + 'APPLY', 'ARRAY', 'CHAR', 'CONVERT', 'COPY', 'DATA', 'DATE', 'DIFFER', 'DUPL', + 'EQ', 'EVAL', 'FILE_ABSPATH', 'FILE_ISDIR', 'FREEZE', 'FUNCTION', 'GE', 'GT', + 'HOST', 'IDENT', 'INTEGER', 'IO_FINDUNIT', 'ITEM', 'LABEL', 'LOAD', 'LPAD', + 'LE', 'LGT', 'LT', 'NE', 'OPSYN', 'ORD', 'PROTOTYPE', 'REMDR', 'REPLACE', + 'REVERSE', 'RPAD', 'RSORT', 'SERV_LISTEN', 'SET', 'SETEXIT', 'SIZE', 'SORT', + 'SQRT', 'SSET', 'SUBSTR', 'TABLE', 'THAW', 'TIME', 'TRACE', 'TRIM', 'UNLOAD', + 'VALUE', 'VDIFFER', +}, '', true) * #lexer.delimited_range('()', false, true, true)) +local comment = token(lexer.COMMENT, + lexer.starts_line(S'*#|;!' * lexer.nonnewline^0)) +local control = token(lexer.PREPROCESSOR, lexer.starts_line(P'-' * lexer.word)) +local identifier = token(lexer.DEFAULT, dotted_id) +local keyword = token(lexer.KEYWORD, word_match({ + 'ABORT', 'ARRAY', 'CONTINUE', 'DEFINE', 'END', 'FRETURN', 'INPUT', 'NRETURN', + 'OUTPUT', 'PUNCH', 'RETURN', 'SCONTINUE', 'TABLE', +}, '', true) + P'&' * lexer.word) +local label = token(lexer.LABEL, lexer.starts_line(dotted_id)) +local number = token(lexer.NUMBER, lexer.float + lexer.integer) +local operator = token(lexer.OPERATOR, S'¬?$.!%*/#+-@⊥&^~\\=') +local pattern = lexer.token(lexer.CLASS, word_match({ -- keep distinct + 'ABORT', 'ANY', 'ARB', 'ARBNO', 'BAL', 'BREAK', 'BREAKX', 'FAIL', 'FENCE', + 'LEN', 'NOTANY', 'POS', 'REM', 'RPOS', 'RTAB', 'SPAN', 'SUCCEED', 'TAB', +}, '', true) * #lexer.delimited_range('()', false, true, true)) +local str = token(lexer.STRING, sq_str + dq_str) +local target = token(lexer.LABEL, branch + sbranch + sbranchx) +local ws = token(lexer.WHITESPACE, lexer.space^1) + +M._rules = { + { 'comment', comment }, + { 'control', control }, + { 'string', str }, + { 'number', number }, + { 'keyword', keyword }, + { 'label', label }, + { 'target', target }, + { 'pattern', pattern }, + { 'built-in', bif }, + { 'operator', operator }, + { 'identifier', identifier }, + { 
'whitespace', ws }, +} + +return M diff --git a/lexlua/sql.lua b/lexlua/sql.lua new file mode 100644 index 000000000..23bf44173 --- /dev/null +++ b/lexlua/sql.lua @@ -0,0 +1,59 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- SQL LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('sql') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + add all alter analyze and as asc asensitive before between bigint binary blob + both by call cascade case change char character check collate column condition + connection constraint continue convert create cross current_date current_time + current_timestamp current_user cursor database databases day_hour + day_microsecond day_minute day_second dec decimal declare default delayed + delete desc describe deterministic distinct distinctrow div double drop dual + each else elseif enclosed escaped exists exit explain false fetch float for + force foreign from fulltext goto grant group having high_priority + hour_microsecond hour_minute hour_second if ignore in index infile inner inout + insensitive insert int integer interval into is iterate join key keys kill + leading leave left like limit lines load localtime localtimestamp lock long + longblob longtext loop low_priority match mediumblob mediumint mediumtext + middleint minute_microsecond minute_second mod modifies natural not + no_write_to_binlog null numeric on optimize option optionally or order out + outer outfile precision primary procedure purge read reads real references + regexp rename repeat replace require restrict return revoke right rlike schema + schemas second_microsecond select sensitive separator set show smallint soname + spatial specific sql sqlexception sqlstate sqlwarning sql_big_result + 
sql_calc_found_rows sql_small_result ssl starting straight_join table + terminated text then tinyblob tinyint tinytext to trailing trigger true undo + union unique unlock unsigned update usage use using utc_date utc_time + utc_timestamp values varbinary varchar varcharacter varying when where while + with write xor year_month zerofill +]], true))) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"') + + lexer.delimited_range('`'))) + +-- Comments. +local line_comment = (P('--') + '#') * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S(',()'))) + +return lex diff --git a/lexlua/taskpaper.lua b/lexlua/taskpaper.lua new file mode 100644 index 000000000..060e1cbc3 --- /dev/null +++ b/lexlua/taskpaper.lua @@ -0,0 +1,60 @@ +-- Copyright (c) 2016-2018 Larry Hynes. See License.txt. 
+-- Taskpaper LPeg lexer + +local lexer = require('lexer') +local token = lexer.token +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'taskpaper'} + +local delimiter = P(' ') + P('\t') + +-- Whitespace +local ws = token(lexer.WHITESPACE, lexer.space^1) + +-- Tags +local day_tag = token('day_tag', (P('@today') + P('@tomorrow'))) + +local overdue_tag = token('overdue_tag', P('@overdue')) + +local plain_tag = token('plain_tag', P('@') * lexer.word) + +local extended_tag = token('extended_tag', + P('@') * lexer.word * P('(') * + (lexer.word + R('09') + P('-'))^1 * P(')')) + +-- Projects +local project = token('project', + lexer.nested_pair(lexer.starts_line(lexer.alnum), ':') * + lexer.newline) + +-- Notes +local note = token('note', delimiter^1 * lexer.alnum * lexer.nonnewline^0) + +-- Tasks +local task = token('task', delimiter^1 * P('-') + lexer.newline) + +M._rules = { + {'note', note}, + {'task', task}, + {'project', project}, + {'extended_tag', extended_tag}, + {'day_tag', day_tag}, + {'overdue_tag', overdue_tag}, + {'plain_tag', plain_tag}, + {'whitespace', ws}, +} + +M._tokenstyles = { + note = lexer.STYLE_CONSTANT, + task = lexer.STYLE_FUNCTION, + project = lexer.STYLE_TAG, + extended_tag = lexer.STYLE_COMMENT, + day_tag = lexer.STYLE_CLASS, + overdue_tag = lexer.STYLE_PREPROCESSOR, + plain_tag = lexer.STYLE_COMMENT, +} + +M._LEXBYLINE = true + +return M diff --git a/lexlua/tcl.lua b/lexlua/tcl.lua new file mode 100644 index 000000000..5ac96f85f --- /dev/null +++ b/lexlua/tcl.lua @@ -0,0 +1,49 @@ +-- Copyright 2014-2018 Joshua Krämer. See License.txt. +-- Tcl LPeg lexer. +-- This lexer follows the TCL dodekalogue (http://wiki.tcl.tk/10259). +-- It is based on the previous lexer by Mitchell. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('tcl') + +-- Whitespace. 
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comment. +lex:add_rule('comment', token(lexer.COMMENT, '#' * P(function(input, index) + local i = index - 2 + while i > 0 and input:find('^[ \t]', i) do i = i - 1 end + if i < 1 or input:find('^[\r\n;]', i) then return index end +end) * lexer.nonnewline^0)) + +-- Separator (semicolon). +lex:add_rule('separator', token(lexer.CLASS, P(';'))) + +-- Argument expander. +lex:add_rule('expander', token(lexer.LABEL, P('{*}'))) + +-- Delimiters. +lex:add_rule('braces', token(lexer.KEYWORD, S('{}'))) +lex:add_rule('quotes', token(lexer.FUNCTION, '"')) +lex:add_rule('brackets', token(lexer.VARIABLE, S('[]'))) + +-- Variable substitution. +lex:add_rule('variable', token(lexer.STRING, '$' * + (lexer.alnum + '_' + P(':')^2)^0)) + +-- Backslash substitution. +lex:add_rule('backslash', token(lexer.TYPE, + '\\' * (lexer.digit * lexer.digit^-2 + + 'x' * lexer.xdigit^1 + + 'u' * lexer.xdigit * lexer.xdigit^-3 + + 'U' * lexer.xdigit * lexer.xdigit^-7 + + 1))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/template.txt b/lexlua/template.txt new file mode 100644 index 000000000..730479384 --- /dev/null +++ b/lexlua/template.txt @@ -0,0 +1,38 @@ +-- ? LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('?') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + keyword1 keyword2 keyword3 +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Comments. 
+lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-*/%^=<>,.{}[]()'))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'start', 'end') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '#', lexer.fold_line_comments('#')) + +return lex diff --git a/lexlua/tex.lua b/lexlua/tex.lua new file mode 100644 index 000000000..185c543ee --- /dev/null +++ b/lexlua/tex.lua @@ -0,0 +1,34 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Plain TeX LPeg lexer. +-- Modified by Robert Gieseke. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('tex') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '%' * lexer.nonnewline^0)) + +-- TeX environments. +lex:add_rule('environment', token('environment', '\\' * (P('begin') + 'end') * + lexer.word)) +lex:add_style('environment', lexer.STYLE_KEYWORD) + +-- Commands. +lex:add_rule('command', token(lexer.KEYWORD, '\\' * + (lexer.alpha^1 + S('#$&~_^%{}')))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('$&#{}[]'))) + +-- Fold points. +lex:add_fold_point(lexer.COMMENT, '%', lexer.fold_line_comments('%')) +lex:add_fold_point('environment', '\\begin', '\\end') +lex:add_fold_point(lexer.OPERATOR, '{', '}') + +return lex diff --git a/lexlua/texinfo.lua b/lexlua/texinfo.lua new file mode 100644 index 000000000..9a742f251 --- /dev/null +++ b/lexlua/texinfo.lua @@ -0,0 +1,222 @@ +-- Copyright 2014-2018 stef@ailleurs.land. See License.txt. +-- Plain Texinfo version 5.2 LPeg lexer +-- Freely inspired from Mitchell work and valuable help from him too ! 
+ +-- Directives are processed (more or less) in the Reference Card Texinfo order +-- Reference Card page for each directive group is in comment for reference + +--[[ +Note: Improving Fold Points use with Texinfo + +At the very beginning of your Texinfo file, it could be wised to insert theses +alias : + +@alias startchapter = comment +@alias endchapter = comment + +Then use this to begin each chapter : + +@endchapter -------------------------------------------------------------------- +@chapter CHAPTER TITLE +@startchapter ------------------------------------------------------------------ + +With the use of Scintilla's `SCI_FOLDALL(SC_FOLDACTION_TOGGLE)` or Textadept's +`buffer:fold_all(buffer.FOLDACTION_TOGGLE)`, you have then a nice chapter +folding, useful with large documents. +]] + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('texinfo') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Directives. 
+local directives_base = word_match([[ + end + -- Custom keywords for chapter folding + startchapter endchapter + -- List and tables (page 2, column 2) + itemize enumerate + -- Beginning a Texinfo document (page 1, column 1) + titlepage copying + -- Block environments (page 2, column 1) + cartouche + -- Block environments > Displays using fixed-width fonts (page 2, column 2) + example smallexample + -- List and tables (page 2, column 2) + multitable + -- Floating Displays (page 2, column 3) + float listoffloats caption shortcaption image + -- Floating Displays > Footnotes (page 2, column 3) + footnote footnotestyle + -- Conditionally (in)visible text > Output formats (page 3, column 3) + ifdocbook ifhtml ifinfo ifplaintext iftex ifxml ifnotdocbook ifnothtml + ifnotplaintext ifnottex ifnotxml ifnotinfo inlinefmt inlinefmtifelse + -- Conditionally (in)visible text > Raw formatter text (page 4, column 1) + docbook html tex xml inlineraw + -- Conditionally (in)visible text > Documents variables (page 4, column 1) + set clear value ifset ifclear inlineifset inlineifclear + -- Conditionally (in)visible text > Testing for commands (page 4, column 1) + ifcommanddefined ifcommandnotdefined end + -- Defining new Texinfo commands (page 4, column 1) + alias macro unmacro definfounclose + -- File inclusion (page 4, column 1) + include verbatiminclude + -- Formatting and headers footers for TeX (page 4, column 1) + allowcodebreaks finalout fonttextsize + -- Formatting and headers footers for TeX > paper size (page 4, column 2) + smallbook afourpaper afivepaper afourlatex afourwide pagesizes + -- Formatting and headers footers for TeX > Page headers and footers (page 4, + -- column 2) + -- not implemented + -- Document preferences (page 4, column 2) + -- not implemented + -- Ending a Texinfo document (page 4, column 2) + bye +]], true) +lex:add_rule('directive', token('directives', ('@end' * lexer.space^1 + '@') * + directives_base)) +lex:add_style('directives', 
lexer.STYLE_FUNCTION) + +-- Chapters. +local chapters_base = word_match([[ + -- Chapter structuring (page 1, column 2) + lowersections raisesections part + -- Chapter structuring > Numbered, included in contents (page 1, column 2) + chapter centerchap + -- Chapter structuring > Context-dependent, included in contents (page 1, + -- column 2) + section subsection subsubsection + -- Chapter structuring > Unumbered, included in contents (page 1, column 2) + unnumbered unnumberedsec unnumberedsubsec unnumberedsubsection + unnumberedsubsubsec unnumberedsubsubsection + -- Chapter structuring > Letter and numbered, included in contents (page 1, + -- column 2) + appendix appendixsec appendixsection appendixsubsec appendixsubsection + appendixsubsubsec appendixsubsubsection + -- Chapter structuring > Unumbered, not included in contents, no new page + -- (page 1, column 3) + chapheading majorheading heading subheading subsubheading +]], true) +lex:add_rule('chapter', token('chapters', ('@end' * lexer.space^1 + '@') * + chapters_base)) +lex:add_style('chapters', lexer.STYLE_CLASS) + +-- Common keywords. 
+local keyword_base = word_match([[ + end + -- Beginning a Texinfo document (page 1, column 1) + setfilename settitle insertcopying + -- Beginning a Texinfo document > Internationlization (page 1, column 1) + documentencoding documentlanguage frenchspacing + -- Beginning a Texinfo document > Info directory specification and HTML + -- document description (page 1, column 1) + dircategory direntry documentdescription + -- Beginning a Texinfo document > Titre pages (page 1, column 1) + shorttitlepage center titlefont title subtitle author + -- Beginning a Texinfo document > Tables of contents (page 1, column 2) + shortcontents summarycontents contents setcontentsaftertitlepage + setshortcontentsaftertitlepage + -- Nodes (page 1, column 2) + node top anchor novalidate + -- Menus (page 1, column 2) + menu detailmenu + -- Cross references > Within the Info system (page 1, column 3) + xref pxref ref inforef xrefautomaticsectiontitle + -- Cross references > Outside of info (page 1, column 3) + url cite + -- Marking text > Markup for regular text (page 1, column 3) + var dfn acronym abbr + -- Marking text > Markup for litteral text (page 1, column 3) + code file command env option kbd key email indicateurl samp verb + -- Marking text > GUI sequences (page 2, column 1) + clicksequence click clickstyle arrow + -- Marking text > Math (page 2, column 1) + math minus geq leq + -- Marking text > Explicit font selection (page 2, column 1) + sc r i slanted b sansserif t + -- Block environments (page 2, column 1) + noindent indent exdent + -- Block environments > Normally filled displays using regular text fonts + -- (page 2, column 1) + quotation smallquotation indentedblock smallindentedblock raggedright + -- Block environments > Line-for-line displays using regular test fonts (page + -- 2, column 2) + format smallformat display smalldisplay flushleft flushright + -- Block environments > Displays using fixed-width fonts (page 2, column 2) + lisp smalllisp verbatim + -- List and 
tables (page 2, column 2) + table ftable vtable tab item itemx headitem headitemfont asis + -- Indices (page 2, column 3) + cindex findex vindex kindex pindex tindex defcodeindex syncodeindex synindex + printindex + -- Insertions within a paragraph > Characters special to Texinfo (page 2, + -- column 3) + @ { } backslashcar comma hashcar : . ? ! dmn + -- Insertions within a paragraph > Accents (page 3, column 1) + -- not implemented + -- Insertions within a paragraph > Non-English characters (page 3, column 1) + -- not implemented + -- Insertions within a paragraph > Other text characters an logos (page 3, + -- column 1) + bullet dots enddots euro pounds textdegree copyright registeredsymbol TeX + LaTeX today guillemetleft guillementright guillemotleft guillemotright + -- Insertions within a paragraph > Glyphs for code examples (page 3, column 2) + equiv error expansion point print result + -- Making and preventing breaks (page 3, column 2) + * / - hyphenation tie w refill + -- Vertical space (page 3, column 2) + sp page need group vskip + -- Definition commands (page 3, column 2) + -- not implemented +]], true) +lex:add_rule('keyword', token(lexer.KEYWORD, ('@end' * lexer.space^1 + '@') * + keyword_base)) + +-- Italics +lex:add_rule('emph', token('emph', + '@emph' * + lexer.delimited_range('{}', false, true, true))) +lex:add_style('emph', lexer.STYLE_STRING..',italics') + +-- Bold +lex:add_rule('strong', token('strong', + '@strong' * + lexer.delimited_range('{}', false, true, true))) +lex:add_style('strong', lexer.STYLE_STRING..',bold') + +-- Identifiers +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range('{}', false, true, true))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Comments. 
+local line_comment = '@c' * lexer.nonnewline_esc^0 +--local line_comment_long = '@comment' * lexer.nonnewline_esc^0 +local block_comment = '@ignore' * (lexer.any - '@end ignore')^0 * + P('@end ignore')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Fold points. +lex:add_fold_point('directives', '@titlepage', '@end titlepage') +lex:add_fold_point('directives', '@copying', '@end copying') +lex:add_fold_point('directives', '@ifset', '@end ifset') +lex:add_fold_point('directives', '@tex', '@end tex') +lex:add_fold_point('directives', '@itemize', '@end itemize') +lex:add_fold_point('directives', '@enumerate', '@end enumerate') +lex:add_fold_point('directives', '@multitable', '@end multitable') +lex:add_fold_point('directives', '@example', '@end example') +lex:add_fold_point('directives', '@smallexample', '@end smallexample') +lex:add_fold_point('directives', '@cartouche', '@end cartouche') +lex:add_fold_point('directives', '@startchapter', '@end startchapter') + +return lex diff --git a/lexlua/text.lua b/lexlua/text.lua new file mode 100644 index 000000000..a3b367190 --- /dev/null +++ b/lexlua/text.lua @@ -0,0 +1,4 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Text LPeg lexer. + +return require('lexer').new('text') diff --git a/lexlua/themes/curses.lua b/lexlua/themes/curses.lua new file mode 100644 index 000000000..2162a3724 --- /dev/null +++ b/lexlua/themes/curses.lua @@ -0,0 +1,55 @@ +-- Copyright 2007-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Curses theme for Lua lexers. +-- Contributions by Ana Balan. + +local property = require('lexer').property + +-- Normal colors. 
+property['color.black'] = '#000000' +property['color.red'] = '#800000' +property['color.green'] = '#008000' +property['color.yellow'] = '#808000' +property['color.blue'] = '#000080' +property['color.magenta'] = '#800080' +property['color.cyan'] = '#008080' +property['color.white'] = '#C0C0C0' + +-- Light colors. (16 color terminals only.) +-- These only apply to 16 color terminals. For other terminals, set the +-- style's `bold` attribute to use the light color variant. +property['color.light_black'] = '#404040' +property['color.light_red'] = '#FF0000' +property['color.light_green'] = '#00FF00' +--property['color.light_yellow'] = '#FFFF00' +property['color.light_blue'] = '#0000FF' +property['color.light_magenta'] = '#FF00FF' +--property['color.light_cyan'] = '#0000FF' +property['color.light_white'] = '#FFFFFF' + +-- Predefined styles. +property['style.default'] = 'fore:$(color.white),back:$(color.black)' +property['style.linenumber'] = '' +property['style.bracelight'] = 'fore:$(color.black),back:$(color.white)' +property['style.controlchar'] = '' +property['style.indentguide'] = '' +property['style.calltip'] = '$(style.default)' +property['style.folddisplaytext'] = 'fore:$(color.black),bold' + +-- Token styles. 
+property['style.class'] = 'fore:$(color.yellow)' +property['style.comment'] = 'fore:$(color.black),bold' +property['style.constant'] = 'fore:$(color.red)' +property['style.embedded'] = '$(style.keyword),back:$(color.black)' +property['style.error'] = 'fore:$(color.red),bold' +property['style.function'] = 'fore:$(color.blue)' +property['style.identifier'] = '' +property['style.keyword'] = 'fore:$(color.white),bold' +property['style.label'] = 'fore:$(color.red),bold' +property['style.number'] = 'fore:$(color.cyan)' +property['style.operator'] = 'fore:$(color.yellow)' +property['style.preprocessor'] = 'fore:$(color.magenta)' +property['style.regex'] = 'fore:$(color.green),bold' +property['style.string'] = 'fore:$(color.green)' +property['style.type'] = 'fore:$(color.magenta),bold' +property['style.variable'] = 'fore:$(color.blue),bold' +property['style.whitespace'] = '' diff --git a/lexlua/themes/dark.lua b/lexlua/themes/dark.lua new file mode 100644 index 000000000..8d68d20ae --- /dev/null +++ b/lexlua/themes/dark.lua @@ -0,0 +1,89 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Dark theme for Lua lexers. +-- Contributions by Ana Balan. + +local property = require('lexer').property + +-- Greyscale colors. +--property['color.dark_black'] = '#000000' +property['color.black'] = '#1A1A1A' +property['color.light_black'] = '#333333' +--property['color.grey_black'] = '#4D4D4D' +property['color.dark_grey'] = '#666666' +--property['color.grey'] = '#808080' +property['color.light_grey'] = '#999999' +--property['color.grey_white'] = '#B3B3B3' +property['color.dark_white'] = '#CCCCCC' +--property['color.white'] = '#E6E6E6' +--property['color.light_white'] = '#FFFFFF' + +-- Dark colors. 
+--property['color.dark_red'] = '#661A1A' +--property['color.dark_yellow'] = '#66661A' +--property['color.dark_green'] = '#1A661A' +--property['color.dark_teal'] = '#1A6666' +--property['color.dark_purple'] = '#661A66' +--property['color.dark_orange'] = '#B3661A' +--property['color.dark_pink'] = '#B36666' +--property['color.dark_lavender'] = '#6666B3' +--property['color.dark_blue'] = '#1A66B3' + +-- Normal colors. +property['color.red'] = '#994D4D' +property['color.yellow'] = '#99994D' +property['color.green'] = '#4D994D' +property['color.teal'] = '#4D9999' +property['color.purple'] = '#994D99' +property['color.orange'] = '#E6994D' +--property['color.pink'] = '#E69999' +property['color.lavender'] = '#9999E6' +property['color.blue'] = '#4D99E6' + +-- Light colors. +property['color.light_red'] = '#CC8080' +property['color.light_yellow'] = '#CCCC80' +property['color.light_green'] = '#80CC80' +--property['color.light_teal'] = '#80CCCC' +--property['color.light_purple'] = '#CC80CC' +--property['color.light_orange'] = '#FFCC80' +--property['color.light_pink'] = '#FFCCCC' +--property['color.light_lavender'] = '#CCCCFF' +property['color.light_blue'] = '#80CCFF' + +-- Default style. +property['font'], property['fontsize'] = 'Bitstream Vera Sans Mono', 10 +if WIN32 then + property['font'] = 'Courier New' +elseif OSX then + property['font'], property['fontsize'] = 'Monaco', 12 +end + +-- Predefined styles. +property['style.default'] = 'font:$(font),size:$(fontsize),'.. + 'fore:$(color.light_grey),back:$(color.black)' +property['style.linenumber'] = 'fore:$(color.dark_grey),back:$(color.black)' +property['style.bracelight'] = 'fore:$(color.light_blue)' +property['style.bracebad'] = 'fore:$(color.light_red)' +property['style.controlchar'] = '' +property['style.indentguide'] = 'fore:$(color.light_black)' +property['style.calltip'] = 'fore:$(color.light_grey),back:$(color.light_black)' +property['style.folddisplaytext'] = 'fore:$(color.dark_grey)' + +-- Token styles. 
+property['style.class'] = 'fore:$(color.light_yellow)' +property['style.comment'] = 'fore:$(color.dark_grey)' +property['style.constant'] = 'fore:$(color.red)' +property['style.embedded'] = '$(style.keyword),back:$(color.light_black)' +property['style.error'] = 'fore:$(color.red),italics' +property['style.function'] = 'fore:$(color.blue)' +property['style.identifier'] = '' +property['style.keyword'] = 'fore:$(color.dark_white)' +property['style.label'] = 'fore:$(color.orange)' +property['style.number'] = 'fore:$(color.teal)' +property['style.operator'] = 'fore:$(color.yellow)' +property['style.preprocessor'] = 'fore:$(color.purple)' +property['style.regex'] = 'fore:$(color.light_green)' +property['style.string'] = 'fore:$(color.green)' +property['style.type'] = 'fore:$(color.lavender)' +property['style.variable'] = 'fore:$(color.light_blue)' +property['style.whitespace'] = '' diff --git a/lexlua/themes/light.lua b/lexlua/themes/light.lua new file mode 100644 index 000000000..644953198 --- /dev/null +++ b/lexlua/themes/light.lua @@ -0,0 +1,89 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Light theme for Lua lexers. +-- Contributions by Ana Balan. + +local property = require('lexer').property + +-- Greyscale colors. +--property['color.dark_black'] = '#000000' +--property['color.black'] = '#1A1A1A' +property['color.light_black'] = '#333333' +--property['color.grey_black'] = '#4D4D4D' +--property['color.dark_grey'] = '#666666' +property['color.grey'] = '#808080' +--property['color.light_grey'] = '#999999' +--property['grey_white'] = '#B3B3B3' +property['color.dark_white'] = '#CCCCCC' +property['color.white'] = '#E6E6E6' +--property['color.light_white'] = '#FFFFFF' + +-- Dark colors. 
+--property['color.dark_red'] = '#661A1A' +property['color.dark_yellow'] = '#66661A' +property['color.dark_green'] = '#1A661A' +--property['color.dark_teal'] = '#1A6666' +--property['color.dark_purple'] = '#661A66' +property['color.dark_orange'] = '#B3661A' +--property['color.dark_pink'] = '#B36666' +property['color.dark_lavender'] = '#6666B3' +property['color.dark_blue'] = '#1A66B3' + +-- Normal colors. +property['color.red'] = '#994D4D' +property['color.yellow'] = '#99994D' +property['color.green'] = '#4D994D' +property['color.teal'] = '#4D9999' +property['color.purple'] = '#994D99' +--property['color.orange'] = '#E6994D' +--property['color.pink'] = '#E69999' +property['color.lavender'] = '#9999E6' +--property['color.blue'] = '#4D99E6' + +-- Light colors. +property['color.light_red'] = '#C08080' +--property['color.light_yellow'] = '#CCCC80' +--property['color.light_green'] = '#80CC80' +--property['color.light_teal'] = '#80CCCC' +--property['color.light_purple'] = '#CC80CC' +--property['color.light_orange'] = '#FFCC80' +--property['color.light_pink'] = '#FFCCCC' +--property['color.light_lavender'] = '#CCCCFF' +property['color.light_blue'] = '#80CCFF' + +-- Default style. +property['font'], property['fontsize'] = 'Bitstream Vera Sans Mono', 10 +if WIN32 then + property['font'] = 'Courier New' +elseif OSX then + property['font'], property['fontsize'] = 'Monaco', 12 +end + +-- Predefined styles. +property['style.default'] = 'font:$(font),size:$(fontsize),'.. + 'fore:$(color.light_black),back:$(color.white)' +property['style.linenumber'] = 'fore:$(color.grey),back:$(color.white)' +property['style.bracelight'] = 'fore:$(color.light_blue)' +property['style.bracebad'] = 'fore:$(color.light_red)' +property['style.controlchar'] = '' +property['style.indentguide'] = 'fore:$(color.dark_white)' +property['style.calltip'] = 'fore:$(color.light_black),back:$(color.dark_white)' +property['style.folddisplaytext'] = 'fore:$(color.grey)' + +-- Token styles. 
+property['style.class'] = 'fore:$(color.yellow)' +property['style.comment'] = 'fore:$(color.grey)' +property['style.constant'] = 'fore:$(color.red)' +property['style.embedded'] = '$(style.keyword),back:$(color.dark_white)' +property['style.error'] = 'fore:$(color.red),italics' +property['style.function'] = 'fore:$(color.dark_orange)' +property['style.identifier'] = '' +property['style.keyword'] = 'fore:$(color.dark_blue)' +property['style.label'] = 'fore:$(color.dark_orange)' +property['style.number'] = 'fore:$(color.teal)' +property['style.operator'] = 'fore:$(color.purple)' +property['style.preprocessor'] = 'fore:$(color.dark_yellow)' +property['style.regex'] = 'fore:$(color.dark_green)' +property['style.string'] = 'fore:$(color.green)' +property['style.type'] = 'fore:$(color.lavender)' +property['style.variable'] = 'fore:$(color.dark_lavender)' +property['style.whitespace'] = '' diff --git a/lexlua/themes/scite.lua b/lexlua/themes/scite.lua new file mode 100644 index 000000000..741b07e92 --- /dev/null +++ b/lexlua/themes/scite.lua @@ -0,0 +1,53 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- SciTE theme for Lua lexers. + +local property = require('lexer').property + +property['color.red'] = '#7F0000' +property['color.yellow'] = '#7F7F00' +property['color.green'] = '#007F00' +property['color.teal'] = '#007F7F' +property['color.purple'] = '#7F007F' +property['color.orange'] = '#B07F00' +property['color.blue'] = '#00007F' +property['color.black'] = '#000000' +property['color.grey'] = '#808080' +property['color.white'] = '#FFFFFF' + +-- Default style. +property['font'], property['fontsize'] = 'Monospace', 11 +if WIN32 then + property['font'] = 'Courier New' +elseif OSX then + property['font'], property['fontsize'] = 'Monaco', 12 +end + +-- Predefined styles. +property['style.default'] = 'font:$(font),size:$(fontsize),'.. 
+ 'fore:$(color.black),back:$(color.white)' +property['style.linenumber'] = 'back:#C0C0C0' +property['style.bracelight'] = 'fore:#0000FF,bold' +property['style.bracebad'] = 'fore:#FF0000,bold' +property['style.controlchar'] = '' +property['style.indentguide'] = 'fore:#C0C0C0,back:$(color.white)' +property['style.calltip'] = 'fore:$(color.white),back:#444444' +property['style.folddisplaytext'] = '' + +-- Token styles. +property['style.class'] = 'fore:$(color.black),bold' +property['style.comment'] = 'fore:$(color.green)' +property['style.constant'] = 'fore:$(color.teal),bold' +property['style.embedded'] = 'fore:$(color.blue)' +property['style.error'] = 'fore:$(color.red)' +property['style.function'] = 'fore:$(color.black),bold' +property['style.identifier'] = '' +property['style.keyword'] = 'fore:$(color.blue),bold' +property['style.label'] = 'fore:$(color.teal),bold' +property['style.number'] = 'fore:$(color.teal)' +property['style.operator'] = 'fore:$(color.black),bold' +property['style.preprocessor'] = 'fore:$(color.yellow)' +property['style.regex'] = '$(style.string)' +property['style.string'] = 'fore:$(color.purple)' +property['style.type'] = 'fore:$(color.blue)' +property['style.variable'] = 'fore:$(color.black)' +property['style.whitespace'] = '' diff --git a/lexlua/toml.lua b/lexlua/toml.lua new file mode 100644 index 000000000..ba8ec9be1 --- /dev/null +++ b/lexlua/toml.lua @@ -0,0 +1,53 @@ +-- Copyright 2015-2018 Alejandro Baez (https://keybase.io/baez). See License.txt. +-- TOML LPeg lexer. 
+ +local lexer = require("lexer") +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('toml', {fold_by_indentation = true}) + +-- Whitespace. +lex:add_rule('indent', #lexer.starts_line(S(' \t')) * + (token(lexer.WHITESPACE, ' ') + + token('indent_error', '\t'))^1) +lex:add_rule('whitespace', token(lexer.WHITESPACE, S(' \t')^1 + + lexer.newline^1)) +lex:add_style('indent_error', 'back:%(color.red)') + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[true false]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'") + + lexer.delimited_range('"'))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '#' * lexer.nonnewline^0)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('#=+-,.{}[]()'))) + +-- Datetime. +lex:add_rule('datetime', + token('timestamp', + lexer.digit * lexer.digit * lexer.digit * lexer.digit * -- yr + '-' * lexer.digit * lexer.digit^-1 * -- month + '-' * lexer.digit * lexer.digit^-1 * -- day + ((S(' \t')^1 + S('tT'))^-1 * -- separator + lexer.digit * lexer.digit^-1 * -- hour + ':' * lexer.digit * lexer.digit * -- minute + ':' * lexer.digit * lexer.digit * -- second + ('.' * lexer.digit^0)^-1 * -- fraction + ('Z' + -- timezone + S(' \t')^0 * S('-+') * lexer.digit * lexer.digit^-1 * + (':' * lexer.digit * lexer.digit)^-1)^-1)^-1)) +lex:add_style('timestamp', lexer.STYLE_NUMBER) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +return lex diff --git a/lexlua/vala.lua b/lexlua/vala.lua new file mode 100644 index 000000000..ebc930392 --- /dev/null +++ b/lexlua/vala.lua @@ -0,0 +1,60 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Vala LPeg lexer.
+ +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('vala') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + class delegate enum errordomain interface namespace signal struct using + -- Modifiers. + abstract const dynamic extern inline out override private protected public ref + static virtual volatile weak + -- Other. + as base break case catch construct continue default delete do else ensures + finally for foreach get if in is lock new requires return set sizeof switch + this throw throws try typeof value var void while + -- Etc. + null true false +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + bool char double float int int8 int16 int32 int64 long short size_t ssize_t + string uchar uint uint8 uint16 uint32 uint64 ulong unichar ushort +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +local sq_str = lexer.delimited_range("'", true) +local dq_str = lexer.delimited_range('"', true) +local tq_str = '"""' * (lexer.any - '"""')^0 * P('"""')^-1 +local ml_str = '@' * lexer.delimited_range('"', false, true) +lex:add_rule('string', token(lexer.STRING, tq_str + sq_str + dq_str + ml_str)) + +-- Comments. +local line_comment = '//' * lexer.nonnewline_esc^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('uUlLfFdDmM')^-1)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}'))) + +-- Fold points. 
+lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/vb.lua b/lexlua/vb.lua new file mode 100644 index 000000000..170b49321 --- /dev/null +++ b/lexlua/vb.lua @@ -0,0 +1,53 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- VisualBasic LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('vb') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + -- Control. + If Then Else ElseIf While Wend For To Each In Step Case Select Return Continue + Do Until Loop Next With Exit + -- Operators. + Mod And Not Or Xor Is + -- Storage types. + Call Class Const Dim ReDim Preserve Function Sub Property End Set Let Get New + Randomize Option Explicit On Error Execute + -- Storage modifiers. + Private Public Default + -- Constants. + Empty False Nothing Null True +]], true))) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match([[ + Boolean Byte Char Date Decimal Double Long Object Short Single String +]], true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, + (P("'") + word_match([[rem]], true)) * + lexer.nonnewline^0)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range('"', true, true))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('LlUuFf')^-2)) + +-- Operators. 
+lex:add_rule('operator', token(lexer.OPERATOR, S('=><+-*^&:.,_()'))) + +return lex diff --git a/lexlua/vbscript.lua b/lexlua/vbscript.lua new file mode 100644 index 000000000..acc59df60 --- /dev/null +++ b/lexlua/vbscript.lua @@ -0,0 +1,53 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- VisualBasic LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('vbscript') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match([[ + -- Control. + If Then Else ElseIf While Wend For To Each In Step Case Select Return Continue + Do Until Loop Next With Exit + -- Operators. + Mod And Not Or Xor Is + -- Storage types. + Call Class Const Dim ReDim Preserve Function Sub Property End Set Let Get New + Randomize Option Explicit On Error Execute + -- Storage modifiers. + Private Public Default + -- Constants. + Empty False Nothing Null True +]], true))) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match([[ + Boolean Byte Char Date Decimal Double Long Object Short Single String +]], true))) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, + (P("'") + word_match([[rem]], true)) * + lexer.nonnewline^0)) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, + lexer.delimited_range('"', true, true))) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, (lexer.float + lexer.integer) * + S('LlUuFf')^-2)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=><+-*^&:.,_()'))) + +return lex diff --git a/lexlua/vcard.lua b/lexlua/vcard.lua new file mode 100644 index 000000000..453d27a27 --- /dev/null +++ b/lexlua/vcard.lua @@ -0,0 +1,101 @@ +-- Copyright (c) 2015-2018 Piotr Orzechowski [drzewo.org]. 
See License.txt. +-- vCard 2.1, 3.0 and 4.0 LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local M = {_NAME = 'vcard'} + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) + +-- Required properties. +local required_property = token(lexer.KEYWORD, word_match({ + 'BEGIN', 'END', 'FN', 'N' --[[ Not required in v4.0. ]], 'VERSION' +}, nil, true)) * #P(':') + +-- Supported properties. +local supported_property = token(lexer.TYPE, word_match({ + 'ADR', 'AGENT' --[[ Not supported in v4.0. ]], + 'ANNIVERSARY' --[[ Supported in v4.0 only. ]], 'BDAY', + 'CALADRURI' --[[ Supported in v4.0 only. ]], + 'CALURI' --[[ Supported in v4.0 only. ]], 'CATEGORIES', + 'CLASS' --[[ Supported in v3.0 only. ]], + 'CLIENTPIDMAP' --[[ Supported in v4.0 only. ]], 'EMAIL', 'END', + 'FBURL' --[[ Supported in v4.0 only. ]], + 'GENDER' --[[ Supported in v4.0 only. ]], 'GEO', + 'IMPP' --[[ Not supported in v2.1. ]], 'KEY', + 'KIND' --[[ Supported in v4.0 only. ]], + 'LABEL' --[[ Not supported in v4.0. ]], + 'LANG' --[[ Supported in v4.0 only. ]], 'LOGO', + 'MAILER' --[[ Not supported in v4.0. ]], + 'MEMBER' --[[ Supported in v4.0 only. ]], + 'NAME' --[[ Supported in v3.0 only. ]], + 'NICKNAME' --[[ Not supported in v2.1. ]], 'NOTE', 'ORG', 'PHOTO', + 'PRODID' --[[ Not supported in v2.1. ]], + 'PROFILE' --[[ Not supported in v4.0. ]], + 'RELATED' --[[ Supported in v4.0 only. ]], 'REV', 'ROLE', + 'SORT-STRING' --[[ Not supported in v4.0. ]], 'SOUND', 'SOURCE', 'TEL', + 'TITLE', 'TZ', 'UID', 'URL', 'XML' --[[ Supported in v4.0 only. ]] +}, nil, true)) * #S(':;') + +local identifier = lexer.alpha^1 * lexer.digit^0 * (P('-') * lexer.alnum^1)^0 + +-- Extension. +local extension = token(lexer.TYPE, lexer.starts_line(S('xX') * P('-') * + identifier * #S(':;'))) + +-- Parameter. 
+local parameter = token(lexer.IDENTIFIER, + lexer.starts_line(identifier * #S(':='))) + + token(lexer.STRING, identifier) * #S(':=') + +-- Operators. +local operator = token(lexer.OPERATOR, S('.:;=')) + +-- Group and property. +local group_sequence = token(lexer.CONSTANT, lexer.starts_line(identifier)) * + token(lexer.OPERATOR, P('.')) * + (required_property + supported_property + + lexer.token(lexer.TYPE, S('xX') * P('-') * identifier) * + #S(':;')) +-- Begin vCard, end vCard. +local begin_sequence = token(lexer.KEYWORD, P('BEGIN')) * + token(lexer.OPERATOR, P(':')) * + token(lexer.COMMENT, P('VCARD')) +local end_sequence = token(lexer.KEYWORD, P('END')) * + token(lexer.OPERATOR, P(':')) * + token(lexer.COMMENT, P('VCARD')) + +-- vCard version (in v3.0 and v4.0 must appear immediately after BEGIN:VCARD). +local version_sequence = token(lexer.KEYWORD, P('VERSION')) * + token(lexer.OPERATOR, P(':')) * + token(lexer.CONSTANT, lexer.digit^1 * + (P('.') * lexer.digit^1)^-1) + +-- Data. +local data = token(lexer.IDENTIFIER, lexer.any) + +-- Rules. +M._rules = { + {'whitespace', ws}, + {'begin_sequence', begin_sequence}, + {'end_sequence', end_sequence}, + {'version_sequence', version_sequence}, + {'group_sequence', group_sequence}, + {'required_property', required_property}, + {'supported_property', supported_property}, + {'extension', extension}, + {'parameter', parameter}, + {'operator', operator}, + {'data', data}, +} + +-- Folding. +M._foldsymbols = { + _patterns = {'BEGIN', 'END'}, + [lexer.KEYWORD] = {['BEGIN'] = 1, ['END'] = -1} +} + +return M diff --git a/lexlua/verilog.lua b/lexlua/verilog.lua new file mode 100644 index 000000000..63af4c61c --- /dev/null +++ b/lexlua/verilog.lua @@ -0,0 +1,86 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- Verilog LPeg lexer. 
+ +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('verilog') + +-- Whitespace. +lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + always assign begin case casex casez default deassign disable else end endcase + endfunction endgenerate endmodule endprimitive endspecify endtable endtask for + force forever fork function generate if initial join macromodule module + negedge posedge primitive repeat release specify table task wait while + -- Compiler directives. + `include `define `undef `ifdef `ifndef `else `endif `timescale `resetall + `signed `unsigned `celldefine `endcelldefine `default_nettype + `unconnected_drive `nounconnected_drive `protect `endprotect `protected + `endprotected `remove_gatename `noremove_gatename `remove_netname + `noremove_netname `expand_vectornets `noexpand_vectornets + `autoexpand_vectornets + -- Signal strengths. + strong0 strong1 pull0 pull1 weak0 weak1 highz0 highz1 small medium large +]])) + +-- Function. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + $stop $finish $time $stime $realtime $settrace $cleartrace $showscopes + $showvars $monitoron $monitoroff $random $printtimescale $timeformat $display + -- Built-in primitives. + and nand or nor xor xnor buf bufif0 bufif1 not notif0 notif1 nmos pmos cmos + rnmos rpmos rcmos tran tranif0 tranif1 rtran rtranif0 rtranif1 pullup pulldown +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + integer reg time realtime defparam parameter event wire wand wor tri triand + trior tri0 tri1 trireg vectored scalared input output inout supply0 supply1 +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word)) + +-- Strings. +lex:add_rule('string', token(lexer.STRING, lexer.delimited_range('"'))) + +-- Comments. 
+local line_comment = '//' * lexer.nonnewline^0 +local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1 +lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment)) + +-- Numbers. +local bin_suffix = S('bB') * S('01_xXzZ')^1 +local oct_suffix = S('oO') * S('01234567_xXzZ')^1 +local dec_suffix = S('dD') * S('0123456789_xXzZ')^1 +local hex_suffix = S('hH') * S('0123456789abcdefABCDEF_xXzZ')^1 +lex:add_rule('number', token(lexer.NUMBER, (lexer.digit + '_')^1 + + "'" * (bin_suffix + oct_suffix + + dec_suffix + hex_suffix))) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=~+-/*<>%&|^~,:;()[]{}'))) + +-- Fold points. +lex:add_fold_point(lexer.KEYWORD, 'case', 'endcase') +lex:add_fold_point(lexer.KEYWORD, 'casex', 'endcase') +lex:add_fold_point(lexer.KEYWORD, 'casez', 'endcase') +lex:add_fold_point(lexer.KEYWORD, 'function', 'endfunction') +lex:add_fold_point(lexer.KEYWORD, 'fork', 'join') +lex:add_fold_point(lexer.KEYWORD, 'table', 'endtable') +lex:add_fold_point(lexer.KEYWORD, 'task', 'endtask') +lex:add_fold_point(lexer.KEYWORD, 'generate', 'endgenerate') +lex:add_fold_point(lexer.KEYWORD, 'specify', 'endspecify') +lex:add_fold_point(lexer.KEYWORD, 'primitive', 'endprimitive') +lex:add_fold_point(lexer.KEYWORD, 'module', 'endmodule') +lex:add_fold_point(lexer.KEYWORD, 'begin', 'end') +lex:add_fold_point(lexer.OPERATOR, '(', ')') +lex:add_fold_point(lexer.OPERATOR, '{', '}') +lex:add_fold_point(lexer.COMMENT, '/*', '*/') +lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//')) + +return lex diff --git a/lexlua/vhdl.lua b/lexlua/vhdl.lua new file mode 100644 index 000000000..ea5ff3768 --- /dev/null +++ b/lexlua/vhdl.lua @@ -0,0 +1,69 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- VHDL LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +local lex = lexer.new('vhdl') + +-- Whitespace. 
+lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1)) + +-- Keywords. +lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[ + access after alias all architecture array assert attribute begin block body + buffer bus case component configuration constant disconnect downto else elsif + end entity exit file for function generate generic group guarded if impure in + inertial inout is label library linkage literal loop map new next null of on + open others out package port postponed procedure process pure range record + register reject report return select severity signal shared subtype then to + transport type unaffected units until use variable wait when while with + note warning error failure + and nand or nor xor xnor rol ror sla sll sra srl mod rem + abs not false true +]])) + +-- Functions. +lex:add_rule('function', token(lexer.FUNCTION, word_match[[ + rising_edge shift_left shift_right rotate_left rotate_right resize std_match + to_integer to_unsigned to_signed unsigned signed to_bit to_bitvector + to_stdulogic to_stdlogicvector to_stdulogicvector +]])) + +-- Types. +lex:add_rule('type', token(lexer.TYPE, word_match[[ + bit bit_vector character boolean integer real time string severity_level + positive natural signed unsigned line text std_logic std_logic_vector + std_ulogic std_ulogic_vector qsim_state qsim_state_vector qsim_12state + qsim_12state_vector qsim_strength mux_bit mux_vectory reg_bit reg_vector + wor_bit wor_vector +]])) + +-- Constants. +lex:add_rule('constant', token(lexer.CONSTANT, word_match[[ + EVENT BASE LEFT RIGHT LOW HIGH ASCENDING IMAGE VALUE POS VAL SUCC VAL POS PRED + VAL POS LEFTOF RIGHTOF LEFT RIGHT LOW HIGH RANGE REVERSE LENGTH ASCENDING + DELAYED STABLE QUIET TRANSACTION EVENT ACTIVE LAST LAST LAST DRIVING DRIVING + SIMPLE INSTANCE PATH +]])) + +-- Identifiers. +lex:add_rule('identifier', token(lexer.IDENTIFIER, (lexer.alpha + "'") * + (lexer.alnum + S("_'"))^1)) + +-- Strings. 
+local sq_str = lexer.delimited_range("'", true, true) +local dq_str = lexer.delimited_range('"', true) +lex:add_rule('string', token(lexer.STRING, sq_str + dq_str)) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '--' * lexer.nonnewline^0)) + +-- Numbers. +lex:add_rule('number', token(lexer.NUMBER, lexer.float + lexer.integer)) + +-- Operators. +lex:add_rule('operator', token(lexer.OPERATOR, S('=/!:;<>+-/*%&|^~()'))) + +return lex diff --git a/lexlua/wsf.lua b/lexlua/wsf.lua new file mode 100644 index 000000000..123d3b543 --- /dev/null +++ b/lexlua/wsf.lua @@ -0,0 +1,101 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- WSF LPeg lexer (based on XML). +-- Contributed by Jeff Stone. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lex = lexer.new('wsf') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Comments. +lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * + P('-->')^-1)) + +local alpha = R('az', 'AZ', '\127\255') +local word_char = lexer.alnum + S('_-:.??') +local identifier = (alpha + S('_-:.??')) * word_char^0 + +-- Elements. +local element = token('element', '<' * P('/')^-1 * identifier) +lex:add_rule('element', element) +lex:add_style('element', lexer.STYLE_KEYWORD) + +-- Closing tags. +local tag_close = token('element', P('/')^-1 * '>') +lex:add_rule('tag_close', tag_close) + +-- Attributes. 
+local attribute = token('attribute', identifier) * #(lexer.space^0 * '=') +lex:add_rule('attribute', attribute) +lex:add_style('attribute', lexer.STYLE_TYPE) + +local in_tag = P(function(input, index) + local before = input:sub(1, index - 1) + local s, e = before:find('<[^>]-$'), before:find('>[^<]-$') + if s and e then return s > e and index or nil end + if s then return index end + return input:find('^[^<]->', index) and index or nil +end) + +-- Equals. +local equals = token(lexer.OPERATOR, '=') * in_tag +lex:add_rule('equals', equals) + +-- Strings. +local string = #S('\'"') * lexer.last_char_includes('=') * + token(lexer.STRING, lexer.delimited_range("'", false, true) + + lexer.delimited_range('"', false, true)) +lex:add_rule('string', string) + +-- Numbers. +lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * + token(lexer.NUMBER, lexer.digit^1 * P('%')^-1) * in_tag) + +-- Entities. +lex:add_rule('entity', token('entity', '&' * word_match[[ + lt gt amp apos quot +]] * ';')) +lex:add_style('entity', lexer.STYLE_OPERATOR) + +-- Fold points. +local function disambiguate_lt(text, pos, line, s) + return not line:find('^</', s) and 1 or -1 +end +lex:add_fold_point('element', '<', disambiguate_lt) +lex:add_fold_point('element', '/>', -1) +lex:add_fold_point(lexer.COMMENT, '<!--', '-->') + +-- Finally, add JavaScript and VBScript as embedded languages + +-- Tags that start embedded languages. +local embed_start_tag = element * + (ws^1 * attribute * ws^0 * equals * ws^0 * string)^0 * + ws^0 * tag_close +local embed_end_tag = element * tag_close + +-- Embedded JavaScript. 
+local js = lexer.load('javascript') +local js_start_rule = #(P('<script') * (P(function(input, index) + if input:find('^%s+language%s*=%s*(["\'])[jJ][ava]*[sS]cript%1', index) then + return index + end +end) + '>')) * embed_start_tag -- <script language="javascript"> +local js_end_rule = #('</script' * ws^0 * '>') * embed_end_tag -- </script> +lex:embed(js, js_start_rule, js_end_rule) + +-- Embedded VBScript. +local vbs = lexer.load('vbscript') +local vbs_start_rule = #(P('<script') * (P(function(input, index) + if input:find('^%s+language%s*=%s*(["\'])[vV][bB][sS]cript%1', index) then + return index + end +end) + '>')) * embed_start_tag -- <script language="vbscript"> +local vbs_end_rule = #('</script' * ws^0 * '>') * embed_end_tag -- </script> +lex:embed(vbs, vbs_start_rule, vbs_end_rule) + +return lex diff --git a/lexlua/xml.lua b/lexlua/xml.lua new file mode 100644 index 000000000..d709ef3e5 --- /dev/null +++ b/lexlua/xml.lua @@ -0,0 +1,88 @@ +-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt. +-- XML LPeg lexer. + +local lexer = require('lexer') +local token, word_match = lexer.token, lexer.word_match +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V + +local lex = lexer.new('xml') + +-- Whitespace. +local ws = token(lexer.WHITESPACE, lexer.space^1) +lex:add_rule('whitespace', ws) + +-- Comments and CDATA. +lex:add_rule('comment', token(lexer.COMMENT, '<!--' * (lexer.any - '-->')^0 * + P('-->')^-1)) +lex:add_rule('cdata', token('cdata', '<![CDATA[' * (lexer.any - ']]>')^0 * + P(']]>')^-1)) +lex:add_style('cdata', lexer.STYLE_COMMENT) + +local alpha = R('az', 'AZ', '\127\255') +local word_char = lexer.alnum + S('_-:.??') +local identifier = (alpha + S('_-:.??')) * word_char^0 + +-- Doctypes and other markup tags. 
+lex:add_rule('doctype', token('doctype', P('<!DOCTYPE')) * ws * + token('doctype', identifier) * (ws * identifier)^-1 * + (1 - P('>'))^0 * token('doctype', '>')) +lex:add_style('doctype', lexer.STYLE_COMMENT) + +-- Processing instructions. +lex:add_rule('proc_insn', token('proc_insn', P('<?') * (1 - P('?>'))^0 * + P('?>')^-1)) +lex:add_style('proc_insn', lexer.STYLE_COMMENT) + +-- Elements. +local namespace = token(lexer.OPERATOR, ':') * token('namespace', identifier) +lex:add_rule('element', token('element', '<' * P('/')^-1 * identifier) * + namespace^-1) +lex:add_style('element', lexer.STYLE_KEYWORD) +lex:add_style('namespace', lexer.STYLE_CLASS) + +-- Closing tags. +lex:add_rule('close_tag', token('element', P('/')^-1 * '>')) + +-- Attributes. +lex:add_rule('attribute', token('attribute', identifier) * namespace^-1 * + #(lexer.space^0 * '=')) +lex:add_style('attribute', lexer.STYLE_TYPE) + +-- TODO: performance is terrible on large files. +local in_tag = P(function(input, index) + local before = input:sub(1, index - 1) + local s, e = before:find('<[^>]-$'), before:find('>[^<]-$') + if s and e then return s > e and index or nil end + if s then return index end + return input:find('^[^<]->', index) and index or nil +end) + +-- Equals. +--lex:add_rule('equal', token(lexer.OPERATOR, '=')) -- * in_tag + +-- Strings. +lex:add_rule('string', #S('\'"') * lexer.last_char_includes('=') * + token(lexer.STRING, + lexer.delimited_range("'", false, true) + + lexer.delimited_range('"', false, true))) + +-- Numbers. +lex:add_rule('number', #lexer.digit * lexer.last_char_includes('=') * + token(lexer.NUMBER, lexer.digit^1 * P('%')^-1))--*in_tag) + +-- Entities. +lex:add_rule('entity', token('entity', '&' * word_match[[ + lt gt amp apos quot +]] * ';')) +lex:add_style('entity', lexer.STYLE_OPERATOR) + +-- Fold Points. 
+-- Fold handler for '<': +1 (open fold) for an opening tag, -1 (close fold)
+-- when the line at position s starts a closing tag '</'.
+local function disambiguate_lt(text, pos, line, s)
+  return not line:find('^</', s) and 1 or -1
+end
+lex:add_fold_point('element', '<', disambiguate_lt)
+lex:add_fold_point('element', '/>', -1)
+lex:add_fold_point(lexer.COMMENT, '<!--', '-->')
+lex:add_fold_point('cdata', '<![CDATA[', ']]>')
+
+return lex
diff --git a/lexlua/xtend.lua b/lexlua/xtend.lua
new file mode 100644
index 000000000..452080e45
--- /dev/null
+++ b/lexlua/xtend.lua
@@ -0,0 +1,90 @@
+-- Copyright (c) 2014-2018 Piotr Orzechowski [drzewo.org]. See License.txt.
+-- Xtend LPeg lexer.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+local lex = lexer.new('xtend')
+
+-- Whitespace.
+local ws = token(lexer.WHITESPACE, lexer.space^1)
+lex:add_rule('whitespace', ws)
+
+-- Classes.
+-- 'class' keyword followed by the class name; must precede the generic
+-- keyword rule so the name is styled as a class.
+lex:add_rule('class', token(lexer.KEYWORD, P('class')) * ws^1 *
+             token(lexer.CLASS, lexer.word))
+
+-- Keywords.
+lex:add_rule('keyword', token(lexer.KEYWORD, word_match[[
+  -- General.
+  abstract annotation as case catch class create def default dispatch do else
+  enum extends extension final finally for if implements import interface
+  instanceof it new override package private protected public return self static
+  super switch synchronized this throw throws try typeof val var while
+  -- Templates.
+  -- AFTER BEFORE ENDFOR ENDIF FOR IF SEPARATOR
+  -- Literals.
+  true false null
+]]))
+
+-- Types.
+lex:add_rule('type', token(lexer.TYPE, word_match[[
+  boolean byte char double float int long short void
+  Boolean Byte Character Double Float Integer Long Short String
+]]))
+
+-- Functions.
+-- Any word immediately followed by '(' (lookahead; the paren is not consumed).
+lex:add_rule('function', token(lexer.FUNCTION, lexer.word) * #P('('))
+
+-- Identifiers.
+lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+
+-- Templates.
+-- Xtend '''...''' template expressions; closing quotes optional at EOF.
+lex:add_rule('template', token('template', "'''" * (lexer.any - P("'''"))^0 *
+                               P("'''")^-1))
+lex:add_style('template', lexer.STYLE_EMBEDDED)
+
+-- Strings.
+lex:add_rule('string', token(lexer.STRING, lexer.delimited_range("'", true) +
+                             lexer.delimited_range('"', true)))
+
+-- Comments.
+local line_comment = '//' * lexer.nonnewline_esc^0
+local block_comment = '/*' * (lexer.any - '*/')^0 * P('*/')^-1
+lex:add_rule('comment', token(lexer.COMMENT, line_comment + block_comment))
+
+-- Numbers.
+-- Suffixes: lL (long), bBiI (BigInteger), dD/fF/bBdD (double/float/BigDecimal).
+local small_suff = S('lL')
+local med_suff = S('bB') * S('iI')
+local large_suff = S('dD') + S('fF') + S('bB') * S('dD')
+local exp = S('eE') * lexer.digit^1
+
+-- '_'-separated digit groups, e.g. 1_000_000.
+local dec_inf = ('_' * lexer.digit^1)^0
+local hex_inf = ('_' * lexer.xdigit^1)^0
+local float_pref = lexer.digit^1 * '.' * lexer.digit^1
+local float_suff = exp^-1 * med_suff^-1 * large_suff^-1
+
+-- NOTE(review): 'lexer.digit * dec_inf' consumes only ONE leading digit
+-- before the underscore groups (dec_inf requires a '_' to continue), so
+-- '123' is lexed as three one-digit numbers — presumably 'lexer.digit^1'
+-- was intended; confirm against upstream xtend.lua.
+local dec = lexer.digit * dec_inf * (small_suff^-1 + float_suff)
+local hex = lexer.hex_num * hex_inf * P('#' * (small_suff + med_suff))^-1
+local float = float_pref * dec_inf * float_suff
+
+lex:add_rule('number', token(lexer.NUMBER, float + hex + dec))
+
+-- Annotations.
+lex:add_rule('annotation', token('annotation', '@' * lexer.word))
+lex:add_style('annotation', lexer.STYLE_PREPROCESSOR)
+
+-- Operators.
+lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%<>!=^&|?~:;.()[]{}#')))
+
+-- Error.
+-- Catch-all: any char not matched above is flagged as an error token.
+lex:add_rule('error', token(lexer.ERROR, lexer.any))
+
+-- Fold points.
+lex:add_fold_point(lexer.OPERATOR, '{', '}')
+lex:add_fold_point(lexer.COMMENT, '/*', '*/')
+lex:add_fold_point(lexer.COMMENT, '//', lexer.fold_line_comments('//'))
+lex:add_fold_point(lexer.KEYWORD, 'import', lexer.fold_line_comments('import'))
+
+return lex
diff --git a/lexlua/yaml.lua b/lexlua/yaml.lua
new file mode 100644
index 000000000..abfab8a60
--- /dev/null
+++ b/lexlua/yaml.lua
@@ -0,0 +1,120 @@
+-- Copyright 2006-2018 Mitchell mitchell.att.foicica.com. See License.txt.
+-- YAML LPeg lexer.
+-- It does not keep track of indentation perfectly.
+
+local lexer = require('lexer')
+local token, word_match = lexer.token, lexer.word_match
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+-- NOTE(review): this file uses the legacy module-table lexer API
+-- (M._NAME / M._rules / M._tokenstyles) while the other lexers in this
+-- commit use lexer.new(); presumably it was not yet ported — confirm.
+local M = {_NAME = 'yaml'}
+
+-- Whitespace.
+-- Leading spaces are plain whitespace; a leading tab is flagged as an
+-- indentation error ('indent_error' token, styled red below).
+local indent = #lexer.starts_line(S(' \t')) *
+               (token(lexer.WHITESPACE, ' ') + token('indent_error', '\t'))^1
+local ws = token(lexer.WHITESPACE, S(' \t')^1 + lexer.newline^1)
+
+-- Comments.
+local comment = token(lexer.COMMENT, '#' * lexer.nonnewline^0)
+
+-- Strings.
+local string = token(lexer.STRING, lexer.delimited_range("'") +
+                                   lexer.delimited_range('"'))
+
+-- Numbers.
+-- Decimal, hex, or octal ('0o'/'0O' prefix) integers; '.inf'/'.nan' specials.
+local integer = lexer.dec_num + lexer.hex_num + '0' * S('oO') * R('07')^1
+local special_num = '.' * word_match({'inf', 'nan'}, nil, true)
+local number = token(lexer.NUMBER, special_num + lexer.float + integer)
+
+-- Timestamps.
+-- ISO-8601-style date with optional time and timezone parts.
+local ts = token('timestamp',
+                 lexer.digit * lexer.digit * lexer.digit * lexer.digit * -- year
+                 '-' * lexer.digit * lexer.digit^-1 * -- month
+                 '-' * lexer.digit * lexer.digit^-1 * -- day
+                 ((S(' \t')^1 + S('tT'))^-1 * -- separator
+                  lexer.digit * lexer.digit^-1 * -- hour
+                  ':' * lexer.digit * lexer.digit * -- minute
+                  ':' * lexer.digit * lexer.digit * -- second
+                  ('.' * lexer.digit^0)^-1 * -- fraction
+                  ('Z' + -- timezone
+                   S(' \t')^0 * S('-+') * lexer.digit * lexer.digit^-1 *
+                   (':' * lexer.digit * lexer.digit)^-1)^-1)^-1)
+
+-- Constants.
+local constant = token(lexer.CONSTANT,
+                       word_match({'null', 'true', 'false'}, nil, true))
+
+-- Types.
+-- '!!name' shorthand tags or verbatim '!<...>' tags.
+local type = token(lexer.TYPE, '!!' * word_match({
+  -- Collection types.
+  'map', 'omap', 'pairs', 'set', 'seq',
+  -- Scalar types.
+  'binary', 'bool', 'float', 'int', 'merge', 'null', 'str', 'timestamp',
+  'value', 'yaml'
+}, nil, true) + '!' * lexer.delimited_range('<>'))
+
+-- Document boundaries.
+-- '---' starts and '...' ends a document, only at line start.
+local doc_bounds = token('document', lexer.starts_line(P('---') + '...'))
+
+-- Directives.
+local directive = token('directive', lexer.starts_line('%') *
+                                     lexer.nonnewline^1)
+
+-- A plain-scalar word: leading '-' allowed only when not followed by a space.
+local word = (lexer.alpha + '-' * -lexer.space) * (lexer.alnum + '-')^0
+
+-- Keys and literals.
+local colon = S(' \t')^0 * ':' * (lexer.space + -1)
+-- A mapping key: text up to a ':' terminator, rejected (via the match-time
+-- function) when the current line already contains a 'key:' — so only the
+-- first key on a line matches.
+local key = token(lexer.KEYWORD,
+                  #word * (lexer.nonnewline - colon)^1 * #colon *
+                  P(function(input, index)
+                    local line = input:sub(1, index - 1):match('[^\r\n]+$')
+                    return not line:find('[%w-]+:') and index
+                  end))
+local value = #word * (lexer.nonnewline - lexer.space^0 * S(',]}'))^1
+-- Block scalar ('|' or '>' with optional chomping '+'/'-'): the match-time
+-- function scans following lines and consumes until indentation drops below
+-- the block's level (or to end of input).
+local block = S('|>') * S('+-')^-1 * (lexer.newline + -1) *
+              function(input, index)
+                local rest = input:sub(index)
+                local level = #rest:match('^( *)')
+                for pos, indent, line in rest:gmatch('() *()([^\r\n]+)') do
+                  if indent - pos < level and line ~= ' ' or
+                     level == 0 and pos > 1 then
+                    return index + pos - 1
+                  end
+                end
+                return #input + 1
+              end
+local literal = token('literal', value + block)
+
+-- Indicators.
+local anchor = token(lexer.LABEL, '&' * word)
+local alias = token(lexer.VARIABLE, '*' * word)
+local tag = token('tag', '!' * word * P('!')^-1)
+local reserved = token(lexer.ERROR, S('@`') * word) -- '@' and '`' are reserved in YAML
+local indicator_chars = token(lexer.OPERATOR, S('-?:,[]{}!'))
+
+-- Rule order matters: earlier entries win on a tie.
+M._rules = {
+  {'indent', indent},
+  {'whitespace', ws},
+  {'comment', comment},
+  {'doc_bounds', doc_bounds},
+  {'key', key},
+  {'literal', literal},
+  {'timestamp', ts},
+  {'number', number},
+  {'constant', constant},
+  {'type', type},
+  {'indicator', tag + indicator_chars + alias + anchor + reserved},
+  {'directive', directive},
+}
+
+M._tokenstyles = {
+  indent_error = 'back:%(color.red)', -- highlight tab-indentation errors
+  document = lexer.STYLE_CONSTANT,
+  literal = lexer.STYLE_DEFAULT,
+  timestamp = lexer.STYLE_NUMBER,
+  tag = lexer.STYLE_CLASS,
+  directive = lexer.STYLE_PREPROCESSOR,
+}
+
+-- Fold by indentation, as YAML structure is indentation-based.
+M._FOLDBYINDENTATION = true
+
+return M
|