diff options
| author | Kein-Hong Man <unknown> | 2015-12-18 10:51:12 +1100 | 
|---|---|---|
| committer | Kein-Hong Man <unknown> | 2015-12-18 10:51:12 +1100 | 
| commit | d44ccc7664512d0be6df259d6c36fe960a36fd13 (patch) | |
| tree | 1d08e3b99338af165448cf41a031a9093b3d4b9f | |
| parent | 52b77c3c0da892751a9605a7db932cc2d3a2044e (diff) | |
| download | scintilla-mirror-d44ccc7664512d0be6df259d6c36fe960a36fd13.tar.gz | |
Bug [#1793]. Add support for Perl 5.22.
Double-diamond operator <<>>
Hexadecimal floating point literals
Repetition in list assignment
Added example file to the lexing tests and changed lexing tests to allow larger
files and to standardise on Unix line ends when testing these larger files to
avoid spurious failures.
| -rw-r--r-- | doc/ScintillaHistory.html | 9 | ||||
| -rw-r--r-- | lexers/LexPerl.cxx | 32 | ||||
| -rw-r--r-- | test/examples/perl-test-5220delta.pl | 178 | ||||
| -rw-r--r-- | test/examples/perl-test-5220delta.pl.styled | 178 | ||||
| -rw-r--r-- | test/lexTests.py | 86 | 
5 files changed, 453 insertions, 30 deletions
| diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index b26f955eb..6a6ab5ec5 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -527,8 +527,13 @@  	<a href="http://sourceforge.net/p/scintilla/bugs/1790/">Bug #1790</a>.  	</li>  	<li> -	Perl lexer allows '_' for subroutine prototypes. -	<a href="http://sourceforge.net/p/scintilla/bugs/1791/">Bug #1791</a>. +	Perl lexer updated for Perl 5.20 and 5.22.<br /> +	Allow '_' for subroutine prototypes. +	<a href="http://sourceforge.net/p/scintilla/bugs/1791/">Bug #1791</a>.<br /> +	Double-diamond operator <<>>.<br /> +	Hexadecimal floating point literals.<br /> +	Repetition in list assignment. +	<a href="http://sourceforge.net/p/scintilla/bugs/1793/">Bug #1793</a>.  	</li>  	<li>  	Send SCN_UPDATEUI with SC_UPDATE_SELECTION when the application changes multiple diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx index 39fe10f14..3a69bf8cc 100644 --- a/lexers/LexPerl.cxx +++ b/lexers/LexPerl.cxx @@ -52,10 +52,10 @@ using namespace Scintilla;  #define HERE_DELIM_MAX 256		// maximum length of HERE doc delimiter -#define PERLNUM_BINARY		1	// order is significant: 1-4 cannot have a dot -#define PERLNUM_HEX			2 -#define PERLNUM_OCTAL		3 -#define PERLNUM_FLOAT_EXP	4	// exponent part only +#define PERLNUM_BINARY		1	// order is significant: 1-3 cannot have a dot +#define PERLNUM_OCTAL		2 +#define PERLNUM_FLOAT_EXP	3	// exponent part only +#define PERLNUM_HEX			4	// may be a hex float  #define PERLNUM_DECIMAL		5	// 1-5 are numbers; 6-7 are strings  #define PERLNUM_VECTOR		6  #define PERLNUM_V_VECTOR	7 @@ -760,6 +760,14 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int  							break;  						}  						// number then dot (go through) +					} else if (numState == PERLNUM_HEX) { +						if (dotCount <= 1 && IsADigit(sc.chNext, 16)) { +							break;	// hex with one dot is a hex float +						} else { +							sc.SetState(SCE_PL_OPERATOR); +							break; +						} +						// hex then dot (go through)  					} else if (IsADigit(sc.chNext))	// vectors  						break;  					// vector then dot (go through) @@ -778,8 +786,15 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int  					break;  				// number then word (go through)  			} else if (numState == PERLNUM_HEX) { -				if (IsADigit(sc.ch, 16)) +				if (sc.ch == 'P' || sc.ch == 'p') {	// hex float exponent, sign +					numState = PERLNUM_FLOAT_EXP; +					if (sc.chNext == '+' || sc.chNext == '-') { +						sc.Forward(); +					} +					break; +				} else if (IsADigit(sc.ch, 16))  					break; +				// hex or hex float then word (go through)  			} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {  				if (IsADigit(sc.ch))	// vector  					break; @@ -1263,7 +1278,7 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int  					fw++;  				} else if (sc.ch == 'x' && (sc.chNext == '=' ||	// repetition  				        !setWord.Contains(sc.chNext) || -				        (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) { +				        ((IsADigit(sc.chPrev) || sc.chPrev == ')') && IsADigit(sc.chNext)))) {  					sc.ChangeState(SCE_PL_OPERATOR);  				}  				// if potentially a keyword, scan forward and grab word, then check @@ -1436,7 +1451,10 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int  				}  				backFlag = BACK_NONE;  				if (isHereDoc) {	// handle '<<', HERE doc -					if (preferRE) { +					if (sc.Match("<<>>")) {		// double-diamond operator (5.22) +						sc.SetState(SCE_PL_OPERATOR); +						sc.Forward(3); +					} else if (preferRE) {  						sc.SetState(SCE_PL_HERE_DELIM);  						HereDoc.State = 0;  					} else {		// << operator diff --git a/test/examples/perl-test-5220delta.pl b/test/examples/perl-test-5220delta.pl new file mode 100644 index 000000000..a9c80caa2 --- /dev/null +++ b/test/examples/perl-test-5220delta.pl @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +#-------------------------------------------------------------------------- +# perl-test-5220delta.pl +#-------------------------------------------------------------------------- +# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod +# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod +# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22 +# +#-------------------------------------------------------------------------- +# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217 +#-------------------------------------------------------------------------- +# 20151217	initial document +# 20151218	updated tests and comments +#-------------------------------------------------------------------------- + +use v5.22;			# may be needed + +#-------------------------------------------------------------------------- +# New bitwise operators +#-------------------------------------------------------------------------- + +use feature 'bitwise'		# enable feature, warning enabled +use experimental "bitwise";	# enable feature, warning disabled + +# numerical operands +10&20  10|20   10^20 ~10 +$a&"8" $a|"8" $a^"8" ~$a ~"8" + +# string operands +'0'&."8" '0'|."8" '0'^."8" ~.'0' ~."8" +# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled +# so it's feature-setting-dependent, no plans to change current behaviour + $a&.10   $a|.10   $a^.10  ~.$a  ~.10 + +# assignment variants +$a&=10;    $a|=10;    $a^=10; +$b&.='20'; $b|.='20'; $b^.='20'; +$c&="30";  $c|="30";  $c^="30"; +$d&.=$e;   $d|.=$e;   $d^.=$e; + +#-------------------------------------------------------------------------- +# New double-diamond operator +#-------------------------------------------------------------------------- +# <<>> is like <> but each element of @ARGV will be treated as an actual file name + +# example snippet from brian d foy's blog post +while( <<>> ) {  # new, safe line input operator +	...; +	} + +#-------------------------------------------------------------------------- +# New \b boundaries in regular expressions +#-------------------------------------------------------------------------- + +qr/\b{gcb}/ +qr/\b{wb}/ +qr/\b{sb}/ + +#-------------------------------------------------------------------------- +# Non-Capturing Regular Expression Flag +#-------------------------------------------------------------------------- +# disables capturing and filling in $1, $2, etc + +"hello" =~ /(hi|hello)/n; # $1 is not set + +#-------------------------------------------------------------------------- +# Aliasing via reference +#-------------------------------------------------------------------------- +# Variables and subroutines can now be aliased by assigning to a reference + +\$c = \$d; +\&x = \&y; + +# Aliasing can also be applied to foreach iterator variables + +foreach \%hash (@array_of_hash_refs) { ... } + +# example snippet from brian d foy's blog post + +use feature qw(refaliasing); + +\%other_hash = \%hash; + +use v5.22; +use feature qw(refaliasing); + +foreach \my %hash ( @array_of_hashes ) { # named hash control variable +	foreach my $key ( keys %hash ) { # named hash now! +		...; +		} +	} + +#-------------------------------------------------------------------------- +# New :const subroutine attribute +#-------------------------------------------------------------------------- + +my $x = 54321; +*INLINED = sub : const { $x }; +$x++; + +# more examples of attributes +# (not 5.22 stuff, but some general examples for study, useful for +#  handling subroutine signature and subroutine prototype highlighting) + +sub foo : lvalue ; + +package X; +sub Y::x : lvalue { 1 } + +package X; +sub foo { 1 } +package Y; +BEGIN { *bar = \&X::foo; } +package Z; +sub Y::bar : lvalue ; + +# built-in attributes for subroutines: +lvalue method prototype(..) locked const + +#-------------------------------------------------------------------------- +# Repetition in list assignment +#-------------------------------------------------------------------------- + +# example snippet from brian d foy's blog post +use v5.22; +my(undef, $card_num, (undef)x3, $count) = split /:/; + +(undef,undef,$foo) = that_function() +# is equivalent to  +((undef)x2, $foo) = that_function() + +#-------------------------------------------------------------------------- +# Floating point parsing has been improved +#-------------------------------------------------------------------------- +# Hexadecimal floating point literals + +# some hex floats from a program by Rick Regan +# appropriated and extended from Lua 5.2.x test cases +# tested on perl 5.22/cygwin + +0x1p-1074; +0x3.3333333333334p-5; +0xcc.ccccccccccdp-11; +0x1p+1; +0x1p-6; +0x1.b7p-1; +0x1.fffffffffffffp+1023; +0x1p-1022; +0X1.921FB4D12D84AP+1; +0x1.999999999999ap-4; + +# additional test cases for characterization +0x1p-1074.		# dot is a string operator +0x.ABCDEFp10		# legal, dot immediately after 0x +0x.p10			# perl allows 0x as a zero, then concat with p10 bareword +0x.p 0x0.p		# dot then bareword +0x_0_.A_BC___DEF_p1_0	# legal hex float, underscores are mostly allowed +0x0._ABCDEFp10		# _ABCDEFp10 is a bareword, no underscore allowed after dot + +# illegal, but does not use error highlighting +0x0p1ABC		# illegal, highlighted as 0x0p1 abut with bareword ABC  + +# allowed to FAIL for now +0x0.ABCDEFp_10		# ABCDEFp_10 is a bareword, '_10' exponent not allowed +0xp 0xp1 0x0.0p		# syntax errors +0x41.65.65 		# hex dot number, but lexer now fails with 0x41.65 left as a partial hex float + +#-------------------------------------------------------------------------- +# Support for ?PATTERN? without explicit operator has been removed +#-------------------------------------------------------------------------- +# ?PATTERN? must now be written as m?PATTERN? + +?PATTERN?	# does not work in current LexPerl anyway, NO ACTION NEEDED +m?PATTERN? + +#-------------------------------------------------------------------------- +# end of test file +#-------------------------------------------------------------------------- diff --git a/test/examples/perl-test-5220delta.pl.styled b/test/examples/perl-test-5220delta.pl.styled new file mode 100644 index 000000000..c01757999 --- /dev/null +++ b/test/examples/perl-test-5220delta.pl.styled @@ -0,0 +1,178 @@ +{2}# -*- coding: utf-8 -*-{0} +{2}#--------------------------------------------------------------------------{0} +{2}# perl-test-5220delta.pl{0} +{2}#--------------------------------------------------------------------------{0} +{2}# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod{0} +{2}# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod{0} +{2}# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22{0} +{2}#{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217{0} +{2}#--------------------------------------------------------------------------{0} +{2}# 20151217	initial document{0} +{2}# 20151218	updated tests and comments{0} +{2}#--------------------------------------------------------------------------{0} + +{5}use{0} {6}v5.22{10};{0}			{2}# may be needed{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New bitwise operators{0} +{2}#--------------------------------------------------------------------------{0} + +{5}use{0} {11}feature{0} {7}'bitwise'{0}		{2}# enable feature, warning enabled{0} +{5}use{0} {11}experimental{0} {6}"bitwise"{10};{0}	{2}# enable feature, warning disabled{0} + +{2}# numerical operands{0} +{4}10{10}&{4}20{0}  {4}10{10}|{4}20{0}   {4}10{10}^{4}20{0} {10}~{4}10{0} +{12}$a{10}&{6}"8"{0} {12}$a{10}|{6}"8"{0} {12}$a{10}^{6}"8"{0} {10}~{12}$a{0} {10}~{6}"8"{0} + +{2}# string operands{0} +{7}'0'{10}&.{6}"8"{0} {7}'0'{10}|.{6}"8"{0} {7}'0'{10}^.{6}"8"{0} {10}~.{7}'0'{0} {10}~.{6}"8"{0} +{2}# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled{0} +{2}# so it's feature-setting-dependent, no plans to change current behaviour{0} + {12}$a{10}&{4}.10{0}   {12}$a{10}|{4}.10{0}   {12}$a{10}^{4}.10{0}  {10}~.{12}$a{0}  {10}~{4}.10{0} + +{2}# assignment variants{0} +{12}$a{10}&={4}10{10};{0}    {12}$a{10}|={4}10{10};{0}    {12}$a{10}^={4}10{10};{0} +{12}$b{10}&.={7}'20'{10};{0} {12}$b{10}|.={7}'20'{10};{0} {12}$b{10}^.={7}'20'{10};{0} +{12}$c{10}&={6}"30"{10};{0}  {12}$c{10}|={6}"30"{10};{0}  {12}$c{10}^={6}"30"{10};{0} +{12}$d{10}&.={12}$e{10};{0}   {12}$d{10}|.={12}$e{10};{0}   {12}$d{10}^.={12}$e{10};{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New double-diamond operator{0} +{2}#--------------------------------------------------------------------------{0} +{2}# <<>> is like <> but each element of @ARGV will be treated as an actual file name{0} + +{2}# example snippet from brian d foy's blog post{0} +{5}while{10}({0} {10}<<>>{0} {10}){0} {10}{{0}  {2}# new, safe line input operator{0} +	{10}...;{0} +	{10}}{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New \b boundaries in regular expressions{0} +{2}#--------------------------------------------------------------------------{0} + +{29}qr/\b{gcb}/{0} +{29}qr/\b{wb}/{0} +{29}qr/\b{sb}/{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Non-Capturing Regular Expression Flag{0} +{2}#--------------------------------------------------------------------------{0} +{2}# disables capturing and filling in $1, $2, etc{0} + +{6}"hello"{0} {10}=~{0} {17}/(hi|hello)/n{10};{0} {2}# $1 is not set{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Aliasing via reference{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Variables and subroutines can now be aliased by assigning to a reference{0} + +{10}\{12}$c{0} {10}={0} {10}\{12}$d{10};{0} +{10}\&{11}x{0} {10}={0} {10}\&{11}y{10};{0} + +{2}# Aliasing can also be applied to foreach iterator variables{0} + +{5}foreach{0} {10}\{14}%hash{0} {10}({13}@array_of_hash_refs{10}){0} {10}{{0} {10}...{0} {10}}{0} + +{2}# example snippet from brian d foy's blog post{0} + +{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0} + +{10}\{14}%other_hash{0} {10}={0} {10}\{14}%hash{10};{0} + +{5}use{0} {6}v5.22{10};{0} +{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0} + +{5}foreach{0} {10}\{5}my{0} {14}%hash{0} {10}({0} {13}@array_of_hashes{0} {10}){0} {10}{{0} {2}# named hash control variable{0} +	{5}foreach{0} {5}my{0} {12}$key{0} {10}({0} {5}keys{0} {14}%hash{0} {10}){0} {10}{{0} {2}# named hash now!{0} +		{10}...;{0} +		{10}}{0} +	{10}}{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New :const subroutine attribute{0} +{2}#--------------------------------------------------------------------------{0} + +{5}my{0} {12}$x{0} {10}={0} {4}54321{10};{0} +{15}*INLINED{0} {10}={0} {5}sub{0} {10}:{0} {11}const{0} {10}{{0} {12}$x{0} {10}};{0} +{12}$x{10}++;{0} + +{2}# more examples of attributes{0} +{2}# (not 5.22 stuff, but some general examples for study, useful for{0} +{2}#  handling subroutine signature and subroutine prototype highlighting){0} + +{5}sub{0} {11}foo{0} {10}:{0} {11}lvalue{0} {10};{0} + +{5}package{0} {11}X{10};{0} +{5}sub{0} {11}Y{10}::x{0} {10}:{0} {11}lvalue{0} {10}{{0} {4}1{0} {10}}{0} + +{5}package{0} {11}X{10};{0} +{5}sub{0} {11}foo{0} {10}{{0} {4}1{0} {10}}{0} +{5}package{0} {11}Y{10};{0} +{5}BEGIN{0} {10}{{0} {15}*bar{0} {10}={0} {10}\&{11}X{10}::{11}foo{10};{0} {10}}{0} +{5}package{0} {11}Z{10};{0} +{5}sub{0} {11}Y{10}::{11}bar{0} {10}:{0} {11}lvalue{0} {10};{0} + +{2}# built-in attributes for subroutines:{0} +{11}lvalue{0} {11}method{0} {5}prototype{10}(..){0} {11}locked{0} {11}const{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Repetition in list assignment{0} +{2}#--------------------------------------------------------------------------{0} + +{2}# example snippet from brian d foy's blog post{0} +{5}use{0} {6}v5.22{10};{0} +{5}my{10}({5}undef{10},{0} {12}$card_num{10},{0} {10}({5}undef{10})x{4}3{10},{0} {12}$count{10}){0} {10}={0} {5}split{0} {17}/:/{10};{0} + +{10}({5}undef{10},{5}undef{10},{12}$foo{10}){0} {10}={0} {11}that_function{10}(){0} +{2}# is equivalent to {0} +{10}(({5}undef{10})x{4}2{10},{0} {12}$foo{10}){0} {10}={0} {11}that_function{10}(){0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Floating point parsing has been improved{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Hexadecimal floating point literals{0} + +{2}# some hex floats from a program by Rick Regan{0} +{2}# appropriated and extended from Lua 5.2.x test cases{0} +{2}# tested on perl 5.22/cygwin{0} + +{4}0x1p-1074{10};{0} +{4}0x3.3333333333334p-5{10};{0} +{4}0xcc.ccccccccccdp-11{10};{0} +{4}0x1p+1{10};{0} +{4}0x1p-6{10};{0} +{4}0x1.b7p-1{10};{0} +{4}0x1.fffffffffffffp+1023{10};{0} +{4}0x1p-1022{10};{0} +{4}0X1.921FB4D12D84AP+1{10};{0} +{4}0x1.999999999999ap-4{10};{0} + +{2}# additional test cases for characterization{0} +{4}0x1p-1074{10}.{0}		{2}# dot is a string operator{0} +{4}0x.ABCDEFp10{0}		{2}# legal, dot immediately after 0x{0} +{4}0x{10}.{11}p10{0}			{2}# perl allows 0x as a zero, then concat with p10 bareword{0} +{4}0x{10}.{11}p{0} {4}0x0{10}.{11}p{0}		{2}# dot then bareword{0} +{4}0x_0_.A_BC___DEF_p1_0{0}	{2}# legal hex float, underscores are mostly allowed{0} +{4}0x0{10}.{11}_ABCDEFp10{0}		{2}# _ABCDEFp10 is a bareword, no underscore allowed after dot{0} + +{2}# illegal, but does not use error highlighting{0} +{4}0x0p1{11}ABC{0}		{2}# illegal, highlighted as 0x0p1 abut with bareword ABC {0} + +{2}# allowed to FAIL for now{0} +{4}0x0.ABCDEFp_10{0}		{2}# ABCDEFp_10 is a bareword, '_10' exponent not allowed{0} +{4}0xp{0} {4}0xp1{0} {4}0x0.0p{0}		{2}# syntax errors{0} +{4}0x41.65{10}.{4}65{0} 		{2}# hex dot number, but lexer now fails with 0x41.65 left as a partial hex float{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Support for ?PATTERN? without explicit operator has been removed{0} +{2}#--------------------------------------------------------------------------{0} +{2}# ?PATTERN? must now be written as m?PATTERN?{0} + +{10}?{11}PATTERN{10}?{0}	{2}# does not work in current LexPerl anyway, NO ACTION NEEDED{0} +{17}m?PATTERN?{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# end of test file{0} +{2}#--------------------------------------------------------------------------{0} diff --git a/test/lexTests.py b/test/lexTests.py index a53db7eb8..421c6393b 100644 --- a/test/lexTests.py +++ b/test/lexTests.py @@ -15,6 +15,37 @@ b"function",  b"sub"  ] +keywordsPerl = [ +b"NULL __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD " +b"BEGIN CORE DESTROY END EQ GE GT INIT LE LT NE CHECK abs accept " +b"alarm and atan2 bind binmode bless caller chdir chmod chomp chop " +b"chown chr chroot close closedir cmp connect continue cos crypt " +b"dbmclose dbmopen defined delete die do dump each else elsif endgrent " +b"endhostent endnetent endprotoent endpwent endservent eof eq eval " +b"exec exists exit exp fcntl fileno flock for foreach fork format " +b"formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname " +b"gethostent getlogin getnetbyaddr getnetbyname getnetent getpeername " +b"getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent " +b"getpwent getpwnam getpwuid getservbyname getservbyport getservent " +b"getsockname getsockopt glob gmtime goto grep gt hex if index " +b"int ioctl join keys kill last lc lcfirst le length link listen " +b"local localtime lock log lstat lt map mkdir msgctl msgget msgrcv " +b"msgsnd my ne next no not oct open opendir or ord our pack package " +b"pipe pop pos print printf prototype push quotemeta qu " +b"rand read readdir readline readlink readpipe recv redo " +b"ref rename require reset return reverse rewinddir rindex rmdir " +b"scalar seek seekdir select semctl semget semop send setgrent " +b"sethostent setnetent setpgrp setpriority setprotoent setpwent " +b"setservent setsockopt shift shmctl shmget shmread shmwrite shutdown " +b"sin sleep socket socketpair sort splice split sprintf sqrt srand " +b"stat study sub substr symlink syscall sysopen sysread sysseek " +b"system syswrite tell telldir tie tied time times truncate " +b"uc ucfirst umask undef unless unlink unpack unshift untie until " +b"use utime values vec wait waitpid wantarray warn while write " +b"xor " +b"given when default break say state UNITCHECK __SUB__ fc" +] +  class TestLexers(unittest.TestCase):  	def setUp(self): @@ -23,7 +54,7 @@ class TestLexers(unittest.TestCase):  		self.ed.ClearAll()  		self.ed.EmptyUndoBuffer() -	def AsStyled(self): +	def AsStyled(self, withWindowsLineEnds):  		text = self.ed.Contents()  		data = io.BytesIO()  		prevStyle = -1 @@ -34,11 +65,14 @@ class TestLexers(unittest.TestCase):  				data.write(styleBuf.encode('utf-8'))  				prevStyle = styleNow  			data.write(text[o:o+1]) -		return data.getvalue() +		if withWindowsLineEnds: +			return data.getvalue().replace(b"\n", b"\r\n") +		else: +			return data.getvalue() -	def LexExample(self, name, lexerName, keywords=None): -		if keywords is None: -			keywords = [] +	def LexExample(self, name, lexerName, keywords, fileMode="b"): +		self.ed.ClearAll() +		self.ed.EmptyUndoBuffer()  		self.ed.SetCodePage(65001)  		self.ed.LexerLanguage = lexerName  		bits = self.ed.StyleBitsNeeded @@ -52,6 +86,8 @@ class TestLexers(unittest.TestCase):  		nameNew = nameExample +".new"  		with open(nameExample, "rb") as f:  			prog = f.read() +		if fileMode == "t" and sys.platform == "win32": +			prog = prog.replace(b"\r\n", b"\n")  		BOM = b"\xEF\xBB\xBF"  		if prog.startswith(BOM):  			prog = prog[len(BOM):] @@ -62,12 +98,15 @@ class TestLexers(unittest.TestCase):  		try:  			with open(namePrevious, "rb") as f:  				prevStyled = f.read() -		except FileNotFoundError: +			if fileMode == "t" and sys.platform == "win32": +				prog = prog.replace(b"\r\n", b"\n") +		except EnvironmentError:  			prevStyled = "" -		progStyled = self.AsStyled() +		progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32")  		if progStyled != prevStyled:  			with open(nameNew, "wb") as f:  				f.write(progStyled) +			print("Incorrect lex for " + name)  			print(progStyled)  			print(prevStyled)  			self.assertEquals(progStyled, prevStyled) @@ -75,19 +114,21 @@ class TestLexers(unittest.TestCase):  			# as that is likely to fail many times.  			return -		# Try partial lexes from the start of every line which should all be identical. -		for line in range(self.ed.LineCount): -			lineStart = self.ed.PositionFromLine(line) -			self.ed.StartStyling(lineStart, mask) -			self.assertEquals(self.ed.EndStyled, lineStart) -			self.ed.Colourise(lineStart, lenDocument) -			progStyled = self.AsStyled() -			if progStyled != prevStyled: -				with open(nameNew, "wb") as f: -					f.write(progStyled) -				self.assertEquals(progStyled, prevStyled) -				# Give up after one failure -				return +		if fileMode == "b":	# "t" files are large and this is a quadratic check +			# Try partial lexes from the start of every line which should all be identical. +			for line in range(self.ed.LineCount): +				lineStart = self.ed.PositionFromLine(line) +				self.ed.StartStyling(lineStart, mask) +				self.assertEquals(self.ed.EndStyled, lineStart) +				self.ed.Colourise(lineStart, lenDocument) +				progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32") +				if progStyled != prevStyled: +					print("Incorrect partial lex for " + name + " at line " + line) +					with open(nameNew, "wb") as f: +						f.write(progStyled) +					self.assertEquals(progStyled, prevStyled) +					# Give up after one failure +					return  	def testCXX(self):  		self.LexExample("x.cxx", b"cpp", [b"int"]) @@ -115,7 +156,10 @@ class TestLexers(unittest.TestCase):  		self.LexExample("x.rb", b"ruby", [b"class def end"])  	def testPerl(self): -		self.LexExample("x.pl", b"perl", [b"printf sleep use while"]) +		self.LexExample("x.pl", b"perl", keywordsPerl) + +	def testPerlCases(self): +		self.LexExample("perl-test-5220delta.pl", b"perl", keywordsPerl, "t")  	def testD(self):  		self.LexExample("x.d", b"d", | 
