diff options
Diffstat (limited to 'test')
| -rw-r--r-- | test/examples/perl-test-5220delta.pl | 178 | ||||
| -rw-r--r-- | test/examples/perl-test-5220delta.pl.styled | 178 | ||||
| -rw-r--r-- | test/lexTests.py | 86 | 
3 files changed, 421 insertions, 21 deletions
| diff --git a/test/examples/perl-test-5220delta.pl b/test/examples/perl-test-5220delta.pl new file mode 100644 index 000000000..a9c80caa2 --- /dev/null +++ b/test/examples/perl-test-5220delta.pl @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +#-------------------------------------------------------------------------- +# perl-test-5220delta.pl +#-------------------------------------------------------------------------- +# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod +# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod +# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22 +# +#-------------------------------------------------------------------------- +# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217 +#-------------------------------------------------------------------------- +# 20151217	initial document +# 20151218	updated tests and comments +#-------------------------------------------------------------------------- + +use v5.22;			# may be needed + +#-------------------------------------------------------------------------- +# New bitwise operators +#-------------------------------------------------------------------------- + +use feature 'bitwise'		# enable feature, warning enabled +use experimental "bitwise";	# enable feature, warning disabled + +# numerical operands +10&20  10|20   10^20 ~10 +$a&"8" $a|"8" $a^"8" ~$a ~"8" + +# string operands +'0'&."8" '0'|."8" '0'^."8" ~.'0' ~."8" +# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled +# so it's feature-setting-dependent, no plans to change current behaviour + $a&.10   $a|.10   $a^.10  ~.$a  ~.10 + +# assignment variants +$a&=10;    $a|=10;    $a^=10; +$b&.='20'; $b|.='20'; $b^.='20'; +$c&="30";  $c|="30";  $c^="30"; +$d&.=$e;   $d|.=$e;   $d^.=$e; + +#-------------------------------------------------------------------------- +# New double-diamond operator 
+#-------------------------------------------------------------------------- +# <<>> is like <> but each element of @ARGV will be treated as an actual file name + +# example snippet from brian d foy's blog post +while( <<>> ) {  # new, safe line input operator +	...; +	} + +#-------------------------------------------------------------------------- +# New \b boundaries in regular expressions +#-------------------------------------------------------------------------- + +qr/\b{gcb}/ +qr/\b{wb}/ +qr/\b{sb}/ + +#-------------------------------------------------------------------------- +# Non-Capturing Regular Expression Flag +#-------------------------------------------------------------------------- +# disables capturing and filling in $1, $2, etc + +"hello" =~ /(hi|hello)/n; # $1 is not set + +#-------------------------------------------------------------------------- +# Aliasing via reference +#-------------------------------------------------------------------------- +# Variables and subroutines can now be aliased by assigning to a reference + +\$c = \$d; +\&x = \&y; + +# Aliasing can also be applied to foreach iterator variables + +foreach \%hash (@array_of_hash_refs) { ... } + +# example snippet from brian d foy's blog post + +use feature qw(refaliasing); + +\%other_hash = \%hash; + +use v5.22; +use feature qw(refaliasing); + +foreach \my %hash ( @array_of_hashes ) { # named hash control variable +	foreach my $key ( keys %hash ) { # named hash now! 
+		...; +		} +	} + +#-------------------------------------------------------------------------- +# New :const subroutine attribute +#-------------------------------------------------------------------------- + +my $x = 54321; +*INLINED = sub : const { $x }; +$x++; + +# more examples of attributes +# (not 5.22 stuff, but some general examples for study, useful for +#  handling subroutine signature and subroutine prototype highlighting) + +sub foo : lvalue ; + +package X; +sub Y::x : lvalue { 1 } + +package X; +sub foo { 1 } +package Y; +BEGIN { *bar = \&X::foo; } +package Z; +sub Y::bar : lvalue ; + +# built-in attributes for subroutines: +lvalue method prototype(..) locked const + +#-------------------------------------------------------------------------- +# Repetition in list assignment +#-------------------------------------------------------------------------- + +# example snippet from brian d foy's blog post +use v5.22; +my(undef, $card_num, (undef)x3, $count) = split /:/; + +(undef,undef,$foo) = that_function() +# is equivalent to  +((undef)x2, $foo) = that_function() + +#-------------------------------------------------------------------------- +# Floating point parsing has been improved +#-------------------------------------------------------------------------- +# Hexadecimal floating point literals + +# some hex floats from a program by Rick Regan +# appropriated and extended from Lua 5.2.x test cases +# tested on perl 5.22/cygwin + +0x1p-1074; +0x3.3333333333334p-5; +0xcc.ccccccccccdp-11; +0x1p+1; +0x1p-6; +0x1.b7p-1; +0x1.fffffffffffffp+1023; +0x1p-1022; +0X1.921FB4D12D84AP+1; +0x1.999999999999ap-4; + +# additional test cases for characterization +0x1p-1074.		
# dot is a string operator +0x.ABCDEFp10		# legal, dot immediately after 0x +0x.p10			# perl allows 0x as a zero, then concat with p10 bareword +0x.p 0x0.p		# dot then bareword +0x_0_.A_BC___DEF_p1_0	# legal hex float, underscores are mostly allowed +0x0._ABCDEFp10		# _ABCDEFp10 is a bareword, no underscore allowed after dot + +# illegal, but does not use error highlighting +0x0p1ABC		# illegal, highlighted as 0x0p1 abut with bareword ABC  + +# allowed to FAIL for now +0x0.ABCDEFp_10		# ABCDEFp_10 is a bareword, '_10' exponent not allowed +0xp 0xp1 0x0.0p		# syntax errors +0x41.65.65 		# hex dot number, but lexer now fails with 0x41.65 left as a partial hex float + +#-------------------------------------------------------------------------- +# Support for ?PATTERN? without explicit operator has been removed +#-------------------------------------------------------------------------- +# ?PATTERN? must now be written as m?PATTERN? + +?PATTERN?	# does not work in current LexPerl anyway, NO ACTION NEEDED +m?PATTERN? 
+ +#-------------------------------------------------------------------------- +# end of test file +#-------------------------------------------------------------------------- diff --git a/test/examples/perl-test-5220delta.pl.styled b/test/examples/perl-test-5220delta.pl.styled new file mode 100644 index 000000000..c01757999 --- /dev/null +++ b/test/examples/perl-test-5220delta.pl.styled @@ -0,0 +1,178 @@ +{2}# -*- coding: utf-8 -*-{0} +{2}#--------------------------------------------------------------------------{0} +{2}# perl-test-5220delta.pl{0} +{2}#--------------------------------------------------------------------------{0} +{2}# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod{0} +{2}# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod{0} +{2}# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22{0} +{2}#{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217{0} +{2}#--------------------------------------------------------------------------{0} +{2}# 20151217	initial document{0} +{2}# 20151218	updated tests and comments{0} +{2}#--------------------------------------------------------------------------{0} + +{5}use{0} {6}v5.22{10};{0}			{2}# may be needed{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New bitwise operators{0} +{2}#--------------------------------------------------------------------------{0} + +{5}use{0} {11}feature{0} {7}'bitwise'{0}		{2}# enable feature, warning enabled{0} +{5}use{0} {11}experimental{0} {6}"bitwise"{10};{0}	{2}# enable feature, warning disabled{0} + +{2}# numerical operands{0} +{4}10{10}&{4}20{0}  {4}10{10}|{4}20{0}   {4}10{10}^{4}20{0} {10}~{4}10{0} +{12}$a{10}&{6}"8"{0} {12}$a{10}|{6}"8"{0} {12}$a{10}^{6}"8"{0} {10}~{12}$a{0} {10}~{6}"8"{0} + +{2}# string operands{0} +{7}'0'{10}&.{6}"8"{0} {7}'0'{10}|.{6}"8"{0} 
{7}'0'{10}^.{6}"8"{0} {10}~.{7}'0'{0} {10}~.{6}"8"{0} +{2}# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled{0} +{2}# so it's feature-setting-dependent, no plans to change current behaviour{0} + {12}$a{10}&{4}.10{0}   {12}$a{10}|{4}.10{0}   {12}$a{10}^{4}.10{0}  {10}~.{12}$a{0}  {10}~{4}.10{0} + +{2}# assignment variants{0} +{12}$a{10}&={4}10{10};{0}    {12}$a{10}|={4}10{10};{0}    {12}$a{10}^={4}10{10};{0} +{12}$b{10}&.={7}'20'{10};{0} {12}$b{10}|.={7}'20'{10};{0} {12}$b{10}^.={7}'20'{10};{0} +{12}$c{10}&={6}"30"{10};{0}  {12}$c{10}|={6}"30"{10};{0}  {12}$c{10}^={6}"30"{10};{0} +{12}$d{10}&.={12}$e{10};{0}   {12}$d{10}|.={12}$e{10};{0}   {12}$d{10}^.={12}$e{10};{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New double-diamond operator{0} +{2}#--------------------------------------------------------------------------{0} +{2}# <<>> is like <> but each element of @ARGV will be treated as an actual file name{0} + +{2}# example snippet from brian d foy's blog post{0} +{5}while{10}({0} {10}<<>>{0} {10}){0} {10}{{0}  {2}# new, safe line input operator{0} +	{10}...;{0} +	{10}}{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New \b boundaries in regular expressions{0} +{2}#--------------------------------------------------------------------------{0} + +{29}qr/\b{gcb}/{0} +{29}qr/\b{wb}/{0} +{29}qr/\b{sb}/{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Non-Capturing Regular Expression Flag{0} +{2}#--------------------------------------------------------------------------{0} +{2}# disables capturing and filling in $1, $2, etc{0} + +{6}"hello"{0} {10}=~{0} {17}/(hi|hello)/n{10};{0} {2}# $1 is not set{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Aliasing via reference{0} 
+{2}#--------------------------------------------------------------------------{0} +{2}# Variables and subroutines can now be aliased by assigning to a reference{0} + +{10}\{12}$c{0} {10}={0} {10}\{12}$d{10};{0} +{10}\&{11}x{0} {10}={0} {10}\&{11}y{10};{0} + +{2}# Aliasing can also be applied to foreach iterator variables{0} + +{5}foreach{0} {10}\{14}%hash{0} {10}({13}@array_of_hash_refs{10}){0} {10}{{0} {10}...{0} {10}}{0} + +{2}# example snippet from brian d foy's blog post{0} + +{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0} + +{10}\{14}%other_hash{0} {10}={0} {10}\{14}%hash{10};{0} + +{5}use{0} {6}v5.22{10};{0} +{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0} + +{5}foreach{0} {10}\{5}my{0} {14}%hash{0} {10}({0} {13}@array_of_hashes{0} {10}){0} {10}{{0} {2}# named hash control variable{0} +	{5}foreach{0} {5}my{0} {12}$key{0} {10}({0} {5}keys{0} {14}%hash{0} {10}){0} {10}{{0} {2}# named hash now!{0} +		{10}...;{0} +		{10}}{0} +	{10}}{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New :const subroutine attribute{0} +{2}#--------------------------------------------------------------------------{0} + +{5}my{0} {12}$x{0} {10}={0} {4}54321{10};{0} +{15}*INLINED{0} {10}={0} {5}sub{0} {10}:{0} {11}const{0} {10}{{0} {12}$x{0} {10}};{0} +{12}$x{10}++;{0} + +{2}# more examples of attributes{0} +{2}# (not 5.22 stuff, but some general examples for study, useful for{0} +{2}#  handling subroutine signature and subroutine prototype highlighting){0} + +{5}sub{0} {11}foo{0} {10}:{0} {11}lvalue{0} {10};{0} + +{5}package{0} {11}X{10};{0} +{5}sub{0} {11}Y{10}::x{0} {10}:{0} {11}lvalue{0} {10}{{0} {4}1{0} {10}}{0} + +{5}package{0} {11}X{10};{0} +{5}sub{0} {11}foo{0} {10}{{0} {4}1{0} {10}}{0} +{5}package{0} {11}Y{10};{0} +{5}BEGIN{0} {10}{{0} {15}*bar{0} {10}={0} {10}\&{11}X{10}::{11}foo{10};{0} {10}}{0} +{5}package{0} {11}Z{10};{0} +{5}sub{0} {11}Y{10}::{11}bar{0} {10}:{0} {11}lvalue{0} {10};{0} + +{2}# built-in attributes for 
subroutines:{0} +{11}lvalue{0} {11}method{0} {5}prototype{10}(..){0} {11}locked{0} {11}const{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Repetition in list assignment{0} +{2}#--------------------------------------------------------------------------{0} + +{2}# example snippet from brian d foy's blog post{0} +{5}use{0} {6}v5.22{10};{0} +{5}my{10}({5}undef{10},{0} {12}$card_num{10},{0} {10}({5}undef{10})x{4}3{10},{0} {12}$count{10}){0} {10}={0} {5}split{0} {17}/:/{10};{0} + +{10}({5}undef{10},{5}undef{10},{12}$foo{10}){0} {10}={0} {11}that_function{10}(){0} +{2}# is equivalent to {0} +{10}(({5}undef{10})x{4}2{10},{0} {12}$foo{10}){0} {10}={0} {11}that_function{10}(){0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Floating point parsing has been improved{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Hexadecimal floating point literals{0} + +{2}# some hex floats from a program by Rick Regan{0} +{2}# appropriated and extended from Lua 5.2.x test cases{0} +{2}# tested on perl 5.22/cygwin{0} + +{4}0x1p-1074{10};{0} +{4}0x3.3333333333334p-5{10};{0} +{4}0xcc.ccccccccccdp-11{10};{0} +{4}0x1p+1{10};{0} +{4}0x1p-6{10};{0} +{4}0x1.b7p-1{10};{0} +{4}0x1.fffffffffffffp+1023{10};{0} +{4}0x1p-1022{10};{0} +{4}0X1.921FB4D12D84AP+1{10};{0} +{4}0x1.999999999999ap-4{10};{0} + +{2}# additional test cases for characterization{0} +{4}0x1p-1074{10}.{0}		{2}# dot is a string operator{0} +{4}0x.ABCDEFp10{0}		{2}# legal, dot immediately after 0x{0} +{4}0x{10}.{11}p10{0}			{2}# perl allows 0x as a zero, then concat with p10 bareword{0} +{4}0x{10}.{11}p{0} {4}0x0{10}.{11}p{0}		{2}# dot then bareword{0} +{4}0x_0_.A_BC___DEF_p1_0{0}	{2}# legal hex float, underscores are mostly allowed{0} +{4}0x0{10}.{11}_ABCDEFp10{0}		{2}# _ABCDEFp10 is a bareword, no underscore allowed after dot{0} + +{2}# illegal, but does not use error highlighting{0} +{4}0x0p1{11}ABC{0}	
	{2}# illegal, highlighted as 0x0p1 abut with bareword ABC {0} + +{2}# allowed to FAIL for now{0} +{4}0x0.ABCDEFp_10{0}		{2}# ABCDEFp_10 is a bareword, '_10' exponent not allowed{0} +{4}0xp{0} {4}0xp1{0} {4}0x0.0p{0}		{2}# syntax errors{0} +{4}0x41.65{10}.{4}65{0} 		{2}# hex dot number, but lexer now fails with 0x41.65 left as a partial hex float{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Support for ?PATTERN? without explicit operator has been removed{0} +{2}#--------------------------------------------------------------------------{0} +{2}# ?PATTERN? must now be written as m?PATTERN?{0} + +{10}?{11}PATTERN{10}?{0}	{2}# does not work in current LexPerl anyway, NO ACTION NEEDED{0} +{17}m?PATTERN?{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# end of test file{0} +{2}#--------------------------------------------------------------------------{0} diff --git a/test/lexTests.py b/test/lexTests.py index a53db7eb8..421c6393b 100644 --- a/test/lexTests.py +++ b/test/lexTests.py @@ -15,6 +15,37 @@ b"function",  b"sub"  ] +keywordsPerl = [ +b"NULL __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD " +b"BEGIN CORE DESTROY END EQ GE GT INIT LE LT NE CHECK abs accept " +b"alarm and atan2 bind binmode bless caller chdir chmod chomp chop " +b"chown chr chroot close closedir cmp connect continue cos crypt " +b"dbmclose dbmopen defined delete die do dump each else elsif endgrent " +b"endhostent endnetent endprotoent endpwent endservent eof eq eval " +b"exec exists exit exp fcntl fileno flock for foreach fork format " +b"formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname " +b"gethostent getlogin getnetbyaddr getnetbyname getnetent getpeername " +b"getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent " +b"getpwent getpwnam getpwuid getservbyname getservbyport getservent " +b"getsockname getsockopt glob gmtime goto grep gt hex if index " +b"int 
ioctl join keys kill last lc lcfirst le length link listen " +b"local localtime lock log lstat lt map mkdir msgctl msgget msgrcv " +b"msgsnd my ne next no not oct open opendir or ord our pack package " +b"pipe pop pos print printf prototype push quotemeta qu " +b"rand read readdir readline readlink readpipe recv redo " +b"ref rename require reset return reverse rewinddir rindex rmdir " +b"scalar seek seekdir select semctl semget semop send setgrent " +b"sethostent setnetent setpgrp setpriority setprotoent setpwent " +b"setservent setsockopt shift shmctl shmget shmread shmwrite shutdown " +b"sin sleep socket socketpair sort splice split sprintf sqrt srand " +b"stat study sub substr symlink syscall sysopen sysread sysseek " +b"system syswrite tell telldir tie tied time times truncate " +b"uc ucfirst umask undef unless unlink unpack unshift untie until " +b"use utime values vec wait waitpid wantarray warn while write " +b"xor " +b"given when default break say state UNITCHECK __SUB__ fc" +] +  class TestLexers(unittest.TestCase):  	def setUp(self): @@ -23,7 +54,7 @@ class TestLexers(unittest.TestCase):  		self.ed.ClearAll()  		self.ed.EmptyUndoBuffer() -	def AsStyled(self): +	def AsStyled(self, withWindowsLineEnds):  		text = self.ed.Contents()  		data = io.BytesIO()  		prevStyle = -1 @@ -34,11 +65,14 @@ class TestLexers(unittest.TestCase):  				data.write(styleBuf.encode('utf-8'))  				prevStyle = styleNow  			data.write(text[o:o+1]) -		return data.getvalue() +		if withWindowsLineEnds: +			return data.getvalue().replace(b"\n", b"\r\n") +		else: +			return data.getvalue() -	def LexExample(self, name, lexerName, keywords=None): -		if keywords is None: -			keywords = [] +	def LexExample(self, name, lexerName, keywords, fileMode="b"): +		self.ed.ClearAll() +		self.ed.EmptyUndoBuffer()  		self.ed.SetCodePage(65001)  		self.ed.LexerLanguage = lexerName  		bits = self.ed.StyleBitsNeeded @@ -52,6 +86,8 @@ class TestLexers(unittest.TestCase):  		nameNew = nameExample +".new"  	
	with open(nameExample, "rb") as f:  			prog = f.read() +		if fileMode == "t" and sys.platform == "win32": +			prog = prog.replace(b"\r\n", b"\n")  		BOM = b"\xEF\xBB\xBF"  		if prog.startswith(BOM):  			prog = prog[len(BOM):] @@ -62,12 +98,15 @@ class TestLexers(unittest.TestCase):  		try:  			with open(namePrevious, "rb") as f:  				prevStyled = f.read() -		except FileNotFoundError: +			if fileMode == "t" and sys.platform == "win32": +				prog = prog.replace(b"\r\n", b"\n") +		except EnvironmentError:  			prevStyled = "" -		progStyled = self.AsStyled() +		progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32")  		if progStyled != prevStyled:  			with open(nameNew, "wb") as f:  				f.write(progStyled) +			print("Incorrect lex for " + name)  			print(progStyled)  			print(prevStyled)  			self.assertEquals(progStyled, prevStyled) @@ -75,19 +114,21 @@ class TestLexers(unittest.TestCase):  			# as that is likely to fail many times.  			return -		# Try partial lexes from the start of every line which should all be identical. -		for line in range(self.ed.LineCount): -			lineStart = self.ed.PositionFromLine(line) -			self.ed.StartStyling(lineStart, mask) -			self.assertEquals(self.ed.EndStyled, lineStart) -			self.ed.Colourise(lineStart, lenDocument) -			progStyled = self.AsStyled() -			if progStyled != prevStyled: -				with open(nameNew, "wb") as f: -					f.write(progStyled) -				self.assertEquals(progStyled, prevStyled) -				# Give up after one failure -				return +		if fileMode == "b":	# "t" files are large and this is a quadratic check +			# Try partial lexes from the start of every line which should all be identical. 
+			for line in range(self.ed.LineCount): +				lineStart = self.ed.PositionFromLine(line) +				self.ed.StartStyling(lineStart, mask) +				self.assertEquals(self.ed.EndStyled, lineStart) +				self.ed.Colourise(lineStart, lenDocument) +				progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32") +				if progStyled != prevStyled: +					print("Incorrect partial lex for " + name + " at line " + str(line))	# line is an int from range(); str() avoids a TypeError that would mask the real failure +					with open(nameNew, "wb") as f: +						f.write(progStyled) +					self.assertEquals(progStyled, prevStyled) +					# Give up after one failure +					return  	def testCXX(self):  		self.LexExample("x.cxx", b"cpp", [b"int"]) @@ -115,7 +156,10 @@ class TestLexers(unittest.TestCase):  		self.LexExample("x.rb", b"ruby", [b"class def end"])  	def testPerl(self): -		self.LexExample("x.pl", b"perl", [b"printf sleep use while"]) +		self.LexExample("x.pl", b"perl", keywordsPerl) + +	def testPerlCases(self): +		self.LexExample("perl-test-5220delta.pl", b"perl", keywordsPerl, "t")  	def testD(self):  		self.LexExample("x.d", b"d", | 
