diff options
-rw-r--r-- | doc/ScintillaHistory.html | 9 | ||||
-rw-r--r-- | lexers/LexPerl.cxx | 32 | ||||
-rw-r--r-- | test/examples/perl-test-5220delta.pl | 178 | ||||
-rw-r--r-- | test/examples/perl-test-5220delta.pl.styled | 178 | ||||
-rw-r--r-- | test/lexTests.py | 86 |
5 files changed, 453 insertions, 30 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index b26f955eb..6a6ab5ec5 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -527,8 +527,13 @@ <a href="http://sourceforge.net/p/scintilla/bugs/1790/">Bug #1790</a>. </li> <li> - Perl lexer allows '_' for subroutine prototypes. - <a href="http://sourceforge.net/p/scintilla/bugs/1791/">Bug #1791</a>. + Perl lexer updated for Perl 5.20 and 5.22.<br /> + Allow '_' for subroutine prototypes. + <a href="http://sourceforge.net/p/scintilla/bugs/1791/">Bug #1791</a>.<br /> + Double-diamond operator <<>>.<br /> + Hexadecimal floating point literals.<br /> + Repetition in list assignment. + <a href="http://sourceforge.net/p/scintilla/bugs/1793/">Bug #1793</a>. </li> <li> Send SCN_UPDATEUI with SC_UPDATE_SELECTION when the application changes multiple diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx index 39fe10f14..3a69bf8cc 100644 --- a/lexers/LexPerl.cxx +++ b/lexers/LexPerl.cxx @@ -52,10 +52,10 @@ using namespace Scintilla; #define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter -#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot -#define PERLNUM_HEX 2 -#define PERLNUM_OCTAL 3 -#define PERLNUM_FLOAT_EXP 4 // exponent part only +#define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot +#define PERLNUM_OCTAL 2 +#define PERLNUM_FLOAT_EXP 3 // exponent part only +#define PERLNUM_HEX 4 // may be a hex float #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings #define PERLNUM_VECTOR 6 #define PERLNUM_V_VECTOR 7 @@ -760,6 +760,14 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int break; } // number then dot (go through) + } else if (numState == PERLNUM_HEX) { + if (dotCount <= 1 && IsADigit(sc.chNext, 16)) { + break; // hex with one dot is a hex float + } else { + sc.SetState(SCE_PL_OPERATOR); + break; + } + // hex then dot (go through) } else if (IsADigit(sc.chNext)) // vectors break; // vector then dot (go through) @@ -778,8 +786,15 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int break; // number then word (go through) } else if (numState == PERLNUM_HEX) { - if (IsADigit(sc.ch, 16)) + if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign + numState = PERLNUM_FLOAT_EXP; + if (sc.chNext == '+' || sc.chNext == '-') { + sc.Forward(); + } + break; + } else if (IsADigit(sc.ch, 16)) break; + // hex or hex float then word (go through) } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) { if (IsADigit(sc.ch)) // vector break; @@ -1263,7 +1278,7 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int fw++; } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition !setWord.Contains(sc.chNext) || - (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) { + ((IsADigit(sc.chPrev) || sc.chPrev == ')') && IsADigit(sc.chNext)))) { sc.ChangeState(SCE_PL_OPERATOR); } // if potentially a keyword, scan forward and grab word, then check @@ -1436,7 +1451,10 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int } backFlag = BACK_NONE; if (isHereDoc) { // handle '<<', HERE doc - if (preferRE) { + if (sc.Match("<<>>")) { // double-diamond operator (5.22) + sc.SetState(SCE_PL_OPERATOR); + sc.Forward(3); + } else if (preferRE) { sc.SetState(SCE_PL_HERE_DELIM); HereDoc.State = 0; } else { // << operator diff --git a/test/examples/perl-test-5220delta.pl b/test/examples/perl-test-5220delta.pl new file mode 100644 index 000000000..a9c80caa2 --- /dev/null +++ b/test/examples/perl-test-5220delta.pl @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +#-------------------------------------------------------------------------- +# perl-test-5220delta.pl +#-------------------------------------------------------------------------- +# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod +# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod +# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22 +# +#-------------------------------------------------------------------------- +# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217 +#-------------------------------------------------------------------------- +# 20151217 initial document +# 20151218 updated tests and comments +#-------------------------------------------------------------------------- + +use v5.22; # may be needed + +#-------------------------------------------------------------------------- +# New bitwise operators +#-------------------------------------------------------------------------- + +use feature 'bitwise' # enable feature, warning enabled +use experimental "bitwise"; # enable feature, warning disabled + +# numerical operands +10&20 10|20 10^20 ~10 +$a&"8" $a|"8" $a^"8" ~$a ~"8" + +# string operands +'0'&."8" '0'|."8" '0'^."8" ~.'0' ~."8" +# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled +# so it's feature-setting-dependent, no plans to change current behaviour + $a&.10 $a|.10 $a^.10 ~.$a ~.10 + +# assignment variants +$a&=10; $a|=10; $a^=10; +$b&.='20'; $b|.='20'; $b^.='20'; +$c&="30"; $c|="30"; $c^="30"; +$d&.=$e; $d|.=$e; $d^.=$e; + +#-------------------------------------------------------------------------- +# New double-diamond operator +#-------------------------------------------------------------------------- +# <<>> is like <> but each element of @ARGV will be treated as an actual file name + +# example snippet from brian d foy's blog post +while( <<>> ) { # new, safe line input operator + ...; + } + +#-------------------------------------------------------------------------- +# New \b boundaries in regular expressions +#-------------------------------------------------------------------------- + +qr/\b{gcb}/ +qr/\b{wb}/ +qr/\b{sb}/ + +#-------------------------------------------------------------------------- +# Non-Capturing Regular Expression Flag +#-------------------------------------------------------------------------- +# disables capturing and filling in $1, $2, etc + +"hello" =~ /(hi|hello)/n; # $1 is not set + +#-------------------------------------------------------------------------- +# Aliasing via reference +#-------------------------------------------------------------------------- +# Variables and subroutines can now be aliased by assigning to a reference + +\$c = \$d; +\&x = \&y; + +# Aliasing can also be applied to foreach iterator variables + +foreach \%hash (@array_of_hash_refs) { ... } + +# example snippet from brian d foy's blog post + +use feature qw(refaliasing); + +\%other_hash = \%hash; + +use v5.22; +use feature qw(refaliasing); + +foreach \my %hash ( @array_of_hashes ) { # named hash control variable + foreach my $key ( keys %hash ) { # named hash now! + ...; + } + } + +#-------------------------------------------------------------------------- +# New :const subroutine attribute +#-------------------------------------------------------------------------- + +my $x = 54321; +*INLINED = sub : const { $x }; +$x++; + +# more examples of attributes +# (not 5.22 stuff, but some general examples for study, useful for +# handling subroutine signature and subroutine prototype highlighting) + +sub foo : lvalue ; + +package X; +sub Y::x : lvalue { 1 } + +package X; +sub foo { 1 } +package Y; +BEGIN { *bar = \&X::foo; } +package Z; +sub Y::bar : lvalue ; + +# built-in attributes for subroutines: +lvalue method prototype(..) locked const + +#-------------------------------------------------------------------------- +# Repetition in list assignment +#-------------------------------------------------------------------------- + +# example snippet from brian d foy's blog post +use v5.22; +my(undef, $card_num, (undef)x3, $count) = split /:/; + +(undef,undef,$foo) = that_function() +# is equivalent to +((undef)x2, $foo) = that_function() + +#-------------------------------------------------------------------------- +# Floating point parsing has been improved +#-------------------------------------------------------------------------- +# Hexadecimal floating point literals + +# some hex floats from a program by Rick Regan +# appropriated and extended from Lua 5.2.x test cases +# tested on perl 5.22/cygwin + +0x1p-1074; +0x3.3333333333334p-5; +0xcc.ccccccccccdp-11; +0x1p+1; +0x1p-6; +0x1.b7p-1; +0x1.fffffffffffffp+1023; +0x1p-1022; +0X1.921FB4D12D84AP+1; +0x1.999999999999ap-4; + +# additional test cases for characterization +0x1p-1074. # dot is a string operator +0x.ABCDEFp10 # legal, dot immediately after 0x +0x.p10 # perl allows 0x as a zero, then concat with p10 bareword +0x.p 0x0.p # dot then bareword +0x_0_.A_BC___DEF_p1_0 # legal hex float, underscores are mostly allowed +0x0._ABCDEFp10 # _ABCDEFp10 is a bareword, no underscore allowed after dot + +# illegal, but does not use error highlighting +0x0p1ABC # illegal, highlighted as 0x0p1 abut with bareword ABC + +# allowed to FAIL for now +0x0.ABCDEFp_10 # ABCDEFp_10 is a bareword, '_10' exponent not allowed +0xp 0xp1 0x0.0p # syntax errors +0x41.65.65 # hex dot number, but lexer now fails with 0x41.65 left as a partial hex float + +#-------------------------------------------------------------------------- +# Support for ?PATTERN? without explicit operator has been removed +#-------------------------------------------------------------------------- +# ?PATTERN? must now be written as m?PATTERN? + +?PATTERN? # does not work in current LexPerl anyway, NO ACTION NEEDED +m?PATTERN? + +#-------------------------------------------------------------------------- +# end of test file +#-------------------------------------------------------------------------- diff --git a/test/examples/perl-test-5220delta.pl.styled b/test/examples/perl-test-5220delta.pl.styled new file mode 100644 index 000000000..c01757999 --- /dev/null +++ b/test/examples/perl-test-5220delta.pl.styled @@ -0,0 +1,178 @@ +{2}# -*- coding: utf-8 -*-{0} +{2}#--------------------------------------------------------------------------{0} +{2}# perl-test-5220delta.pl{0} +{2}#--------------------------------------------------------------------------{0} +{2}# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod{0} +{2}# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod{0} +{2}# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22{0} +{2}#{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217{0} +{2}#--------------------------------------------------------------------------{0} +{2}# 20151217 initial document{0} +{2}# 20151218 updated tests and comments{0} +{2}#--------------------------------------------------------------------------{0} + +{5}use{0} {6}v5.22{10};{0} {2}# may be needed{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New bitwise operators{0} +{2}#--------------------------------------------------------------------------{0} + +{5}use{0} {11}feature{0} {7}'bitwise'{0} {2}# enable feature, warning enabled{0} +{5}use{0} {11}experimental{0} {6}"bitwise"{10};{0} {2}# enable feature, warning disabled{0} + +{2}# numerical operands{0} +{4}10{10}&{4}20{0} {4}10{10}|{4}20{0} {4}10{10}^{4}20{0} {10}~{4}10{0} +{12}$a{10}&{6}"8"{0} {12}$a{10}|{6}"8"{0} {12}$a{10}^{6}"8"{0} {10}~{12}$a{0} {10}~{6}"8"{0} + +{2}# string operands{0} +{7}'0'{10}&.{6}"8"{0} {7}'0'{10}|.{6}"8"{0} {7}'0'{10}^.{6}"8"{0} {10}~.{7}'0'{0} {10}~.{6}"8"{0} +{2}# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled{0} +{2}# so it's feature-setting-dependent, no plans to change current behaviour{0} + {12}$a{10}&{4}.10{0} {12}$a{10}|{4}.10{0} {12}$a{10}^{4}.10{0} {10}~.{12}$a{0} {10}~{4}.10{0} + +{2}# assignment variants{0} +{12}$a{10}&={4}10{10};{0} {12}$a{10}|={4}10{10};{0} {12}$a{10}^={4}10{10};{0} +{12}$b{10}&.={7}'20'{10};{0} {12}$b{10}|.={7}'20'{10};{0} {12}$b{10}^.={7}'20'{10};{0} +{12}$c{10}&={6}"30"{10};{0} {12}$c{10}|={6}"30"{10};{0} {12}$c{10}^={6}"30"{10};{0} +{12}$d{10}&.={12}$e{10};{0} {12}$d{10}|.={12}$e{10};{0} {12}$d{10}^.={12}$e{10};{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New double-diamond operator{0} +{2}#--------------------------------------------------------------------------{0} +{2}# <<>> is like <> but each element of @ARGV will be treated as an actual file name{0} + +{2}# example snippet from brian d foy's blog post{0} +{5}while{10}({0} {10}<<>>{0} {10}){0} {10}{{0} {2}# new, safe line input operator{0} + {10}...;{0} + {10}}{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New \b boundaries in regular expressions{0} +{2}#--------------------------------------------------------------------------{0} + +{29}qr/\b{gcb}/{0} +{29}qr/\b{wb}/{0} +{29}qr/\b{sb}/{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Non-Capturing Regular Expression Flag{0} +{2}#--------------------------------------------------------------------------{0} +{2}# disables capturing and filling in $1, $2, etc{0} + +{6}"hello"{0} {10}=~{0} {17}/(hi|hello)/n{10};{0} {2}# $1 is not set{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Aliasing via reference{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Variables and subroutines can now be aliased by assigning to a reference{0} + +{10}\{12}$c{0} {10}={0} {10}\{12}$d{10};{0} +{10}\&{11}x{0} {10}={0} {10}\&{11}y{10};{0} + +{2}# Aliasing can also be applied to foreach iterator variables{0} + +{5}foreach{0} {10}\{14}%hash{0} {10}({13}@array_of_hash_refs{10}){0} {10}{{0} {10}...{0} {10}}{0} + +{2}# example snippet from brian d foy's blog post{0} + +{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0} + +{10}\{14}%other_hash{0} {10}={0} {10}\{14}%hash{10};{0} + +{5}use{0} {6}v5.22{10};{0} +{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0} + +{5}foreach{0} {10}\{5}my{0} {14}%hash{0} {10}({0} {13}@array_of_hashes{0} {10}){0} {10}{{0} {2}# named hash control variable{0} + {5}foreach{0} {5}my{0} {12}$key{0} {10}({0} {5}keys{0} {14}%hash{0} {10}){0} {10}{{0} {2}# named hash now!{0} + {10}...;{0} + {10}}{0} + {10}}{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# New :const subroutine attribute{0} +{2}#--------------------------------------------------------------------------{0} + +{5}my{0} {12}$x{0} {10}={0} {4}54321{10};{0} +{15}*INLINED{0} {10}={0} {5}sub{0} {10}:{0} {11}const{0} {10}{{0} {12}$x{0} {10}};{0} +{12}$x{10}++;{0} + +{2}# more examples of attributes{0} +{2}# (not 5.22 stuff, but some general examples for study, useful for{0} +{2}# handling subroutine signature and subroutine prototype highlighting){0} + +{5}sub{0} {11}foo{0} {10}:{0} {11}lvalue{0} {10};{0} + +{5}package{0} {11}X{10};{0} +{5}sub{0} {11}Y{10}::x{0} {10}:{0} {11}lvalue{0} {10}{{0} {4}1{0} {10}}{0} + +{5}package{0} {11}X{10};{0} +{5}sub{0} {11}foo{0} {10}{{0} {4}1{0} {10}}{0} +{5}package{0} {11}Y{10};{0} +{5}BEGIN{0} {10}{{0} {15}*bar{0} {10}={0} {10}\&{11}X{10}::{11}foo{10};{0} {10}}{0} +{5}package{0} {11}Z{10};{0} +{5}sub{0} {11}Y{10}::{11}bar{0} {10}:{0} {11}lvalue{0} {10};{0} + +{2}# built-in attributes for subroutines:{0} +{11}lvalue{0} {11}method{0} {5}prototype{10}(..){0} {11}locked{0} {11}const{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Repetition in list assignment{0} +{2}#--------------------------------------------------------------------------{0} + +{2}# example snippet from brian d foy's blog post{0} +{5}use{0} {6}v5.22{10};{0} +{5}my{10}({5}undef{10},{0} {12}$card_num{10},{0} {10}({5}undef{10})x{4}3{10},{0} {12}$count{10}){0} {10}={0} {5}split{0} {17}/:/{10};{0} + +{10}({5}undef{10},{5}undef{10},{12}$foo{10}){0} {10}={0} {11}that_function{10}(){0} +{2}# is equivalent to {0} +{10}(({5}undef{10})x{4}2{10},{0} {12}$foo{10}){0} {10}={0} {11}that_function{10}(){0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Floating point parsing has been improved{0} +{2}#--------------------------------------------------------------------------{0} +{2}# Hexadecimal floating point literals{0} + +{2}# some hex floats from a program by Rick Regan{0} +{2}# appropriated and extended from Lua 5.2.x test cases{0} +{2}# tested on perl 5.22/cygwin{0} + +{4}0x1p-1074{10};{0} +{4}0x3.3333333333334p-5{10};{0} +{4}0xcc.ccccccccccdp-11{10};{0} +{4}0x1p+1{10};{0} +{4}0x1p-6{10};{0} +{4}0x1.b7p-1{10};{0} +{4}0x1.fffffffffffffp+1023{10};{0} +{4}0x1p-1022{10};{0} +{4}0X1.921FB4D12D84AP+1{10};{0} +{4}0x1.999999999999ap-4{10};{0} + +{2}# additional test cases for characterization{0} +{4}0x1p-1074{10}.{0} {2}# dot is a string operator{0} +{4}0x.ABCDEFp10{0} {2}# legal, dot immediately after 0x{0} +{4}0x{10}.{11}p10{0} {2}# perl allows 0x as a zero, then concat with p10 bareword{0} +{4}0x{10}.{11}p{0} {4}0x0{10}.{11}p{0} {2}# dot then bareword{0} +{4}0x_0_.A_BC___DEF_p1_0{0} {2}# legal hex float, underscores are mostly allowed{0} +{4}0x0{10}.{11}_ABCDEFp10{0} {2}# _ABCDEFp10 is a bareword, no underscore allowed after dot{0} + +{2}# illegal, but does not use error highlighting{0} +{4}0x0p1{11}ABC{0} {2}# illegal, highlighted as 0x0p1 abut with bareword ABC {0} + +{2}# allowed to FAIL for now{0} +{4}0x0.ABCDEFp_10{0} {2}# ABCDEFp_10 is a bareword, '_10' exponent not allowed{0} +{4}0xp{0} {4}0xp1{0} {4}0x0.0p{0} {2}# syntax errors{0} +{4}0x41.65{10}.{4}65{0} {2}# hex dot number, but lexer now fails with 0x41.65 left as a partial hex float{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# Support for ?PATTERN? without explicit operator has been removed{0} +{2}#--------------------------------------------------------------------------{0} +{2}# ?PATTERN? must now be written as m?PATTERN?{0} + +{10}?{11}PATTERN{10}?{0} {2}# does not work in current LexPerl anyway, NO ACTION NEEDED{0} +{17}m?PATTERN?{0} + +{2}#--------------------------------------------------------------------------{0} +{2}# end of test file{0} +{2}#--------------------------------------------------------------------------{0} diff --git a/test/lexTests.py b/test/lexTests.py index a53db7eb8..421c6393b 100644 --- a/test/lexTests.py +++ b/test/lexTests.py @@ -15,6 +15,37 @@ b"function", b"sub" ] +keywordsPerl = [ +b"NULL __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD " +b"BEGIN CORE DESTROY END EQ GE GT INIT LE LT NE CHECK abs accept " +b"alarm and atan2 bind binmode bless caller chdir chmod chomp chop " +b"chown chr chroot close closedir cmp connect continue cos crypt " +b"dbmclose dbmopen defined delete die do dump each else elsif endgrent " +b"endhostent endnetent endprotoent endpwent endservent eof eq eval " +b"exec exists exit exp fcntl fileno flock for foreach fork format " +b"formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname " +b"gethostent getlogin getnetbyaddr getnetbyname getnetent getpeername " +b"getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent " +b"getpwent getpwnam getpwuid getservbyname getservbyport getservent " +b"getsockname getsockopt glob gmtime goto grep gt hex if index " +b"int ioctl join keys kill last lc lcfirst le length link listen " +b"local localtime lock log lstat lt map mkdir msgctl msgget msgrcv " +b"msgsnd my ne next no not oct open opendir or ord our pack package " +b"pipe pop pos print printf prototype push quotemeta qu " +b"rand read readdir readline readlink readpipe recv redo " +b"ref rename require reset return reverse rewinddir rindex rmdir " +b"scalar seek seekdir select semctl semget semop send setgrent " +b"sethostent setnetent setpgrp setpriority setprotoent setpwent " +b"setservent setsockopt shift shmctl shmget shmread shmwrite shutdown " +b"sin sleep socket socketpair sort splice split sprintf sqrt srand " +b"stat study sub substr symlink syscall sysopen sysread sysseek " +b"system syswrite tell telldir tie tied time times truncate " +b"uc ucfirst umask undef unless unlink unpack unshift untie until " +b"use utime values vec wait waitpid wantarray warn while write " +b"xor " +b"given when default break say state UNITCHECK __SUB__ fc" +] + class TestLexers(unittest.TestCase): def setUp(self): @@ -23,7 +54,7 @@ class TestLexers(unittest.TestCase): self.ed.ClearAll() self.ed.EmptyUndoBuffer() - def AsStyled(self): + def AsStyled(self, withWindowsLineEnds): text = self.ed.Contents() data = io.BytesIO() prevStyle = -1 @@ -34,11 +65,14 @@ class TestLexers(unittest.TestCase): data.write(styleBuf.encode('utf-8')) prevStyle = styleNow data.write(text[o:o+1]) - return data.getvalue() + if withWindowsLineEnds: + return data.getvalue().replace(b"\n", b"\r\n") + else: + return data.getvalue() - def LexExample(self, name, lexerName, keywords=None): - if keywords is None: - keywords = [] + def LexExample(self, name, lexerName, keywords, fileMode="b"): + self.ed.ClearAll() + self.ed.EmptyUndoBuffer() self.ed.SetCodePage(65001) self.ed.LexerLanguage = lexerName bits = self.ed.StyleBitsNeeded @@ -52,6 +86,8 @@ class TestLexers(unittest.TestCase): nameNew = nameExample +".new" with open(nameExample, "rb") as f: prog = f.read() + if fileMode == "t" and sys.platform == "win32": + prog = prog.replace(b"\r\n", b"\n") BOM = b"\xEF\xBB\xBF" if prog.startswith(BOM): prog = prog[len(BOM):] @@ -62,12 +98,15 @@ class TestLexers(unittest.TestCase): try: with open(namePrevious, "rb") as f: prevStyled = f.read() - except FileNotFoundError: + if fileMode == "t" and sys.platform == "win32": + prog = prog.replace(b"\r\n", b"\n") + except EnvironmentError: prevStyled = "" - progStyled = self.AsStyled() + progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32") if progStyled != prevStyled: with open(nameNew, "wb") as f: f.write(progStyled) + print("Incorrect lex for " + name) print(progStyled) print(prevStyled) self.assertEquals(progStyled, prevStyled) @@ -75,19 +114,21 @@ class TestLexers(unittest.TestCase): # as that is likely to fail many times. return - # Try partial lexes from the start of every line which should all be identical. - for line in range(self.ed.LineCount): - lineStart = self.ed.PositionFromLine(line) - self.ed.StartStyling(lineStart, mask) - self.assertEquals(self.ed.EndStyled, lineStart) - self.ed.Colourise(lineStart, lenDocument) - progStyled = self.AsStyled() - if progStyled != prevStyled: - with open(nameNew, "wb") as f: - f.write(progStyled) - self.assertEquals(progStyled, prevStyled) - # Give up after one failure - return + if fileMode == "b": # "t" files are large and this is a quadratic check + # Try partial lexes from the start of every line which should all be identical. + for line in range(self.ed.LineCount): + lineStart = self.ed.PositionFromLine(line) + self.ed.StartStyling(lineStart, mask) + self.assertEquals(self.ed.EndStyled, lineStart) + self.ed.Colourise(lineStart, lenDocument) + progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32") + if progStyled != prevStyled: + print("Incorrect partial lex for " + name + " at line " + line) + with open(nameNew, "wb") as f: + f.write(progStyled) + self.assertEquals(progStyled, prevStyled) + # Give up after one failure + return def testCXX(self): self.LexExample("x.cxx", b"cpp", [b"int"]) @@ -115,7 +156,10 @@ class TestLexers(unittest.TestCase): self.LexExample("x.rb", b"ruby", [b"class def end"]) def testPerl(self): - self.LexExample("x.pl", b"perl", [b"printf sleep use while"]) + self.LexExample("x.pl", b"perl", keywordsPerl) + + def testPerlCases(self): + self.LexExample("perl-test-5220delta.pl", b"perl", keywordsPerl, "t") def testD(self): self.LexExample("x.d", b"d", |