aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--doc/ScintillaHistory.html9
-rw-r--r--lexers/LexPerl.cxx32
-rw-r--r--test/examples/perl-test-5220delta.pl178
-rw-r--r--test/examples/perl-test-5220delta.pl.styled178
-rw-r--r--test/lexTests.py86
5 files changed, 453 insertions, 30 deletions
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index b26f955eb..6a6ab5ec5 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -527,8 +527,13 @@
<a href="http://sourceforge.net/p/scintilla/bugs/1790/">Bug #1790</a>.
</li>
<li>
- Perl lexer allows '_' for subroutine prototypes.
- <a href="http://sourceforge.net/p/scintilla/bugs/1791/">Bug #1791</a>.
+ Perl lexer updated for Perl 5.20 and 5.22.<br />
+ Allow '_' for subroutine prototypes.
+ <a href="http://sourceforge.net/p/scintilla/bugs/1791/">Bug #1791</a>.<br />
+ Double-diamond operator &lt;&lt;&gt;&gt;.<br />
+ Hexadecimal floating point literals.<br />
+ Repetition in list assignment.
+ <a href="http://sourceforge.net/p/scintilla/bugs/1793/">Bug #1793</a>.
</li>
<li>
Send SCN_UPDATEUI with SC_UPDATE_SELECTION when the application changes multiple
diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx
index 39fe10f14..3a69bf8cc 100644
--- a/lexers/LexPerl.cxx
+++ b/lexers/LexPerl.cxx
@@ -52,10 +52,10 @@ using namespace Scintilla;
#define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
-#define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
-#define PERLNUM_HEX 2
-#define PERLNUM_OCTAL 3
-#define PERLNUM_FLOAT_EXP 4 // exponent part only
+#define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
+#define PERLNUM_OCTAL 2
+#define PERLNUM_FLOAT_EXP 3 // exponent part only
+#define PERLNUM_HEX 4 // may be a hex float
#define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR 6
#define PERLNUM_V_VECTOR 7
@@ -760,6 +760,14 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int
break;
}
// number then dot (go through)
+ } else if (numState == PERLNUM_HEX) {
+ if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
+ break; // hex with one dot is a hex float
+ } else {
+ sc.SetState(SCE_PL_OPERATOR);
+ break;
+ }
+ // hex then dot (go through)
} else if (IsADigit(sc.chNext)) // vectors
break;
// vector then dot (go through)
@@ -778,8 +786,15 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int
break;
// number then word (go through)
} else if (numState == PERLNUM_HEX) {
- if (IsADigit(sc.ch, 16))
+ if (sc.ch == 'P' || sc.ch == 'p') { // hex float exponent, sign
+ numState = PERLNUM_FLOAT_EXP;
+ if (sc.chNext == '+' || sc.chNext == '-') {
+ sc.Forward();
+ }
+ break;
+ } else if (IsADigit(sc.ch, 16))
break;
+ // hex or hex float then word (go through)
} else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
if (IsADigit(sc.ch)) // vector
break;
@@ -1263,7 +1278,7 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int
fw++;
} else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
!setWord.Contains(sc.chNext) ||
- (IsADigit(sc.chPrev) && IsADigit(sc.chNext)))) {
+ ((IsADigit(sc.chPrev) || sc.chPrev == ')') && IsADigit(sc.chNext)))) {
sc.ChangeState(SCE_PL_OPERATOR);
}
// if potentially a keyword, scan forward and grab word, then check
@@ -1436,7 +1451,10 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int
}
backFlag = BACK_NONE;
if (isHereDoc) { // handle '<<', HERE doc
- if (preferRE) {
+ if (sc.Match("<<>>")) { // double-diamond operator (5.22)
+ sc.SetState(SCE_PL_OPERATOR);
+ sc.Forward(3);
+ } else if (preferRE) {
sc.SetState(SCE_PL_HERE_DELIM);
HereDoc.State = 0;
} else { // << operator
diff --git a/test/examples/perl-test-5220delta.pl b/test/examples/perl-test-5220delta.pl
new file mode 100644
index 000000000..a9c80caa2
--- /dev/null
+++ b/test/examples/perl-test-5220delta.pl
@@ -0,0 +1,178 @@
+# -*- coding: utf-8 -*-
+#--------------------------------------------------------------------------
+# perl-test-5220delta.pl
+#--------------------------------------------------------------------------
+# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod
+# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod
+# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22
+#
+#--------------------------------------------------------------------------
+# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217
+#--------------------------------------------------------------------------
+# 20151217 initial document
+# 20151218 updated tests and comments
+#--------------------------------------------------------------------------
+
+use v5.22; # may be needed
+
+#--------------------------------------------------------------------------
+# New bitwise operators
+#--------------------------------------------------------------------------
+
+use feature 'bitwise' # enable feature, warning enabled
+use experimental "bitwise"; # enable feature, warning disabled
+
+# numerical operands
+10&20 10|20 10^20 ~10
+$a&"8" $a|"8" $a^"8" ~$a ~"8"
+
+# string operands
+'0'&."8" '0'|."8" '0'^."8" ~.'0' ~."8"
+# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled
+# so it's feature-setting-dependent, no plans to change current behaviour
+ $a&.10 $a|.10 $a^.10 ~.$a ~.10
+
+# assignment variants
+$a&=10; $a|=10; $a^=10;
+$b&.='20'; $b|.='20'; $b^.='20';
+$c&="30"; $c|="30"; $c^="30";
+$d&.=$e; $d|.=$e; $d^.=$e;
+
+#--------------------------------------------------------------------------
+# New double-diamond operator
+#--------------------------------------------------------------------------
+# <<>> is like <> but each element of @ARGV will be treated as an actual file name
+
+# example snippet from brian d foy's blog post
+while( <<>> ) { # new, safe line input operator
+ ...;
+ }
+
+#--------------------------------------------------------------------------
+# New \b boundaries in regular expressions
+#--------------------------------------------------------------------------
+
+qr/\b{gcb}/
+qr/\b{wb}/
+qr/\b{sb}/
+
+#--------------------------------------------------------------------------
+# Non-Capturing Regular Expression Flag
+#--------------------------------------------------------------------------
+# disables capturing and filling in $1, $2, etc
+
+"hello" =~ /(hi|hello)/n; # $1 is not set
+
+#--------------------------------------------------------------------------
+# Aliasing via reference
+#--------------------------------------------------------------------------
+# Variables and subroutines can now be aliased by assigning to a reference
+
+\$c = \$d;
+\&x = \&y;
+
+# Aliasing can also be applied to foreach iterator variables
+
+foreach \%hash (@array_of_hash_refs) { ... }
+
+# example snippet from brian d foy's blog post
+
+use feature qw(refaliasing);
+
+\%other_hash = \%hash;
+
+use v5.22;
+use feature qw(refaliasing);
+
+foreach \my %hash ( @array_of_hashes ) { # named hash control variable
+ foreach my $key ( keys %hash ) { # named hash now!
+ ...;
+ }
+ }
+
+#--------------------------------------------------------------------------
+# New :const subroutine attribute
+#--------------------------------------------------------------------------
+
+my $x = 54321;
+*INLINED = sub : const { $x };
+$x++;
+
+# more examples of attributes
+# (not 5.22 stuff, but some general examples for study, useful for
+# handling subroutine signature and subroutine prototype highlighting)
+
+sub foo : lvalue ;
+
+package X;
+sub Y::x : lvalue { 1 }
+
+package X;
+sub foo { 1 }
+package Y;
+BEGIN { *bar = \&X::foo; }
+package Z;
+sub Y::bar : lvalue ;
+
+# built-in attributes for subroutines:
+lvalue method prototype(..) locked const
+
+#--------------------------------------------------------------------------
+# Repetition in list assignment
+#--------------------------------------------------------------------------
+
+# example snippet from brian d foy's blog post
+use v5.22;
+my(undef, $card_num, (undef)x3, $count) = split /:/;
+
+(undef,undef,$foo) = that_function()
+# is equivalent to
+((undef)x2, $foo) = that_function()
+
+#--------------------------------------------------------------------------
+# Floating point parsing has been improved
+#--------------------------------------------------------------------------
+# Hexadecimal floating point literals
+
+# some hex floats from a program by Rick Regan
+# appropriated and extended from Lua 5.2.x test cases
+# tested on perl 5.22/cygwin
+
+0x1p-1074;
+0x3.3333333333334p-5;
+0xcc.ccccccccccdp-11;
+0x1p+1;
+0x1p-6;
+0x1.b7p-1;
+0x1.fffffffffffffp+1023;
+0x1p-1022;
+0X1.921FB4D12D84AP+1;
+0x1.999999999999ap-4;
+
+# additional test cases for characterization
+0x1p-1074. # dot is a string operator
+0x.ABCDEFp10 # legal, dot immediately after 0x
+0x.p10 # perl allows 0x as a zero, then concat with p10 bareword
+0x.p 0x0.p # dot then bareword
+0x_0_.A_BC___DEF_p1_0 # legal hex float, underscores are mostly allowed
+0x0._ABCDEFp10 # _ABCDEFp10 is a bareword, no underscore allowed after dot
+
+# illegal, but does not use error highlighting
+0x0p1ABC # illegal, highlighted as 0x0p1 abut with bareword ABC
+
+# allowed to FAIL for now
+0x0.ABCDEFp_10 # ABCDEFp_10 is a bareword, '_10' exponent not allowed
+0xp 0xp1 0x0.0p # syntax errors
+0x41.65.65 # hex dot number, but lexer now fails with 0x41.65 left as a partial hex float
+
+#--------------------------------------------------------------------------
+# Support for ?PATTERN? without explicit operator has been removed
+#--------------------------------------------------------------------------
+# ?PATTERN? must now be written as m?PATTERN?
+
+?PATTERN? # does not work in current LexPerl anyway, NO ACTION NEEDED
+m?PATTERN?
+
+#--------------------------------------------------------------------------
+# end of test file
+#--------------------------------------------------------------------------
diff --git a/test/examples/perl-test-5220delta.pl.styled b/test/examples/perl-test-5220delta.pl.styled
new file mode 100644
index 000000000..c01757999
--- /dev/null
+++ b/test/examples/perl-test-5220delta.pl.styled
@@ -0,0 +1,178 @@
+{2}# -*- coding: utf-8 -*-{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# perl-test-5220delta.pl{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# REF: https://metacpan.org/pod/distribution/perl/pod/perldelta.pod{0}
+{2}# maybe future ref: https://metacpan.org/pod/distribution/perl/pod/perl5220delta.pod{0}
+{2}# also: http://perltricks.com/article/165/2015/4/10/A-preview-of-Perl-5-22{0}
+{2}#{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# Kein-Hong Man <keinhong@gmail.com> Public Domain 20151217{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# 20151217 initial document{0}
+{2}# 20151218 updated tests and comments{0}
+{2}#--------------------------------------------------------------------------{0}
+
+{5}use{0} {6}v5.22{10};{0} {2}# may be needed{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# New bitwise operators{0}
+{2}#--------------------------------------------------------------------------{0}
+
+{5}use{0} {11}feature{0} {7}'bitwise'{0} {2}# enable feature, warning enabled{0}
+{5}use{0} {11}experimental{0} {6}"bitwise"{10};{0} {2}# enable feature, warning disabled{0}
+
+{2}# numerical operands{0}
+{4}10{10}&{4}20{0} {4}10{10}|{4}20{0} {4}10{10}^{4}20{0} {10}~{4}10{0}
+{12}$a{10}&{6}"8"{0} {12}$a{10}|{6}"8"{0} {12}$a{10}^{6}"8"{0} {10}~{12}$a{0} {10}~{6}"8"{0}
+
+{2}# string operands{0}
+{7}'0'{10}&.{6}"8"{0} {7}'0'{10}|.{6}"8"{0} {7}'0'{10}^.{6}"8"{0} {10}~.{7}'0'{0} {10}~.{6}"8"{0}
+{2}# the following is AMBIGUOUS, perl sees 10 and not .10 only when bitwise feature is enabled{0}
+{2}# so it's feature-setting-dependent, no plans to change current behaviour{0}
+ {12}$a{10}&{4}.10{0} {12}$a{10}|{4}.10{0} {12}$a{10}^{4}.10{0} {10}~.{12}$a{0} {10}~{4}.10{0}
+
+{2}# assignment variants{0}
+{12}$a{10}&={4}10{10};{0} {12}$a{10}|={4}10{10};{0} {12}$a{10}^={4}10{10};{0}
+{12}$b{10}&.={7}'20'{10};{0} {12}$b{10}|.={7}'20'{10};{0} {12}$b{10}^.={7}'20'{10};{0}
+{12}$c{10}&={6}"30"{10};{0} {12}$c{10}|={6}"30"{10};{0} {12}$c{10}^={6}"30"{10};{0}
+{12}$d{10}&.={12}$e{10};{0} {12}$d{10}|.={12}$e{10};{0} {12}$d{10}^.={12}$e{10};{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# New double-diamond operator{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# <<>> is like <> but each element of @ARGV will be treated as an actual file name{0}
+
+{2}# example snippet from brian d foy's blog post{0}
+{5}while{10}({0} {10}<<>>{0} {10}){0} {10}{{0} {2}# new, safe line input operator{0}
+ {10}...;{0}
+ {10}}{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# New \b boundaries in regular expressions{0}
+{2}#--------------------------------------------------------------------------{0}
+
+{29}qr/\b{gcb}/{0}
+{29}qr/\b{wb}/{0}
+{29}qr/\b{sb}/{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# Non-Capturing Regular Expression Flag{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# disables capturing and filling in $1, $2, etc{0}
+
+{6}"hello"{0} {10}=~{0} {17}/(hi|hello)/n{10};{0} {2}# $1 is not set{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# Aliasing via reference{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# Variables and subroutines can now be aliased by assigning to a reference{0}
+
+{10}\{12}$c{0} {10}={0} {10}\{12}$d{10};{0}
+{10}\&{11}x{0} {10}={0} {10}\&{11}y{10};{0}
+
+{2}# Aliasing can also be applied to foreach iterator variables{0}
+
+{5}foreach{0} {10}\{14}%hash{0} {10}({13}@array_of_hash_refs{10}){0} {10}{{0} {10}...{0} {10}}{0}
+
+{2}# example snippet from brian d foy's blog post{0}
+
+{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0}
+
+{10}\{14}%other_hash{0} {10}={0} {10}\{14}%hash{10};{0}
+
+{5}use{0} {6}v5.22{10};{0}
+{5}use{0} {11}feature{0} {30}qw(refaliasing){10};{0}
+
+{5}foreach{0} {10}\{5}my{0} {14}%hash{0} {10}({0} {13}@array_of_hashes{0} {10}){0} {10}{{0} {2}# named hash control variable{0}
+ {5}foreach{0} {5}my{0} {12}$key{0} {10}({0} {5}keys{0} {14}%hash{0} {10}){0} {10}{{0} {2}# named hash now!{0}
+ {10}...;{0}
+ {10}}{0}
+ {10}}{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# New :const subroutine attribute{0}
+{2}#--------------------------------------------------------------------------{0}
+
+{5}my{0} {12}$x{0} {10}={0} {4}54321{10};{0}
+{15}*INLINED{0} {10}={0} {5}sub{0} {10}:{0} {11}const{0} {10}{{0} {12}$x{0} {10}};{0}
+{12}$x{10}++;{0}
+
+{2}# more examples of attributes{0}
+{2}# (not 5.22 stuff, but some general examples for study, useful for{0}
+{2}# handling subroutine signature and subroutine prototype highlighting){0}
+
+{5}sub{0} {11}foo{0} {10}:{0} {11}lvalue{0} {10};{0}
+
+{5}package{0} {11}X{10};{0}
+{5}sub{0} {11}Y{10}::x{0} {10}:{0} {11}lvalue{0} {10}{{0} {4}1{0} {10}}{0}
+
+{5}package{0} {11}X{10};{0}
+{5}sub{0} {11}foo{0} {10}{{0} {4}1{0} {10}}{0}
+{5}package{0} {11}Y{10};{0}
+{5}BEGIN{0} {10}{{0} {15}*bar{0} {10}={0} {10}\&{11}X{10}::{11}foo{10};{0} {10}}{0}
+{5}package{0} {11}Z{10};{0}
+{5}sub{0} {11}Y{10}::{11}bar{0} {10}:{0} {11}lvalue{0} {10};{0}
+
+{2}# built-in attributes for subroutines:{0}
+{11}lvalue{0} {11}method{0} {5}prototype{10}(..){0} {11}locked{0} {11}const{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# Repetition in list assignment{0}
+{2}#--------------------------------------------------------------------------{0}
+
+{2}# example snippet from brian d foy's blog post{0}
+{5}use{0} {6}v5.22{10};{0}
+{5}my{10}({5}undef{10},{0} {12}$card_num{10},{0} {10}({5}undef{10})x{4}3{10},{0} {12}$count{10}){0} {10}={0} {5}split{0} {17}/:/{10};{0}
+
+{10}({5}undef{10},{5}undef{10},{12}$foo{10}){0} {10}={0} {11}that_function{10}(){0}
+{2}# is equivalent to {0}
+{10}(({5}undef{10})x{4}2{10},{0} {12}$foo{10}){0} {10}={0} {11}that_function{10}(){0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# Floating point parsing has been improved{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# Hexadecimal floating point literals{0}
+
+{2}# some hex floats from a program by Rick Regan{0}
+{2}# appropriated and extended from Lua 5.2.x test cases{0}
+{2}# tested on perl 5.22/cygwin{0}
+
+{4}0x1p-1074{10};{0}
+{4}0x3.3333333333334p-5{10};{0}
+{4}0xcc.ccccccccccdp-11{10};{0}
+{4}0x1p+1{10};{0}
+{4}0x1p-6{10};{0}
+{4}0x1.b7p-1{10};{0}
+{4}0x1.fffffffffffffp+1023{10};{0}
+{4}0x1p-1022{10};{0}
+{4}0X1.921FB4D12D84AP+1{10};{0}
+{4}0x1.999999999999ap-4{10};{0}
+
+{2}# additional test cases for characterization{0}
+{4}0x1p-1074{10}.{0} {2}# dot is a string operator{0}
+{4}0x.ABCDEFp10{0} {2}# legal, dot immediately after 0x{0}
+{4}0x{10}.{11}p10{0} {2}# perl allows 0x as a zero, then concat with p10 bareword{0}
+{4}0x{10}.{11}p{0} {4}0x0{10}.{11}p{0} {2}# dot then bareword{0}
+{4}0x_0_.A_BC___DEF_p1_0{0} {2}# legal hex float, underscores are mostly allowed{0}
+{4}0x0{10}.{11}_ABCDEFp10{0} {2}# _ABCDEFp10 is a bareword, no underscore allowed after dot{0}
+
+{2}# illegal, but does not use error highlighting{0}
+{4}0x0p1{11}ABC{0} {2}# illegal, highlighted as 0x0p1 abut with bareword ABC {0}
+
+{2}# allowed to FAIL for now{0}
+{4}0x0.ABCDEFp_10{0} {2}# ABCDEFp_10 is a bareword, '_10' exponent not allowed{0}
+{4}0xp{0} {4}0xp1{0} {4}0x0.0p{0} {2}# syntax errors{0}
+{4}0x41.65{10}.{4}65{0} {2}# hex dot number, but lexer now fails with 0x41.65 left as a partial hex float{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# Support for ?PATTERN? without explicit operator has been removed{0}
+{2}#--------------------------------------------------------------------------{0}
+{2}# ?PATTERN? must now be written as m?PATTERN?{0}
+
+{10}?{11}PATTERN{10}?{0} {2}# does not work in current LexPerl anyway, NO ACTION NEEDED{0}
+{17}m?PATTERN?{0}
+
+{2}#--------------------------------------------------------------------------{0}
+{2}# end of test file{0}
+{2}#--------------------------------------------------------------------------{0}
diff --git a/test/lexTests.py b/test/lexTests.py
index a53db7eb8..421c6393b 100644
--- a/test/lexTests.py
+++ b/test/lexTests.py
@@ -15,6 +15,37 @@ b"function",
b"sub"
]
+keywordsPerl = [
+b"NULL __FILE__ __LINE__ __PACKAGE__ __DATA__ __END__ AUTOLOAD "
+b"BEGIN CORE DESTROY END EQ GE GT INIT LE LT NE CHECK abs accept "
+b"alarm and atan2 bind binmode bless caller chdir chmod chomp chop "
+b"chown chr chroot close closedir cmp connect continue cos crypt "
+b"dbmclose dbmopen defined delete die do dump each else elsif endgrent "
+b"endhostent endnetent endprotoent endpwent endservent eof eq eval "
+b"exec exists exit exp fcntl fileno flock for foreach fork format "
+b"formline ge getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname "
+b"gethostent getlogin getnetbyaddr getnetbyname getnetent getpeername "
+b"getpgrp getppid getpriority getprotobyname getprotobynumber getprotoent "
+b"getpwent getpwnam getpwuid getservbyname getservbyport getservent "
+b"getsockname getsockopt glob gmtime goto grep gt hex if index "
+b"int ioctl join keys kill last lc lcfirst le length link listen "
+b"local localtime lock log lstat lt map mkdir msgctl msgget msgrcv "
+b"msgsnd my ne next no not oct open opendir or ord our pack package "
+b"pipe pop pos print printf prototype push quotemeta qu "
+b"rand read readdir readline readlink readpipe recv redo "
+b"ref rename require reset return reverse rewinddir rindex rmdir "
+b"scalar seek seekdir select semctl semget semop send setgrent "
+b"sethostent setnetent setpgrp setpriority setprotoent setpwent "
+b"setservent setsockopt shift shmctl shmget shmread shmwrite shutdown "
+b"sin sleep socket socketpair sort splice split sprintf sqrt srand "
+b"stat study sub substr symlink syscall sysopen sysread sysseek "
+b"system syswrite tell telldir tie tied time times truncate "
+b"uc ucfirst umask undef unless unlink unpack unshift untie until "
+b"use utime values vec wait waitpid wantarray warn while write "
+b"xor "
+b"given when default break say state UNITCHECK __SUB__ fc"
+]
+
class TestLexers(unittest.TestCase):
def setUp(self):
@@ -23,7 +54,7 @@ class TestLexers(unittest.TestCase):
self.ed.ClearAll()
self.ed.EmptyUndoBuffer()
- def AsStyled(self):
+ def AsStyled(self, withWindowsLineEnds):
text = self.ed.Contents()
data = io.BytesIO()
prevStyle = -1
@@ -34,11 +65,14 @@ class TestLexers(unittest.TestCase):
data.write(styleBuf.encode('utf-8'))
prevStyle = styleNow
data.write(text[o:o+1])
- return data.getvalue()
+ if withWindowsLineEnds:
+ return data.getvalue().replace(b"\n", b"\r\n")
+ else:
+ return data.getvalue()
- def LexExample(self, name, lexerName, keywords=None):
- if keywords is None:
- keywords = []
+ def LexExample(self, name, lexerName, keywords, fileMode="b"):
+ self.ed.ClearAll()
+ self.ed.EmptyUndoBuffer()
self.ed.SetCodePage(65001)
self.ed.LexerLanguage = lexerName
bits = self.ed.StyleBitsNeeded
@@ -52,6 +86,8 @@ class TestLexers(unittest.TestCase):
nameNew = nameExample +".new"
with open(nameExample, "rb") as f:
prog = f.read()
+ if fileMode == "t" and sys.platform == "win32":
+ prog = prog.replace(b"\r\n", b"\n")
BOM = b"\xEF\xBB\xBF"
if prog.startswith(BOM):
prog = prog[len(BOM):]
@@ -62,12 +98,15 @@ class TestLexers(unittest.TestCase):
try:
with open(namePrevious, "rb") as f:
prevStyled = f.read()
- except FileNotFoundError:
+ if fileMode == "t" and sys.platform == "win32":
+ prog = prog.replace(b"\r\n", b"\n")
+ except EnvironmentError:
prevStyled = ""
- progStyled = self.AsStyled()
+ progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32")
if progStyled != prevStyled:
with open(nameNew, "wb") as f:
f.write(progStyled)
+ print("Incorrect lex for " + name)
print(progStyled)
print(prevStyled)
self.assertEquals(progStyled, prevStyled)
@@ -75,19 +114,21 @@ class TestLexers(unittest.TestCase):
# as that is likely to fail many times.
return
- # Try partial lexes from the start of every line which should all be identical.
- for line in range(self.ed.LineCount):
- lineStart = self.ed.PositionFromLine(line)
- self.ed.StartStyling(lineStart, mask)
- self.assertEquals(self.ed.EndStyled, lineStart)
- self.ed.Colourise(lineStart, lenDocument)
- progStyled = self.AsStyled()
- if progStyled != prevStyled:
- with open(nameNew, "wb") as f:
- f.write(progStyled)
- self.assertEquals(progStyled, prevStyled)
- # Give up after one failure
- return
+ if fileMode == "b": # "t" files are large and this is a quadratic check
+ # Try partial lexes from the start of every line which should all be identical.
+ for line in range(self.ed.LineCount):
+ lineStart = self.ed.PositionFromLine(line)
+ self.ed.StartStyling(lineStart, mask)
+ self.assertEquals(self.ed.EndStyled, lineStart)
+ self.ed.Colourise(lineStart, lenDocument)
+ progStyled = self.AsStyled(fileMode == "t" and sys.platform == "win32")
+ if progStyled != prevStyled:
+ print("Incorrect partial lex for " + name + " at line " + line)
+ with open(nameNew, "wb") as f:
+ f.write(progStyled)
+ self.assertEquals(progStyled, prevStyled)
+ # Give up after one failure
+ return
def testCXX(self):
self.LexExample("x.cxx", b"cpp", [b"int"])
@@ -115,7 +156,10 @@ class TestLexers(unittest.TestCase):
self.LexExample("x.rb", b"ruby", [b"class def end"])
def testPerl(self):
- self.LexExample("x.pl", b"perl", [b"printf sleep use while"])
+ self.LexExample("x.pl", b"perl", keywordsPerl)
+
+ def testPerlCases(self):
+ self.LexExample("perl-test-5220delta.pl", b"perl", keywordsPerl, "t")
def testD(self):
self.LexExample("x.d", b"d",