1 files changed, 75 insertions, 14 deletions
diff --git a/lexers/LexPerl.cxx b/lexers/LexPerl.cxx
index 3a69bf8cc..dccad1542 100644
--- a/lexers/LexPerl.cxx
+++ b/lexers/LexPerl.cxx
@@ -65,6 +65,12 @@ using namespace Scintilla;
 #define BACK_OPERATOR	1	// whitespace/comments are insignificant
 #define BACK_KEYWORD	2	// operators/keywords are needed for disambiguation
 
+#define SUB_BEGIN		0	// states for subroutine prototype scan:
+#define SUB_HAS_PROTO	1	// only 'prototype' attribute allows prototypes
+#define SUB_HAS_ATTRIB	2	// other attributes can exist leftward
+#define SUB_HAS_MODULE	3	// sub name can have a ::identifier part
+#define SUB_HAS_SUB		4	// 'sub' keyword
+
 // all interpolated styles are different from their parent styles by a constant difference
 // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
 #define	INTERPOLATE_SHIFT	(SCE_PL_STRING_VAR - SCE_PL_STRING)
@@ -136,6 +142,22 @@ static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
 		p--;
 }
 
+static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
+	// scan backward past whitespace and comments to find a lexeme
+	skipWhitespaceComment(styler, bk);
+	if (bk == 0)
+		return 0;
+	int sz = 1;
+	style = styler.StyleAt(bk);
+	while (bk > 0) {	// find extent of lexeme
+		if (styler.StyleAt(bk - 1) == style) {
+			bk--; sz++;
+		} else
+			break;
+	}
+	return sz;
+}
+
 static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
 	// backtrack to find open '{' corresponding to a '}', balanced
 	// return significant style to be tested for '/' disambiguation
@@ -214,20 +236,59 @@ static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU en
 
 static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
 	// backtrack to identify if we're starting a subroutine prototype
-	// we also need to ignore whitespace/comments:
-	// 'sub' [whitespace|comment] <identifier> [whitespace|comment]
+	// we also need to ignore whitespace/comments, format is like:
+	//     sub abc::pqr :const :prototype(...)
+	// lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
+	// and a state machine generates legal subroutine syntax matches
 	styler.Flush();
-	skipWhitespaceComment(styler, bk);
-	if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER)	// check identifier
-		return false;
-	while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
-		bk--;
-	}
-	skipWhitespaceComment(styler, bk);
-	if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD	// check "sub" keyword
-	        || !styler.Match(bk - 2, "sub"))	// assume suffix is unique!
-		return false;
-	return true;
+	int state = SUB_BEGIN;
+	do {
+		// find two lexemes, lexeme 2 follows lexeme 1
+		int style2 = SCE_PL_DEFAULT;
+		Sci_PositionU pos2 = bk;
+		int len2 = findPrevLexeme(styler, pos2, style2);
+		int style1 = SCE_PL_DEFAULT;
+		Sci_PositionU pos1 = pos2;
+		if (pos1 > 0) pos1--;
+		int len1 = findPrevLexeme(styler, pos1, style1);
+		if (len1 == 0 || len2 == 0)		// lexeme pair must exist
+			break;
+
+		// match parts of syntax, if invalid subroutine syntax, break off
+		if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
+		    styler.SafeGetCharAt(pos1) == ':') {	// ':'
+			if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
+				if (len2 == 9 && styler.Match(pos2, "prototype")) {	// ':' 'prototype'
+					if (state == SUB_BEGIN) {
+						state = SUB_HAS_PROTO;
+					} else
+						break;
+				} else {	// ':' <attribute>
+					if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
+						state = SUB_HAS_ATTRIB;
+					} else
+						break;
+				}
+			} else
+				break;
+		} else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
+		           styler.Match(pos1, "::")) {	// '::'
+			if (style2 == SCE_PL_IDENTIFIER) {	// '::' <identifier>
+				state = SUB_HAS_MODULE;
+			} else
+				break;
+		} else if (style1 == SCE_PL_WORD && len1 == 3 &&
+		           styler.Match(pos1, "sub")) {	// 'sub'
+			if (style2 == SCE_PL_IDENTIFIER) {	// 'sub' <identifier>
+				state = SUB_HAS_SUB;
+			} else
+				break;
+		} else
+			break;
+		bk = pos1;			// set position for finding next lexeme pair
+		if (bk > 0) bk--;
+	} while (state != SUB_HAS_SUB);
+	return (state == SUB_HAS_SUB);
 }
 
 static int actualNumStyle(int numberStyle) {
@@ -537,7 +598,7 @@ void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int
 	CharacterSet &setPOD = setModifiers;
 	CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
 	CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
-	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_");
+	CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
 	// for format identifiers
 	CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
 	CharacterSet &setFormat = setHereDocDelim;