Rewrite the GetText translation (po) lexer

The old one had a few bugs and was somewhat limited; this one should hopefully fix those issues. The new one should behave like the old one, while adding some more styles and following the file format more closely.
author: Colomban Wendling <ban@herbesfolles.org> 2012-09-08 21:49:11 +0200
committer: Colomban Wendling <ban@herbesfolles.org> 2012-09-08 21:49:11 +0200
commit: 50fd584ed6c1a30769f9aa78a73a20c6b1ce46a2 (patch)
tree: 2d2b6beecb172d4c8a86b7b042f8e11e9e86bc19
parent: 3a7b1192dfc7fb1e1e41652a2c5221b18e51fa3d (diff)
download: scintilla-mirror-50fd584ed6c1a30769f9aa78a73a20c6b1ce46a2.tar.gz
3 files changed, 119 insertions, 66 deletions
diff --git a/include/SciLexer.h b/include/SciLexer.h
index 57b5cf6e7..85ba2a1cc 100644
--- a/include/SciLexer.h
+++ b/include/SciLexer.h
@@ -1354,6 +1354,13 @@
 #define SCE_PO_MSGCTXT 6
 #define SCE_PO_MSGCTXT_TEXT 7
 #define SCE_PO_FUZZY 8
+#define SCE_PO_PROGRAMMER_COMMENT 9
+#define SCE_PO_REFERENCE 10
+#define SCE_PO_FLAGS 11
+#define SCE_PO_MSGID_TEXT_EOL 12
+#define SCE_PO_MSGSTR_TEXT_EOL 13
+#define SCE_PO_MSGCTXT_TEXT_EOL 14
+#define SCE_PO_ERROR 15
 #define SCE_PAS_DEFAULT 0
 #define SCE_PAS_IDENTIFIER 1
 #define SCE_PAS_COMMENT 2
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index ff70c0a5a..28e21ee15 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -3924,6 +3924,13 @@ val SCE_PO_MSGSTR_TEXT=5
 val SCE_PO_MSGCTXT=6
 val SCE_PO_MSGCTXT_TEXT=7
 val SCE_PO_FUZZY=8
+val SCE_PO_PROGRAMMER_COMMENT=9
+val SCE_PO_REFERENCE=10
+val SCE_PO_FLAGS=11
+val SCE_PO_MSGID_TEXT_EOL=12
+val SCE_PO_MSGSTR_TEXT_EOL=13
+val SCE_PO_MSGCTXT_TEXT_EOL=14
+val SCE_PO_ERROR=15
 # Lexical states for SCLEX_PASCAL
 lex Pascal=SCLEX_PASCAL SCE_PAS_
 val SCE_PAS_DEFAULT=0
diff --git a/lexers/LexOthers.cxx b/lexers/LexOthers.cxx
index 77c156a3c..fb8c97b31 100644
--- a/lexers/LexOthers.cxx
+++ b/lexers/LexOthers.cxx
@@ -614,76 +614,115 @@ static void FoldDiffDoc(unsigned int startPos, int length, int, WordList *[], Ac
 	} while (static_cast<int>(startPos) + length > curLineStart);
 }
 
-static void ColourisePoLine(
-    char *lineBuffer,
-    unsigned int lengthLine,
-    unsigned int startLine,
-    unsigned int endPos,
-    Accessor &styler) {
-
-	unsigned int i = 0;
-	static unsigned int state = SCE_PO_DEFAULT;
-	unsigned int state_start = SCE_PO_DEFAULT;
-
-	while ((i < lengthLine) && isspacechar(lineBuffer[i]))	// Skip initial spaces
-		i++;
-	if (i < lengthLine) {
-		if (lineBuffer[i] == '#') {
-			// check if the comment contains any flags ("#, ") and
-			// then whether the flags contain "fuzzy"
-			if (strstart(lineBuffer, "#, ") && strstr(lineBuffer, "fuzzy"))
-				styler.ColourTo(endPos, SCE_PO_FUZZY);
-			else
-				styler.ColourTo(endPos, SCE_PO_COMMENT);
-		} else {
-			if (lineBuffer[0] == '"') {
-				// line continuation, use previous style
-				styler.ColourTo(endPos, state);
-				return;
-			// this implicitly also matches "msgid_plural"
-			} else if (strstart(lineBuffer, "msgid")) {
-				state_start = SCE_PO_MSGID;
-				state = SCE_PO_MSGID_TEXT;
-			} else if (strstart(lineBuffer, "msgstr")) {
-				state_start = SCE_PO_MSGSTR;
-				state = SCE_PO_MSGSTR_TEXT;
-			} else if (strstart(lineBuffer, "msgctxt")) {
-				state_start = SCE_PO_MSGCTXT;
-				state = SCE_PO_MSGCTXT_TEXT;
-			}
-			if (state_start != SCE_PO_DEFAULT) {
-				// find the next space
-				while ((i < lengthLine) && ! isspacechar(lineBuffer[i]))
-					i++;
-				styler.ColourTo(startLine + i - 1, state_start);
-				styler.ColourTo(startLine + i, SCE_PO_DEFAULT);
-				styler.ColourTo(endPos, state);
+// see https://www.gnu.org/software/gettext/manual/gettext.html#PO-Files for the syntax reference
+// some details are taken from the GNU msgfmt behavior (e.g. indentation is allowed in front of lines)
+static void ColourisePoDoc(unsigned int startPos, int length, int initStyle, WordList *[], Accessor &styler) {
+	StyleContext sc(startPos, length, initStyle, styler);
+	bool escaped = false;
+	int curLine = styler.GetLine(startPos);
+	// the line state holds the last state on or before the line that isn't the default style
+	int curLineState = curLine > 0 ? styler.GetLineState(curLine - 1) : SCE_PO_DEFAULT;
+	
+	for (; sc.More(); sc.Forward()) {
+		// whether we should leave a state
+		switch (sc.state) {
+			case SCE_PO_COMMENT:
+			case SCE_PO_PROGRAMMER_COMMENT:
+			case SCE_PO_REFERENCE:
+			case SCE_PO_FLAGS:
+			case SCE_PO_FUZZY:
+				if (sc.atLineEnd)
+					sc.SetState(SCE_PO_DEFAULT);
+				else if (sc.state == SCE_PO_FLAGS && sc.Match("fuzzy"))
+					// here we behave like the previous parser, but this should probably be highlighted
+					// on its own like a keyword rather than changing the whole flags style
+					sc.ChangeState(SCE_PO_FUZZY);
+				break;
+			
+			case SCE_PO_MSGCTXT:
+			case SCE_PO_MSGID:
+			case SCE_PO_MSGSTR:
+				if (isspacechar(sc.ch))
+					sc.SetState(SCE_PO_DEFAULT);
+				break;
+			
+			case SCE_PO_ERROR:
+				if (sc.atLineEnd)
+					sc.SetState(SCE_PO_DEFAULT);
+				break;
+			
+			case SCE_PO_MSGCTXT_TEXT:
+			case SCE_PO_MSGID_TEXT:
+			case SCE_PO_MSGSTR_TEXT:
+				if (sc.atLineEnd) { // invalid inside a string
+					if (sc.state == SCE_PO_MSGCTXT_TEXT)
+						sc.ChangeState(SCE_PO_MSGCTXT_TEXT_EOL);
+					else if (sc.state == SCE_PO_MSGID_TEXT)
+						sc.ChangeState(SCE_PO_MSGID_TEXT_EOL);
+					else if (sc.state == SCE_PO_MSGSTR_TEXT)
+						sc.ChangeState(SCE_PO_MSGSTR_TEXT_EOL);
+					sc.SetState(SCE_PO_DEFAULT);
+					escaped = false;
+				} else {
+					if (escaped)
+						escaped = false;
+					else if (sc.ch == '\\')
+						escaped = true;
+					else if (sc.ch == '"')
+						sc.ForwardSetState(SCE_PO_DEFAULT);
+				}
+				break;
+		}
+		
+		// whether we should enter a new state
+		if (sc.state == SCE_PO_DEFAULT) {
+			// forward to the first non-white character on the line
+			bool atLineStart = sc.atLineStart;
+			if (atLineStart) {
+				while (sc.More() && ! sc.atLineEnd && isspacechar(sc.ch))
+					sc.Forward();
 			}
+			
+			if (atLineStart && sc.ch == '#') {
+				if (sc.chNext == '.')
+					sc.SetState(SCE_PO_PROGRAMMER_COMMENT);
+				else if (sc.chNext == ':')
+					sc.SetState(SCE_PO_REFERENCE);
+				else if (sc.chNext == ',')
+					sc.SetState(SCE_PO_FLAGS);
+				else if (sc.chNext == '|')
+					sc.SetState(SCE_PO_COMMENT); // previous untranslated string, no special style yet
+				else
+					sc.SetState(SCE_PO_COMMENT);
+			} else if (atLineStart && sc.Match("msgid")) { // includes msgid_plural
+				sc.SetState(SCE_PO_MSGID);
+			} else if (atLineStart && sc.Match("msgstr")) { // includes [] suffixes
+				sc.SetState(SCE_PO_MSGSTR);
+			} else if (atLineStart && sc.Match("msgctxt")) {
+				sc.SetState(SCE_PO_MSGCTXT);
+			} else if (sc.ch == '"') {
+				if (curLineState == SCE_PO_MSGCTXT || curLineState == SCE_PO_MSGCTXT_TEXT)
+					sc.SetState(SCE_PO_MSGCTXT_TEXT);
+				else if (curLineState == SCE_PO_MSGID || curLineState == SCE_PO_MSGID_TEXT)
+					sc.SetState(SCE_PO_MSGID_TEXT);
+				else if (curLineState == SCE_PO_MSGSTR || curLineState == SCE_PO_MSGSTR_TEXT)
+					sc.SetState(SCE_PO_MSGSTR_TEXT);
+				else
+					sc.SetState(SCE_PO_ERROR);
+			} else if (! isspacechar(sc.ch))
+				sc.SetState(SCE_PO_ERROR);
+			
+			if (sc.state != SCE_PO_DEFAULT)
+				curLineState = sc.state;
 		}
-	} else {
-		styler.ColourTo(endPos, SCE_PO_DEFAULT);
-	}
-}
-
-static void ColourisePoDoc(unsigned int startPos, int length, int, WordList *[], Accessor &styler) {
-	char lineBuffer[1024];
-	styler.StartAt(startPos);
-	styler.StartSegment(startPos);
-	unsigned int linePos = 0;
-	unsigned int startLine = startPos;
-	for (unsigned int i = startPos; i < startPos + length; i++) {
-		lineBuffer[linePos++] = styler[i];
-		if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
-			// End of line (or of line buffer) met, colourise it
-			lineBuffer[linePos] = '\0';
-			ColourisePoLine(lineBuffer, linePos, startLine, i, styler);
-			linePos = 0;
-			startLine = i + 1;
+		
+		if (sc.atLineEnd) {
+			// Update the line state, so it can be seen by next line
+			curLine = styler.GetLine(sc.currentPos);
+			styler.SetLineState(curLine, curLineState);
 		}
 	}
-	if (linePos > 0) {	// Last line does not have ending characters
-		ColourisePoLine(lineBuffer, linePos, startLine, startPos + length - 1, styler);
-	}
+	sc.Complete();
 }
 
 static inline bool isassignchar(unsigned char ch) {
author	Colomban Wendling <ban@herbesfolles.org>	2012-09-08 21:49:11 +0200
committer	Colomban Wendling <ban@herbesfolles.org>	2012-09-08 21:49:11 +0200
commit	50fd584ed6c1a30769f9aa78a73a20c6b1ce46a2 (patch)
tree	2d2b6beecb172d4c8a86b7b042f8e11e9e86bc19
parent	3a7b1192dfc7fb1e1e41652a2c5221b18e51fa3d (diff)
download	scintilla-mirror-50fd584ed6c1a30769f9aa78a73a20c6b1ce46a2.tar.gz