New Ruby lexer from Eric Promislow of ActiveState.

author: nyamatongwe <unknown> 2005-08-25 23:34:00 +0000
committer: nyamatongwe <unknown> 2005-08-25 23:34:00 +0000
commit: 477b5db0a414f597e4aa8731811aeea7a0d03609 (patch)
tree: 525902d6fa25c1f8c60f26a9ab18a09cbb10a698 /src
parent: 16e93df078b92211981df7f50777aeee582e707d (diff)
download: scintilla-mirror-477b5db0a414f597e4aa8731811aeea7a0d03609.tar.gz
1 file changed, 1157 insertions, 266 deletions
diff --git a/src/LexRuby.cxx b/src/LexRuby.cxx
index 692a47ade..878071a53 100644
--- a/src/LexRuby.cxx
+++ b/src/LexRuby.cxx
@@ -19,342 +19,1233 @@
 #include "Scintilla.h"
 #include "SciLexer.h"
 
-static void ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
+#ifdef SCI_NAMESPACE
+using namespace Scintilla;
+#endif
+
+//XXX Identical to Perl, put in common area
+static inline bool isEOLChar(char ch) { // True when ch is a carriage return or a line feed.
+	return (ch == '\r') || (ch == '\n');
+}
+
+static inline bool isRubyOperatorChar(char ch) { // True when ch is one of the punctuation characters listed below.
+	return ch != '\0' && strchr("%^&*\\()-+=|{}[]:;<>,/?!.~",ch) != NULL; // strchr() also matches the string's terminating NUL, so NUL is excluded explicitly.
+}
+
+
+static inline bool isSafeAlpha(char ch) { // True only for letters in the 0..127 range.
+    return ((unsigned int) ch <= 127) && isalpha(ch); // A negative char converts to a large unsigned value, so isalpha() is only reached for 0..127.
+}
+
+#define MAX_KEYWORD_LENGTH 200 // ClassifyWordRb truncates words to MAX_KEYWORD_LENGTH - 1 characters.
+
+#define STYLE_MASK 63 // Keeps the low six bits of a style value.
+#define actual_style(style) ((style) & STYLE_MASK) // Argument is parenthesised so that an expression such as (a | b) is masked as a whole.
+
+static bool followsDot(unsigned int pos, Accessor &styler) { // True when, scanning left from pos over default-styled spaces/tabs, the first other character is an operator-styled '.'.
+    styler.Flush(); // NOTE(review): presumably needed so StyleAt() below sees the styles set so far -- confirm.
+    for (; pos >= 1; --pos) { // Position 0 is never examined. NOTE(review): pos is unsigned, so a caller passing start - 1 with start == 0 wraps to a huge value.
+        int style = actual_style(styler.StyleAt(pos));
+        char ch;
+        switch (style) {
+            case SCE_RB_DEFAULT:
+                ch = styler[pos];
+                if (ch == ' ' || ch == '\t') {
+                    // blank: keep scanning to the left
+                } else {
+                    return false;
+                }
+                break;
+                
+            case SCE_RB_OPERATOR:
+                return styler[pos] == '.'; // Any other operator means the word does not follow a dot.
+
+            default:
+                return false;
+        }
+    }
+    return false; // Reached position 0 without finding a dot.
+}
+
+// Forward declarations
+static bool keywordIsAmbiguous(const char *prevWord);
+static bool keywordDoStartsLoop(int pos,
+                                Accessor &styler);
+static bool keywordIsModifier(const char *word,
+                              int pos,
+                              Accessor &styler);
+
+static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) { // Colours the word in [start, end] and returns the style applied; prevWord is consulted, then updated.
-	char s[100];
+	char s[MAX_KEYWORD_LENGTH]; // Holds up to MAX_KEYWORD_LENGTH - 1 copied chars plus the NUL; the former 100-byte array could be overrun.
-	bool wordIsNumber = isdigit(styler[start]) != 0;
-	for (unsigned int i = 0; i < end - start + 1 && i < 30; i++) {
-		s[i] = styler[start + i];
-		s[i + 1] = '\0';
+    unsigned int i, j;
+	unsigned int lim = end - start + 1; // num chars to copy
+	if (lim >= MAX_KEYWORD_LENGTH) { // Clamp so the copy below always leaves room for the terminator.
+		lim = MAX_KEYWORD_LENGTH - 1;
+	}
+	for (i = start, j = 0; j < lim; i++, j++) {
+		s[j] = styler[i];
 	}
-	char chAttr = SCE_P_IDENTIFIER;
+    s[j] = '\0';
+	int chAttr;
 	if (0 == strcmp(prevWord, "class"))
-		chAttr = SCE_P_CLASSNAME;
+		chAttr = SCE_RB_CLASSNAME;
 	else if (0 == strcmp(prevWord, "module"))
-		chAttr = SCE_P_CLASSNAME;
+		chAttr = SCE_RB_MODULE_NAME;
 	else if (0 == strcmp(prevWord, "def"))
-		chAttr = SCE_P_DEFNAME;
-	else if (wordIsNumber)
-		chAttr = SCE_P_NUMBER;
-	else if (keywords.InList(s))
-		chAttr = SCE_P_WORD;
-	// make sure that dot-qualifiers inside the word are lexed correct
-	else for (unsigned int i = 0; i < end - start + 1; i++) {
-		if (styler[start + i] == '.') {
-			styler.ColourTo(start + i - 1, chAttr);
-			styler.ColourTo(start + i, SCE_P_OPERATOR);
+		chAttr = SCE_RB_DEFNAME;
+    else if (keywords.InList(s) && !followsDot(start - 1, styler)) { // NOTE(review): start - 1 wraps when start == 0 (unsigned) -- confirm followsDot tolerates that.
+        if (keywordIsAmbiguous(s)
+            && keywordIsModifier(s, start, styler)) {
+            
+            // Demoted keywords are colored as keywords,
+            // but do not affect changes in indentation.
+            //
+            // Consider the word 'if':
+            // 1. <<if test ...>> : normal
+            // 2. <<stmt if test>> : demoted
+            // 3. <<lhs = if ...>> : normal: start a new indent level
+            // 4. <<obj.if = 10>> : color as identifier, since it follows '.'
+            
+            chAttr = SCE_RB_WORD_DEMOTED;
+        } else {
+            chAttr = SCE_RB_WORD;
+        }
+	} else
+        chAttr = SCE_RB_IDENTIFIER;
+	styler.ColourTo(end, chAttr);
+	if (chAttr == SCE_RB_WORD) { // Only a plain keyword is remembered for the next call; anything else clears prevWord.
+		strcpy(prevWord, s); // prevWord must have room for MAX_KEYWORD_LENGTH bytes.
+	} else {
+		prevWord[0] = 0;
+	}
+    return chAttr;
+}
+
+
+//XXX Identical to Perl, put in common area
+static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) { // True when the document text starting at pos equals val.
+	if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) { // '>=' also rejects a candidate that would end exactly at lengthDoc.
+		return false;
+	}
+	while (*val) {
+		if (*val != styler[pos++]) { // NOTE(review): pos is not checked for being negative here; callers may pass i - 7 -- confirm.
+			return false;
 		}
+		val++;
 	}
-	styler.ColourTo(end, chAttr);
-	strcpy(prevWord, s);
+	return true;
 }
 
-static bool IsRbComment(Accessor &styler, int pos, int len) {
-	return len>0 && styler[pos]=='#';
+// Returns true when HereDocDelim starts at pos and only spaces or tabs
+// lie between pos and the preceding end-of-line character
+
+// Precondition: the here-doc target can be indented
+static bool lookingAtHereDocDelim(Accessor	   &styler, // True when HereDocDelim matches at pos and is preceded on its line only by blanks.
+                                  int 			pos,
+                                  int 			lengthDoc,
+                                  const char   *HereDocDelim)
+{
+    if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
+        return false;
+    }
+    while (--pos > 0) { // Walk left from the match; position 0 itself is never inspected.
+        char ch = styler[pos];
+        if (isEOLChar(ch)) {
+            return true; // Only spaces/tabs were seen before reaching the previous line end.
+        } else if (ch != ' ' && ch != '\t') {
+            return false;
+        }
+    }
+    return false; // Reached the start of the document without seeing a line end, so this is reported as no match.
 }
 
-static bool IsRbStringStart(char ch, char chNext, char chNext2) {
-	if (ch == '\'' || ch == '"')
-		return true;
-	if (ch == 'u' || ch == 'U') {
-		if (chNext == '"' || chNext == '\'')
-			return true;
-		if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\''))
-			return true;
-	}
-	if ((ch == 'r' || ch == 'R') && (chNext == '"' || chNext == '\''))
-		return true;
+//XXX Identical to Perl, put in common area
+static char opposite(char ch) { // Closing delimiter for an opening bracket; any other character is returned unchanged.
+	if (ch == '(')
+		return ')';
+	if (ch == '[')
+		return ']';
+	if (ch == '{')
+		return '}';
+	if (ch == '<')
+		return '>';
+	return ch; // Non-bracket delimiters close with the same character.
+}
 
-	return false;
+// Null transition: used when we've reached the end of a token
+// and need to re-lex the current char in the default state.
+
+static void redo_char(int &i, char &ch, char &chNext, char &chNext2, // Steps the caller's cursor back one char and resets state to SCE_RB_DEFAULT.
+                      int &state) {
+    i--;
+    chNext2 = chNext; // Shift the lookahead right by one...
+    chNext = ch; // ...so the caller's next read of chNext yields the current char again; ch itself is not modified.
+    state = SCE_RB_DEFAULT;
 }
 
-static bool IsRbWordStart(char ch, char chNext, char chNext2) {
-	return (iswordchar(ch) && !IsRbStringStart(ch, chNext, chNext2));
+static void advance_char(int &i, char &ch, char &chNext, char &chNext2) { // Moves the caller's cursor forward by one char.
+    i++;
+    ch = chNext;
+    chNext = chNext2; // chNext2 itself is left unchanged here.
 }
 
-/* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
-static int GetRbStringState(Accessor &styler, int i, int *nextIndex) {
-	char ch = styler.SafeGetCharAt(i);
-	char chNext = styler.SafeGetCharAt(i + 1);
+// precondition: startPos points to one after the EOL char
+static bool currLineContainsHereDelims(int& startPos, // Scans left from startPos - 1 for a char styled SCE_RB_HERE_DELIM before any EOL; on success startPos is moved to it.
+                                       Accessor &styler) {
+    if (startPos <= 1)
+        return false;
 
-	// Advance beyond r, u, or ur prefix, but bail if there are any unexpected chars
-	if (ch == 'r' || ch == 'R') {
-		i++;
-		ch = styler.SafeGetCharAt(i);
-		chNext = styler.SafeGetCharAt(i + 1);
-	}
-	else if (ch == 'u' || ch == 'U') {
-		if (chNext == 'r' || chNext == 'R')
-			i += 2;
-		else
-			i += 1;
-		ch = styler.SafeGetCharAt(i);
-		chNext = styler.SafeGetCharAt(i + 1);
-	}
+    int pos;
+    for (pos = startPos - 1; pos > 0; pos--) { // Position 0 is never inspected.
+        char ch = styler.SafeGetCharAt(pos);
+        if (isEOLChar(ch)) {
+            // Leave the pointers where they are -- there are no
+            // here doc delims on the current line, even if
+            // the EOL isn't default style
+            
+            return false;
+        } else {
+            styler.Flush(); // NOTE(review): called on every iteration; it looks hoistable out of the loop -- confirm.
+            if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
+                break;
+            }
+        }
+    }
+    if (pos == 0) { // The loop ran to the start of the document without finding a delimiter or an EOL.
+        return false;
+    }
+    // Update the pointers so we don't have to re-analyze the string
+    startPos = pos;
+    return true;
+}
 
-	if (ch != '"' && ch != '\'') {
-		*nextIndex = i + 1;
-		return SCE_P_DEFAULT;
-	}
 
-	if (i>0 && styler.SafeGetCharAt(i-1) == '$') {
-		*nextIndex = i + 1;
-		return SCE_P_DEFAULT;
-	}
+static bool isEmptyLine(int pos, // True when IndentAmount() sets SC_FOLDLEVELWHITEFLAG for the line containing pos.
+                        Accessor &styler) {
+	int spaceFlags = 0; // Filled in by IndentAmount() but not used afterwards.
+	int lineCurrent = styler.GetLine(pos);
+	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
+    return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0; // NOTE(review): presumably this flag marks a whitespace-only line -- confirm.
+}
 
-	if (ch == chNext && ch == styler.SafeGetCharAt(i + 2)) {
-		*nextIndex = i + 3;
+static bool RE_CanFollowKeyword(const char *keyword) { // True when keyword is one of the words listed below; ColouriseRbDoc stores the result in preferRE, which decides whether a following '/' starts a regex.
+    if (!strcmp(keyword, "and")
+        || !strcmp(keyword, "begin")
+        || !strcmp(keyword, "break")
+        || !strcmp(keyword, "case")
+        || !strcmp(keyword, "do")
+        || !strcmp(keyword, "else")
+        || !strcmp(keyword, "elsif")
+        || !strcmp(keyword, "if")
+        || !strcmp(keyword, "next")
+        || !strcmp(keyword, "return")
+        || !strcmp(keyword, "when")
+        || !strcmp(keyword, "unless")
+        || !strcmp(keyword, "until")
+        || !strcmp(keyword, "not")
+        || !strcmp(keyword, "or")) {
+        return true;
+    }
+    return false; // Any other keyword: the comparison is exact and case-sensitive.
+}
+    
 
-		if (ch == '"')
-			return SCE_P_TRIPLEDOUBLE;
-		else
-			return SCE_P_TRIPLE;
-	} else {
-		*nextIndex = i + 1;
+//todo: if we aren't looking at a stdio character,
+// move to the start of the first line that is not in a 
+// multi-line construct
 
-		if (ch == '"')
-			return SCE_P_STRING;
-		else
-			return SCE_P_CHARACTER;
-	}
+static void synchronizeDocStart(unsigned int& startPos, // Moves startPos back to the start of an earlier line, grows length to match, and sets initStyle to SCE_RB_DEFAULT.
+                                int &length,
+                                int &initStyle,
+                                Accessor &styler,
+                                bool skipWhiteSpace=false) { // When true, lines that isEmptyLine() accepts are also skipped while backing up.
+
+    styler.Flush();
+    int style = actual_style(styler.StyleAt(startPos));
+    switch (style) {
+        case SCE_RB_STDIN:
+        case SCE_RB_STDOUT:
+        case SCE_RB_STDERR:
+            // Don't do anything else with these.
+            return; // All three reference arguments are left untouched on this path.
+    }
+    
+    int pos = startPos;
+    // Quick way to characterize each line
+    int lineStart;
+    for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
+        // Now look at the style before the previous line's EOL
+        pos = styler.LineStart(lineStart) - 1;
+        if (pos <= 10) { // Within the first few characters of the document: restart from line 0.
+            lineStart = 0;
+            break;
+        }
+        char ch = styler.SafeGetCharAt(pos);
+        char chPrev = styler.SafeGetCharAt(pos - 1);
+        if (ch == '\n' && chPrev == '\r') { // Step onto the CR of a CR LF pair.
+            pos--;
+        }
+        if (styler.SafeGetCharAt(pos - 1) == '\\') {
+            // Continuation line -- keep going
+        } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
+            // Part of multi-line construct -- keep going
+        } else if (currLineContainsHereDelims(pos, styler)) { // NOTE(review): pos is reassigned at the top of the next iteration and again after the loop, so the value updated here is not used.
+            // Keep going, with pos and length now pointing
+            // at the end of the here-doc delimiter
+        } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
+            // Keep going
+        } else {
+            break; // The previous line ends cleanly, so line lineStart is a safe place to restart.
+        }
+    }
+    pos = styler.LineStart(lineStart);
+    length += (startPos - pos); // Extend the range by the distance backed up so its end stays where it was.
+    startPos = pos;
+    initStyle = SCE_RB_DEFAULT;
 }
 
 static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
 						   WordList *keywordlists[], Accessor &styler) {
 
-	int lengthDoc = startPos + length;
+	// Lexer for Ruby often has to backtrack to start of current style to determine
+	// which characters are being used as quotes, how deeply nested is the
+	// start position and what the termination string is for here documents
+    
+	WordList &keywords = *keywordlists[0];
 
-	// Backtrack to previous line in case need to fix its tab whinging
-	if (startPos > 0) {
-		int lineCurrent = styler.GetLine(startPos);
-		if (lineCurrent > 0) {
-			startPos = styler.LineStart(lineCurrent-1);
-			if (startPos == 0)
-				initStyle = SCE_P_DEFAULT;
-			else
-				initStyle = styler.StyleAt(startPos-1);
+	class HereDocCls {
+	public:
+		int State;
+        // States
+        // 0: '<<' encountered
+		// 1: collect the delimiter
+        // 1b: text between the end of the delimiter and the EOL
+		// 2: here doc text (lines after the delimiter)
+		char Quote;		// the char after '<<'
+		bool Quoted;		// true if Quote in ('\'','"','`')
+		int DelimiterLength;	// strlen(Delimiter)
+		char Delimiter[256];	// the Delimiter, limit of 256: from Perl
+        bool CanBeIndented;
+		HereDocCls() {
+			State = 0;
+			DelimiterLength = 0;
+			Delimiter[0] = '\0';
+            CanBeIndented = false;
 		}
-	}
+	};
+	HereDocCls HereDoc;	
 
-	// Ruby uses a different mask because bad indentation is marked by oring with 32
-	styler.StartAt(startPos, 127);
+	class QuoteCls {
+		public:
+		int  Count;
+		char Up;
+		char Down;
+		QuoteCls() {
+			this->New();
+		}
+		void New() {
+			Count = 0;
+			Up    = '\0';
+			Down  = '\0';
+		}
+		void Open(char u) {
+			Count++;
+			Up    = u;
+			Down  = opposite(Up);
+		}
+	};
+	QuoteCls Quote;
 
-	WordList &keywords = *keywordlists[0];
+    int numDots = 0;  // For numbers --
+                      // Don't start lexing in the middle of a num
+
+    synchronizeDocStart(startPos, length, initStyle, styler, // ref args
+                        false);
 
-	int whingeLevel = styler.GetPropertyInt("tab.timmy.whinge.level");
-	char prevWord[200];
+	bool preferRE = true;
+    int state = initStyle;
+	int lengthDoc = startPos + length;
+
+	char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
 	prevWord[0] = '\0';
 	if (length == 0)
-		return ;
-
-	int state = initStyle & 31;
+		return;
 
-	int nextIndex = 0;
-	char chPrev = ' ';
-	char chPrev2 = ' ';
-	char chNext = styler[startPos];
+	char chPrev = styler.SafeGetCharAt(startPos - 1);
+	char chNext = styler.SafeGetCharAt(startPos);
+	// Ruby uses a different mask because bad indentation is marked by oring with 32
+	styler.StartAt(startPos, 127);
 	styler.StartSegment(startPos);
-	bool atStartLine = true;
-	int spaceFlags = 0;
-	for (int i = startPos; i < lengthDoc; i++) {
-
-		if (atStartLine) {
-			char chBad = static_cast<char>(64);
-			char chGood = static_cast<char>(0);
-			char chFlags = chGood;
-			if (whingeLevel == 1) {
-				chFlags = (spaceFlags & wsInconsistent) ? chBad : chGood;
-			} else if (whingeLevel == 2) {
-				chFlags = (spaceFlags & wsSpaceTab) ? chBad : chGood;
-			} else if (whingeLevel == 3) {
-				chFlags = (spaceFlags & wsSpace) ? chBad : chGood;
-			} else if (whingeLevel == 4) {
-				chFlags = (spaceFlags & wsTab) ? chBad : chGood;
-			}
-			styler.SetFlags(chFlags, static_cast<char>(state));
-			atStartLine = false;
-		}
 
+    static int q_states[] = {SCE_RB_STRING_Q,
+                             SCE_RB_STRING_QQ,
+                             SCE_RB_STRING_QR,
+                             SCE_RB_STRING_QW,
+                             SCE_RB_STRING_QW,
+                             SCE_RB_STRING_QX};
+    static const char* q_chars = "qQrwWx";
+    
+	for (int i = startPos; i < lengthDoc; i++) {
 		char ch = chNext;
 		chNext = styler.SafeGetCharAt(i + 1);
 		char chNext2 = styler.SafeGetCharAt(i + 2);
 
-		if ((ch == '\r' && chNext != '\n') || (ch == '\n') || (i == lengthDoc)) {
-			if ((state == SCE_P_DEFAULT) || (state == SCE_P_TRIPLE) || (state == SCE_P_TRIPLEDOUBLE)) {
-				// Perform colourisation of white space and triple quoted strings at end of each line to allow
-				// tab marking to work inside white space and triple quoted strings
-				styler.ColourTo(i, state);
-			}
-			atStartLine = true;
-		}
-
-		if (styler.IsLeadByte(ch)) {
-			chNext = styler.SafeGetCharAt(i + 2);
+        if (styler.IsLeadByte(ch)) {
+			chNext = chNext2;
 			chPrev = ' ';
-			chPrev2 = ' ';
 			i += 1;
 			continue;
 		}
+		
+        // skip on DOS/Windows
+        //No, don't, because some things will get tagged on,
+        // so we won't recognize keywords, for example
+#if 0
+		if (ch == '\r' && chNext == '\n') {
+	    	continue;
+        }
+#endif
+            
+        if (HereDoc.State == 1 && isEOLChar(ch)) {
+			// Begin of here-doc (the line after the here-doc delimiter):
+			HereDoc.State = 2;
+			styler.ColourTo(i-1, state);
+            // Don't check for a missing quote, just jump into
+            // the here-doc state
+            state = SCE_RB_HERE_Q;
+        }
 
-		if (state == SCE_P_STRINGEOL) {
-			if (ch != '\r' && ch != '\n') {
-				styler.ColourTo(i - 1, state);
-				state = SCE_P_DEFAULT;
-			}
-		}
-		if (state == SCE_P_DEFAULT) {
-			if (IsRbWordStart(ch, chNext, chNext2)) {
-				styler.ColourTo(i - 1, state);
-				state = SCE_P_WORD;
+        // Regular transitions
+		if (state == SCE_RB_DEFAULT) {
+            if (isdigit(ch)) {
+            	styler.ColourTo(i - 1, state);
+				state = SCE_RB_NUMBER;
+                numDots = 0;
+            } else if (iswordstart(ch)) {
+            	styler.ColourTo(i - 1, state);
+				state = SCE_RB_WORD;
 			} else if (ch == '#') {
 				styler.ColourTo(i - 1, state);
-				state = chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE;
-			} else if (ch == '=' && chNext == 'b') {
+				state = SCE_RB_COMMENTLINE;
+			} else if (ch == '=') {
 				// =begin indicates the start of a comment (doc) block
-				if(styler.SafeGetCharAt(i + 2) == 'e' && styler.SafeGetCharAt(i + 3) == 'g' && styler.SafeGetCharAt(i + 4) == 'i' && styler.SafeGetCharAt(i + 5) == 'n') {
+                if (i == 0 || isEOLChar(chPrev)
+                    && chNext == 'b'
+                    && styler.SafeGetCharAt(i + 2) == 'e'
+                    && styler.SafeGetCharAt(i + 3) == 'g'
+                    && styler.SafeGetCharAt(i + 4) == 'i'
+                    && styler.SafeGetCharAt(i + 5) == 'n'
+                    && !iswordchar(styler.SafeGetCharAt(i + 6))) {
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_RB_POD;
+				} else {
 					styler.ColourTo(i - 1, state);
-					state = SCE_P_TRIPLEDOUBLE; //SCE_C_COMMENT;
+					styler.ColourTo(i, SCE_RB_OPERATOR);
+					preferRE = true;
 				}
-			}  else if (IsRbStringStart(ch, chNext, chNext2)) {
+			} else if (ch == '"') {
 				styler.ColourTo(i - 1, state);
-				state = GetRbStringState(styler, i, &nextIndex);
-				if (nextIndex != i + 1) {
-					i = nextIndex - 1;
-					ch = ' ';
-					chPrev = ' ';
-					chNext = styler.SafeGetCharAt(i + 1);
-				}
-			} else if (isoperator(ch)) {
+				state = SCE_RB_STRING;
+				Quote.New();
+				Quote.Open(ch);
+			} else if (ch == '\'') {
+                styler.ColourTo(i - 1, state);
+                state = SCE_RB_CHARACTER;
+                Quote.New();
+                Quote.Open(ch);
+			} else if (ch == '`') {
 				styler.ColourTo(i - 1, state);
-				styler.ColourTo(i, SCE_P_OPERATOR);
-			}
-			} else if (state == SCE_P_WORD) {
-			if (!iswordchar(ch)) {
-				ClassifyWordRb(styler.GetStartSegment(), i - 1, keywords, styler, prevWord);
-				state = SCE_P_DEFAULT;
-				if (ch == '#') {
-					state = chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE;
-				} else if (IsRbStringStart(ch, chNext, chNext2)) {
-					styler.ColourTo(i - 1, state);
-					state = GetRbStringState(styler, i, &nextIndex);
-					if (nextIndex != i + 1) {
-						i = nextIndex - 1;
-						ch = ' ';
-						chPrev = ' ';
+				state = SCE_RB_BACKTICKS;
+				Quote.New();
+				Quote.Open(ch);
+			} else if (ch == '@') {
+                // Instance or class var
+				styler.ColourTo(i - 1, state);
+                if (chNext == '@') {
+                    state = SCE_RB_CLASS_VAR;
+                    advance_char(i, ch, chNext, chNext2); // pass by ref
+                } else {
+                    state = SCE_RB_INSTANCE_VAR;
+                }
+			} else if (ch == '$') {
+                // Check for a builtin global
+				styler.ColourTo(i - 1, state);
+                // Recognize it bit by bit
+                state = SCE_RB_GLOBAL;
+            } else if (ch == '/' && preferRE) {
+                // Ambigous operator
+				styler.ColourTo(i - 1, state);
+				state = SCE_RB_REGEX;
+                Quote.New();
+                Quote.Open(ch);
+			} else if (ch == '<' && chNext == '<' && chNext2 != '=') {
+
+            // Recognise the '<<' symbol - either a here document or a binary op
+                
+				styler.ColourTo(i - 1, state);
+                i++;
+                chNext = chNext2;
+				styler.ColourTo(i, SCE_RB_OPERATOR);
+                
+                if (preferRE) {
+                    state = SCE_RB_HERE_DELIM;
+				    HereDoc.State = 0;
+                } else {
+                    // leave state as default
+                    // We don't have all the heuristics Perl has for indications
+                    // of a here-doc, because '<<' is overloadable and used
+                    // for so many other classes.
+					preferRE = true;
+                }
+            } else if (ch == ':') {
+				styler.ColourTo(i - 1, state);
+                if (chNext == ':') {
+                    // Mark "::" as an operator, not symbol start
+                    styler.ColourTo(i + 1, SCE_RB_OPERATOR);
+                    advance_char(i, ch, chNext, chNext2); // pass by ref
+                    state = SCE_RB_DEFAULT;
+					preferRE = false;
+                } else if (iswordchar(chNext)) {
+					state = SCE_RB_SYMBOL;
+                } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
+                    // Do the operator analysis in-line, looking ahead
+                    // Based on the table in pickaxe 2nd ed., page 339
+                    bool doColoring = true;
+                    switch (chNext) {
+                    case '[':
+                        if (chNext2 == ']' ) {
+                            char ch_tmp = styler.SafeGetCharAt(i + 3);
+                            if (ch_tmp == '=') {
+                                i += 3;
+                                ch = ch_tmp;
+                                chNext = styler.SafeGetCharAt(i + 1);
+                            } else {
+                                i += 2;
+                                ch = chNext2;
+                                chNext = ch_tmp;
+                            }
+                        } else {
+                            doColoring = false;
+                        }
+                        break;
+
+                    case '*':
+                        if (chNext2 == '*') {
+                            i += 2;
+                            ch = chNext2;
+                            chNext = styler.SafeGetCharAt(i + 1);
+                        } else {
+                            advance_char(i, ch, chNext, chNext2);
+                        }
+                        break;
+
+                    case '!':
+                        if (chNext2 == '=' || chNext2 == '~') {
+                            i += 2;
+                            ch = chNext2;
+                            chNext = styler.SafeGetCharAt(i + 1);
+                        } else {
+                            advance_char(i, ch, chNext, chNext2);
+                        }
+                        break;
+
+                    case '<':
+                        if (chNext2 == '<') {
+                            i += 2;
+                            ch = chNext2;
+                            chNext = styler.SafeGetCharAt(i + 1);
+                        } else if (chNext2 == '=') {
+                            char ch_tmp = styler.SafeGetCharAt(i + 3);
+                            if (ch_tmp == '>') {  // <=> operator
+                                i += 3;
+                                ch = ch_tmp;
+                                chNext = styler.SafeGetCharAt(i + 1);
+                            } else {
+                                i += 2;
+                                ch = chNext2;
+                                chNext = ch_tmp;
+                            }
+                        } else {
+                            advance_char(i, ch, chNext, chNext2);
+                        }
+                        break;
+
+                    default:
+                        // Simple one-character operators
+                        advance_char(i, ch, chNext, chNext2);
+                        break;
+                    }
+                    if (doColoring) {
+                        styler.ColourTo(i, SCE_RB_SYMBOL);
+                        state = SCE_RB_DEFAULT;
+                    }
+				} else if (!preferRE) {
+					// Don't color symbol strings (yet)
+					// Just color the ":" and color rest as string
+					styler.ColourTo(i, SCE_RB_SYMBOL);
+					state = SCE_RB_DEFAULT;
+                } else {
+                    styler.ColourTo(i, SCE_RB_OPERATOR);
+                    state = SCE_RB_DEFAULT;
+                    preferRE = true;
+                }
+            } else if (ch == '%') {
+                styler.ColourTo(i - 1, state);
+                bool have_string = false;
+                if (strchr(q_chars, chNext) && !iswordchar(chNext2)) {
+                    Quote.New();
+                    const char *hit = strchr(q_chars, chNext);
+                    if (hit != NULL) {
+                        state = q_states[hit - q_chars];
+                        Quote.Open(chNext2);
+                        i += 2;
+                        ch = chNext2;
 						chNext = styler.SafeGetCharAt(i + 1);
+                        have_string = true;
+                    }
+                } else if (!iswordchar(chNext)) {
+                    state = SCE_RB_STRING_QQ;
+                    Quote.Open(chNext);
+                    advance_char(i, ch, chNext, chNext2); // pass by ref
+                    have_string = true;
+                }
+                if (!have_string) {
+                    styler.ColourTo(i, SCE_RB_OPERATOR);
+                    // stay in default
+                    preferRE = true;
+                }
+            } else if (isoperator(ch)) {
+				styler.ColourTo(i - 1, state);
+				styler.ColourTo(i, SCE_RB_OPERATOR);
+                // If we're ending an expression or block,
+                // assume it ends an object, and the ambivalent
+                // constructs are binary operators
+                //
+                // So if we don't have one of these chars,
+                // we aren't ending an object exp'n, and ops
+                // like : << / are unary operators.
+                
+                preferRE = (strchr(")}]", ch) == NULL);
+                // Stay in default state
+            } else if (isEOLChar(ch)) {
+                // Make sure it's a true line-end, with no backslash
+                if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
+                    && chPrev != '\\') {
+                    // Assume we've hit the end of the statement.
+                    preferRE = true;
+                }
+            }
+        } else if (state == SCE_RB_WORD) {
+            if (ch == '.' || !iswordchar(ch)) {
+                // Words include x? in all contexts,
+                // and <letters>= after either 'def' or a dot
+                // Move along until a complete word is on our left
+
+                // Default accessor treats '.' as word-chars,
+                // but we don't for now.
+                
+                if (ch == '='
+                    && iswordchar(chPrev)
+                    && (chNext == '('
+                        || strchr(" \t\n\r", chNext) != NULL)
+                    && (!strcmp(prevWord, "def")
+                        || followsDot(styler.GetStartSegment(), styler))) {
+                    // <name>= is a name only when being def'd -- Get it the next time
+                    // This means that <name>=<name> is always lexed as
+                    // <name>, (op, =), <name>
+                } else if ((ch == '?' || ch == '!')
+                           && iswordchar(chPrev)
+                           && !iswordchar(chNext)) {
+                    // <name>? is a name -- Get it the next time
+                    // But <name>?<name> is always lexed as
+                    // <name>, (op, ?), <name>
+                    // Same with <name>! to indicate a method that
+                    // modifies its target
+                } else if (isEOLChar(ch)
+                           && isMatch(styler, lengthDoc, i - 7, "__END__")) {
+                    styler.ColourTo(i, SCE_RB_DATASECTION);
+                    state = SCE_RB_DATASECTION;
+                    // No need to handle this state -- we'll just move to the end
+                    preferRE = false;
+                } else {
+					int wordStartPos = styler.GetStartSegment();
+                    int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
+                    switch (word_style) {
+                        case SCE_RB_WORD:
+                            preferRE = RE_CanFollowKeyword(prevWord);
+							break;
+                            
+                        case SCE_RB_WORD_DEMOTED:
+                            preferRE = true;
+							break;
+                            
+                        case SCE_RB_IDENTIFIER:
+                            if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
+                                preferRE = true;
+                            } else if (isEOLChar(ch)) {
+                                preferRE = true;
+                            } else {
+                                preferRE = false;
+                            }
+							break;
+                        default:
+                            preferRE = false;
+                    }
+                    redo_char(i, ch, chNext, chNext2, state); // pass by ref
+                }
+            }
+        } else if (state == SCE_RB_NUMBER) {
+            if (isalnum(ch) || ch == '_') {
+                // Keep going
+            } else if (ch == '.' && ++numDots == 1) {
+                // Keep going
+            } else {
+                styler.ColourTo(i - 1, state);
+                redo_char(i, ch, chNext, chNext2, state); // pass by ref
+                preferRE = false;
+            }
+        } else if (state == SCE_RB_COMMENTLINE) {
+			if (isEOLChar(ch)) {
+                styler.ColourTo(i - 1, state);
+                state = SCE_RB_DEFAULT;
+                // Use whatever setting we had going into the comment
+            }
+        } else if (state == SCE_RB_HERE_DELIM) {
+            // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
+            // Slightly different: if we find an immediate '-',
+            // the target can appear indented.
+            
+			if (HereDoc.State == 0) { // '<<' encountered
+				HereDoc.State = 1;
+                HereDoc.DelimiterLength = 0;
+                if (ch == '-') {
+                    HereDoc.CanBeIndented = true;
+                    advance_char(i, ch, chNext, chNext2); // pass by ref
+                } else {
+                    HereDoc.CanBeIndented = false;
+                }
+                if (isEOLChar(ch)) {
+                    // Bail out of doing a here doc if there's no target
+                    state = SCE_RB_DEFAULT;
+                    preferRE = false;
+                } else {
+                    HereDoc.Quote = ch;
+                
+                    if (ch == '\'' || ch == '"' || ch == '`') {
+                        HereDoc.Quoted = true;
+                        HereDoc.Delimiter[0] = '\0';
+                    } else {
+                        HereDoc.Quoted = false;
+                        HereDoc.Delimiter[0] = ch;
+                        HereDoc.Delimiter[1] = '\0';
+                        HereDoc.DelimiterLength = 1;
+                    }
+                }
+			} else if (HereDoc.State == 1) { // collect the delimiter
+                if (isEOLChar(ch)) {
+                    // End the quote now, and go back for more
+                    styler.ColourTo(i - 1, state);
+                    state = SCE_RB_DEFAULT;
+                    i--;
+                    chNext = ch;
+                    chNext2 = chNext;
+                    preferRE = false;
+                } else if (HereDoc.Quoted) {
+					if (ch == HereDoc.Quote) { // closing quote => end of delimiter
+						styler.ColourTo(i, state);
+						state = SCE_RB_DEFAULT;
+                        preferRE = false;
+                    } else {
+						if (ch == '\\' && !isEOLChar(chNext)) {
+                            advance_char(i, ch, chNext, chNext2);
+						}
+						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
+						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
+                    }
+                } else { // an unquoted here-doc delimiter
+					if (isalnum(ch) || ch == '_') {
+						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
+						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
+					} else {
+						styler.ColourTo(i - 1, state);
+                        redo_char(i, ch, chNext, chNext2, state);
+                        preferRE = false;
 					}
-				} else if (isoperator(ch)) {
-					styler.ColourTo(i, SCE_P_OPERATOR);
-				}
-			}
-		} else {
-			if (state == SCE_P_COMMENTLINE || state == SCE_P_COMMENTBLOCK) {
-				if (ch == '\r' || ch == '\n') {
+                }
+				if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
 					styler.ColourTo(i - 1, state);
-					state = SCE_P_DEFAULT;
-				}
-			} else if (state == SCE_P_STRING) {
-				if ((ch == '\r' || ch == '\n') && (chPrev != '\\')) {
-					styler.ColourTo(i - 1, state);
-					state = SCE_P_STRINGEOL;
-				} else if (ch == '\\') {
-					if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
-						i++;
-						ch = chNext;
-						chNext = styler.SafeGetCharAt(i + 1);
-					}
-				} else if (ch == '\"') {
-					styler.ColourTo(i, state);
-					state = SCE_P_DEFAULT;
+					state = SCE_RB_ERROR;
+                    preferRE = false;
 				}
-			} else if (state == SCE_P_CHARACTER) {
-				if ((ch == '\r' || ch == '\n') && (chPrev != '\\')) {
-					styler.ColourTo(i - 1, state);
-					state = SCE_P_STRINGEOL;
-				} else if (ch == '\\') {
-					if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
-						i++;
+            }
+        } else if (state == SCE_RB_HERE_Q) {
+            // Not needed: HereDoc.State == 2
+            // Indentable here docs: look backwards
+            // Non-indentable: look forwards, like in Perl
+            //
+            // Why: so we can quickly resolve things like <<-" abc"
+
+            if (!HereDoc.CanBeIndented) {
+                if (isEOLChar(chPrev)
+                    && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
+                    styler.ColourTo(i - 1, state);
+                    i += HereDoc.DelimiterLength - 1;
+                    chNext = styler.SafeGetCharAt(i + 1);
+                    if (isEOLChar(chNext)) {
+                        styler.ColourTo(i, SCE_RB_HERE_DELIM);
+                        state = SCE_RB_DEFAULT;
+                        HereDoc.State = 0;
+                        preferRE = false;
+                    }
+                    // Otherwise we skipped through the here doc faster.
+                }
+            } else if (isEOLChar(chNext)
+                       && lookingAtHereDocDelim(styler,
+                                                i - HereDoc.DelimiterLength + 1,
+                                                lengthDoc,
+                                                HereDoc.Delimiter)) {
+                styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
+                styler.ColourTo(i, SCE_RB_HERE_DELIM);
+                state = SCE_RB_DEFAULT;
+                preferRE = false;
+                HereDoc.State = 0;
+            }
+        } else if (state == SCE_RB_CLASS_VAR
+                   || state == SCE_RB_INSTANCE_VAR
+                   || state == SCE_RB_SYMBOL) {
+            if (!iswordchar(ch)) {
+                styler.ColourTo(i - 1, state);
+                redo_char(i, ch, chNext, chNext2, state); // pass by ref
+                preferRE = false;
+            }
+        } else if (state == SCE_RB_GLOBAL) {
+            if (!iswordchar(ch)) {
+                // handle special globals here as well
+                if (chPrev == '$') {
+                    if (ch == '-') {
+                        // Include the next char, like $-a
+                        advance_char(i, ch, chNext, chNext2);
+                    }
+                    styler.ColourTo(i, state);
+                    state = SCE_RB_DEFAULT;
+                } else {
+                    styler.ColourTo(i - 1, state);
+                    redo_char(i, ch, chNext, chNext2, state); // pass by ref
+                }
+                preferRE = false;
+            }
+        } else if (state == SCE_RB_POD) {
+            // PODs end with ^=end\s, -- any whitespace can follow =end
+            if (strchr(" \t\n\r", ch) != NULL
+                && i > 5
+                && isEOLChar(styler[i - 5])
+                && isMatch(styler, lengthDoc, i - 4, "=end")) {
+                styler.ColourTo(i - 1, state);
+                state = SCE_RB_DEFAULT;
+                preferRE = false;
+            }
+        } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
+            if (ch == '\\' && Quote.Up != '\\') {
+                // Skip one
+                advance_char(i, ch, chNext, chNext2);
+            } else if (ch == Quote.Down) {
+                Quote.Count--;
+                if (Quote.Count == 0) {
+                    // Include the options
+                    while (isSafeAlpha(chNext)) {
+                        i++;
 						ch = chNext;
-						chNext = styler.SafeGetCharAt(i + 1);
-					}
-				} else if (ch == '\'') {
-					styler.ColourTo(i, state);
-					state = SCE_P_DEFAULT;
-				}
-			} else if (state == SCE_P_TRIPLE) {
-				if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
-					styler.ColourTo(i, state);
-					state = SCE_P_DEFAULT;
-				}
-			} else if (state == SCE_P_TRIPLEDOUBLE) {
-				// =end terminates the comment block
-				if (ch == 'd' && chPrev == 'n' && chPrev2 == 'e') {
-					if  (styler.SafeGetCharAt(i - 3) == '=') {
-						styler.ColourTo(i, state);
-						state = SCE_P_DEFAULT;
-					}
-				}
+                        chNext = styler.SafeGetCharAt(i + 1);
+                    }
+                    styler.ColourTo(i, state);
+                    state = SCE_RB_DEFAULT;
+                    preferRE = false;
+                }
+            } else if (ch == Quote.Up) {
+                // Only if close quoter != open quoter
+                Quote.Count++;
+                
+            } else if (ch == '#' ) {
+                //todo: distinguish comments from pound chars
+                // for now, handle as comment
+                styler.ColourTo(i - 1, state);
+                bool inEscape = false;
+                while (++i < lengthDoc) {
+                    ch = styler.SafeGetCharAt(i);
+                    if (ch == '\\') {
+                        inEscape = true;
+                    } else if (isEOLChar(ch)) {
+                        // Comment inside a regex
+                        styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
+                        break;
+                    } else if (inEscape) {
+                        inEscape = false;  // don't look at char
+                    } else if (ch == Quote.Down) {
+                        // Have the regular handler deal with this
+                        // to get trailing modifiers.
+                        i--;
+                        ch = styler[i];
+						break;
+                    }
+                }
+                chNext = styler.SafeGetCharAt(i + 1);
+                chNext2 = styler.SafeGetCharAt(i + 2);
+            }
+        // Quotes of all kinds...
+        } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ || 
+                   state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
+                   state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
+                   state == SCE_RB_BACKTICKS) {
+            if (!Quote.Down && !isspacechar(ch)) {
+                Quote.Open(ch);
+            } else if (ch == '\\' && Quote.Up != '\\') {
+                //Riddle me this: Is it safe to skip *every* escaped char?
+                advance_char(i, ch, chNext, chNext2);
+            } else if (ch == Quote.Down) {
+                Quote.Count--;
+                if (Quote.Count == 0) {
+                    styler.ColourTo(i, state);
+                    state = SCE_RB_DEFAULT;
+                    preferRE = false;
+                }
+            } else if (ch == Quote.Up) {
+                Quote.Count++;
+            }
+        }
+            
+        if (state == SCE_RB_ERROR) {
+            break;
+        }
+        chPrev = ch;
+    }
+    if (state == SCE_RB_WORD) {
+        // We've ended on a word, possibly at EOF, and need to
+        // classify it.
+        (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
+    } else {
+        styler.ColourTo(lengthDoc - 1, state);
+    }
+}
+
+// Helper functions for folding
+
+// Copy the word that ends at position pos into prevWord.
+//
+// pos        - position of the last character of the word; that character
+//              is copied without its style being checked.
+// prevWord   - destination buffer; must hold at least
+//              MAX_KEYWORD_LENGTH + 1 bytes.  NUL-terminated on return.
+// styler     - accessor; it is flushed before any style is read.
+// word_state - style value (compared after actual_style masking) that
+//              marks the characters belonging to the word.
+//
+// The word is extended to the left for as long as the preceding character
+// carries word_state.  If that run is longer than MAX_KEYWORD_LENGTH, only
+// its last MAX_KEYWORD_LENGTH characters are kept.
+static void getPrevWord(int pos,
+                        char *prevWord,
+                        Accessor &styler,
+                        int word_state)
+{
+    styler.Flush();
+    // Locate the first character of the word.  Position 0 is tested like
+    // every other position, so a non-word character sitting there is never
+    // copied in front of a word that starts at position 1.
+    int start = pos;
+    while (start > 0
+           && actual_style(styler.StyleAt(start - 1)) == word_state) {
+        start--;
+    }
+    // Keep at most MAX_KEYWORD_LENGTH characters so that they, together
+    // with the terminating NUL, fit a MAX_KEYWORD_LENGTH + 1 byte buffer.
+    if (pos - start + 1 > MAX_KEYWORD_LENGTH) {
+        start = pos - MAX_KEYWORD_LENGTH + 1;
+    }
+    char *dst = prevWord;
+    for (int i = start; i <= pos; i++) {
+        *dst++ = styler[i];
+    }
+    *dst = 0;
+}
+
+// Returns true when prevWord is exactly one of "if", "do", "while",
+// "unless" or "until"; false for any other string.
+static bool keywordIsAmbiguous(const char *prevWord)
+{
+    // Ordered from most likely used to least likely.
+    // Ruby has many ways to loop besides 'while/until'.
+    static const char * const candidates[] = {
+        "if", "do", "while", "unless", "until"
+    };
+    const unsigned int total = sizeof(candidates) / sizeof(candidates[0]);
+    for (unsigned int k = 0; k < total; k++) {
+        if (strcmp(prevWord, candidates[k]) == 0) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// True for a space or a horizontal tab only; '\r' and '\n' do not count.
+static inline bool iswhitespace(char ch) {
+    switch (ch) {
+        case ' ':
+        case '\t':
+            return true;
+        default:
+            return false;
+    }
+}
+
+// Demote keywords in the following conditions:
+// if, while, unless, until modify a statement
+// do after a while or until, as a noise word (like then after if) 
+
+// Decide whether a keyword is being used as a modifier of the statement
+// that precedes it on the same line.
+//
+// word   - NUL-terminated keyword text.
+// pos    - document position; the scan starts at pos - 1 and moves left, so
+//          this is presumably the keyword's first character (TODO confirm
+//          against the caller).
+// styler - accessor used to read characters and previously assigned styles.
+//
+// Returns false when only blanks precede the keyword on its line, or when
+// the token found to its left is one after which the keyword is taken to
+// open a construct; returns true otherwise.  For the word "do" the result
+// of keywordDoStartsLoop() is returned instead.
+static bool keywordIsModifier(const char *word,
+                              int pos,
+                              Accessor &styler)
+{
+    // Exactly "do": handled by its own routine.
+    if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
+        return keywordDoStartsLoop(pos, styler);
+    }
+    char ch;
+    int style = SCE_RB_DEFAULT;
+	int lineStart = styler.GetLine(pos);
+    int lineStartPosn = styler.LineStart(lineStart);
+    styler.Flush();
+    // Walk left from the keyword.  Default-styled characters are skipped
+    // (an end-of-line character aborts with false); the first character in
+    // any other style stops the walk, leaving its style in 'style'.
+    while (--pos >= lineStartPosn) {
+        style = actual_style(styler.StyleAt(pos));
+		if (style == SCE_RB_DEFAULT) {
+			if (iswhitespace(ch = styler[pos])) {
+				//continue
+			} else if (ch == '\r' || ch == '\n') {
+				// Scintilla's LineStart() and GetLine() routines aren't
+				// platform-independent, so if we have text prepared with
+				// a different system we can't rely on it.
+				return false;
 			}
+		} else {
+            break;
 		}
-		chPrev2 = chPrev;
-		chPrev = ch;
-	}
-	if (state == SCE_P_WORD) {
-		ClassifyWordRb(styler.GetStartSegment(), lengthDoc-1, keywords, styler, prevWord);
-	} else {
-		styler.ColourTo(lengthDoc-1, state);
-	}
+    }
+    // The walk ran past the start of the line without meeting a styled
+    // token, so nothing but default-styled text precedes the keyword.
+    if (pos < lineStartPosn) {
+        return false; //XXX not quite right if the prev line is a continuation
+    }
+    // First things where the action is unambiguous
+    switch (style) {
+        case SCE_RB_DEFAULT:
+        case SCE_RB_COMMENTLINE:
+        case SCE_RB_POD:
+        case SCE_RB_CLASSNAME:
+        case SCE_RB_DEFNAME:
+        case SCE_RB_MODULE_NAME:
+            return false;
+        case SCE_RB_OPERATOR:
+            // Needs a look at the operator character itself; see below.
+            break;
+        case SCE_RB_WORD:
+            // Watch out for uses of 'else if'
+            //XXX: Make a list of other keywords where 'if' isn't a modifier
+            //     and can appear legitimately
+            // Formulate this to avoid warnings from most compilers
+            if (strcmp(word, "if") == 0) {
+                char prevWord[MAX_KEYWORD_LENGTH + 1];
+                // pos is now on the last character of the preceding keyword.
+                getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
+                return strcmp(prevWord, "else") != 0;
+            }
+            return true;
+        default:
+            return true;
+    }
+    // Assume that if the keyword follows an operator,
+    // usually it's a block assignment, like
+    // a << if x then y else z
+    
+    // Only a closing bracket before the keyword makes it a modifier.
+    ch = styler[pos];
+    switch (ch) {
+        case ')':
+        case ']':
+        case '}':
+            return true;
+        default:
+            return false;
+    }
 }
 
-static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
-						   WordList *[], Accessor &styler) {
-	int lengthDoc = startPos + length;
+// "while" and "until" spelled in reverse.  keywordDoStartsLoop() gathers a
+// keyword's characters while walking from right to left, so it compares the
+// collected text against these reversed spellings.
+#define WHILE_BACKWARDS "elihw"
+#define UNTIL_BACKWARDS "litnu"
 
-	// Backtrack to previous line in case need to fix its fold status
+// Nothing fancy -- look to see if we follow a while/until somewhere
+// on the current line
+
+// Scans leftwards from pos - 1 to the start of the line containing pos and
+// reports whether a word-styled token spelling "while" or "until" is found.
+// Returns false if an end-of-line character in the default style is met
+// first, or if the start of the line is reached without a match.
+static bool keywordDoStartsLoop(int pos,
+                                Accessor &styler)
+{
+    const int lineNumber = styler.GetLine(pos);
+    const int lineBegin = styler.LineStart(lineNumber);
+    styler.Flush();
+    for (int cursor = pos - 1; cursor >= lineBegin; cursor--) {
+        const int cursorStyle = actual_style(styler.StyleAt(cursor));
+        if (cursorStyle == SCE_RB_DEFAULT) {
+            const char here = styler[cursor];
+            if (here == '\r' || here == '\n') {
+                // Scintilla's LineStart() and GetLine() routines aren't
+                // platform-independent, so text prepared on a different
+                // system may contain a line end we were not told about.
+                return false;
+            }
+            continue;
+        }
+        if (cursorStyle != SCE_RB_WORD) {
+            continue;
+        }
+        // Collect the keyword right-to-left, so it is stored reversed.
+        char reversed[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
+        int stored = 0;
+        int seen = 0;
+        int scan = cursor;
+        while (scan >= lineBegin
+               && actual_style(styler.StyleAt(scan)) == SCE_RB_WORD) {
+            seen++;
+            if (seen < MAX_KEYWORD_LENGTH) {
+                reversed[stored++] = styler[scan];
+            }
+            scan--;
+        }
+        reversed[stored] = 0;
+        if (strcmp(reversed, WHILE_BACKWARDS) == 0
+            || strcmp(reversed, UNTIL_BACKWARDS) == 0) {
+            return true;
+        }
+        // scan now sits just left of the keyword.  The loop's own decrement
+        // then skips one more position, which is fine because two keywords
+        // are never contiguous: at worst we land on the end of the previous
+        // word.
+        cursor = scan;
+    }
+    return false;
+}
+
+/*
+ *  Folding Ruby
+ * 
+ *  The language is quite complex to analyze without a full parse.
+ *  For example, this line shouldn't affect fold level:
+ * 
+ *   print "hello" if feeling_friendly?
+ * 
+ *  Neither should this:
+ * 
+ *   print "hello" \
+ *      if feeling_friendly?
+ * 
+ * 
+ *  But this should:
+ * 
+ *   if feeling_friendly?  #++
+ *     print "hello" \
+ *     print "goodbye"
+ *   end                   #--
+ * 
+ *  So we cheat, by actually looking at the existing indentation
+ *  levels for each line, and just echoing it back.  Like Python.
+ *  Then if we get better at it, we'll take braces into consideration,
+ *  which always affect folding levels.
+
+ *  How the keywords should work:
+ *  No effect:
+ *  __FILE__ __LINE__ BEGIN END alias and 
+ *  defined? false in nil not or self super then
+ *  true undef
+
+ *  Always increment:
+ *  begin  class def do for module when {
+ * 
+ *  Always decrement:
+ *  end }
+ * 
+ *  Increment if these start a statement
+ *  if unless until while -- do nothing if they're modifiers
+
+ *  These end a block if there's no modifier, but don't bother
+ *  break next redo retry return yield
+ * 
+ *  These temporarily de-indent, but re-indent
+ *  case else elsif ensure rescue
+ * 
+ *  This means that the folder reflects indentation rather
+ *  than setting it.  The language-service updates indentation
+ *  when users type return and finish entering de-denters.
+ * 
+ *  Later offer to fold POD, here-docs, strings, and blocks of comments
+ */
+
+// Compute fold levels for the range [startPos, startPos + length).
+//
+// startPos, length, initStyle - range to fold and the style in effect at its
+//            start; all three are first handed to synchronizeDocStart(),
+//            which receives them by reference and so may adjust them.
+// WordList *[] - unused.
+// styler   - accessor used to read characters, styles and properties and to
+//            store the resulting fold levels.
+//
+// The level rises on operator-styled '[', '{', '(' and on the block-opening
+// keywords listed below; it falls (never below 0) on ')', '}', ']' and on
+// "end".  With the "fold.comment" property set, a comment whose first
+// character is followed by '{' or '}' also raises or lowers the level.
+// Each finished line is stored with the level in effect at its start, plus
+// the header flag if the level rose on that line and the white flag if the
+// line was blank and "fold.compact" is set.
+static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
+                      WordList *[], Accessor &styler) {
+	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
+	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
+    
+    synchronizeDocStart(startPos, length, initStyle, styler, // ref args
+                        false);
+	unsigned int endPos = startPos + length;
+	int visibleChars = 0;
 	int lineCurrent = styler.GetLine(startPos);
-	if (startPos > 0) {
-		if (lineCurrent > 0) {
-			lineCurrent--;
-			startPos = styler.LineStart(lineCurrent);
-			if (startPos == 0)
-				initStyle = SCE_P_DEFAULT;
-			else
-				initStyle = styler.StyleAt(startPos-1);
-		}
-	}
-	int state = initStyle & 31;
-	int spaceFlags = 0;
-	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, IsRbComment);
-	if ((state == SCE_P_TRIPLE) || (state == SCE_P_TRIPLEDOUBLE))
-		indentCurrent |= SC_FOLDLEVELWHITEFLAG;
+	int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
+                                         & SC_FOLDLEVELNUMBERMASK
+                                         & ~SC_FOLDLEVELBASE);
+	int levelCurrent = levelPrev;
 	char chNext = styler[startPos];
-	for (int i = startPos; i < lengthDoc; i++) {
+	int styleNext = styler.StyleAt(startPos);
+	int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
+    bool buffer_ends_with_eol = false;
+	for (unsigned int i = startPos; i < endPos; i++) {
 		char ch = chNext;
 		chNext = styler.SafeGetCharAt(i + 1);
-		int style = styler.StyleAt(i) & 31;
-
-		if ((ch == '\r' && chNext != '\n') || (ch == '\n') || (i == lengthDoc)) {
-			int lev = indentCurrent;
-			int indentNext = styler.IndentAmount(lineCurrent + 1, &spaceFlags, IsRbComment);
-			if ((style == SCE_P_TRIPLE) || (style== SCE_P_TRIPLEDOUBLE))
-				indentNext |= SC_FOLDLEVELWHITEFLAG;
-			if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
-				// Only non whitespace lines can be headers
-				if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK)) {
-					lev |= SC_FOLDLEVELHEADERFLAG;
-				} else if (indentNext & SC_FOLDLEVELWHITEFLAG) {
-					// Line after is blank so check the next - maybe should continue further?
-					int spaceFlags2 = 0;
-					int indentNext2 = styler.IndentAmount(lineCurrent + 2, &spaceFlags2, IsRbComment);
-					if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext2 & SC_FOLDLEVELNUMBERMASK)) {
-						lev |= SC_FOLDLEVELHEADERFLAG;
-					}
+		int style = styleNext;
+		styleNext = styler.StyleAt(i + 1);
+		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
+        if (style == SCE_RB_COMMENTLINE) {
+            // Only the first character of a comment is examined, so that
+            // "#{" and "#}" act as explicit fold markers while braces
+            // further inside the comment text are ignored.
+            if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
+                if (chNext == '{') {
+					levelCurrent++;
+				} else if (chNext == '}' && levelCurrent > 0) {
+					// Don't decrement below 0
+					levelCurrent--;
 				}
+            }
+        } else if (style == SCE_RB_OPERATOR) {
+			if (strchr("[{(", ch)) {
+				levelCurrent++;
+			} else if (strchr(")}]", ch)) {
+                // Don't decrement below 0
+                if (levelCurrent > 0)
+                    levelCurrent--;
 			}
-			indentCurrent = indentNext;
-			styler.SetLevel(lineCurrent, lev);
+        } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
+            // Look at the keyword on the left and decide what to do
+            // (i is the last character of a word-styled run).
+            char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
+            prevWord[0] = 0;
+            getPrevWord(i, prevWord, styler, SCE_RB_WORD);
+            if (!strcmp(prevWord, "end")) {
+                // Don't decrement below 0
+                if (levelCurrent > 0)
+                    levelCurrent--;
+            } else if (   !strcmp(prevWord, "if")
+                       || !strcmp(prevWord, "def")
+                       || !strcmp(prevWord, "class")
+                       || !strcmp(prevWord, "module")
+                       || !strcmp(prevWord, "begin")
+                       || !strcmp(prevWord, "case")
+                       || !strcmp(prevWord, "do")
+                       || !strcmp(prevWord, "while")
+                       || !strcmp(prevWord, "unless")
+                       || !strcmp(prevWord, "until")
+                       || !strcmp(prevWord, "for")
+                          ) {
+				levelCurrent++;
+            }
+        }
+		if (atEOL) {
+			// The line is stored with the level it started at; the header
+			// flag marks a line on which the level went up.
+			int lev = levelPrev;
+			if (visibleChars == 0 && foldCompact)
+				lev |= SC_FOLDLEVELWHITEFLAG;
+			if ((levelCurrent > levelPrev) && (visibleChars > 0))
+				lev |= SC_FOLDLEVELHEADERFLAG;
+            styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
 			lineCurrent++;
-		}
-	}
+			levelPrev = levelCurrent;
+			visibleChars = 0;
+            buffer_ends_with_eol = true;
+		} else if (!isspacechar(ch)) {
+			visibleChars++;
+            buffer_ends_with_eol = false;
+        }
+		// Remember this character's style so the comment test above can
+		// tell the first character of a comment from the rest of it.
+		stylePrev = style;
+    }
+	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
+    if (!buffer_ends_with_eol) {
+        lineCurrent++;
+        int new_lev = levelCurrent;
+        if (visibleChars == 0 && foldCompact) {
+            new_lev |= SC_FOLDLEVELWHITEFLAG;
+        }
+        if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
+            new_lev |= SC_FOLDLEVELHEADERFLAG;
+        }
+        levelCurrent = new_lev;
+    }
+	styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
 }
 
 static const char * const rubyWordListDesc[] = {
author	nyamatongwe <unknown>	2005-08-25 23:34:00 +0000
committer	nyamatongwe <unknown>	2005-08-25 23:34:00 +0000
commit	477b5db0a414f597e4aa8731811aeea7a0d03609 (patch)
tree	525902d6fa25c1f8c60f26a9ab18a09cbb10a698 /src
parent	16e93df078b92211981df7f50777aeee582e707d (diff)
download	scintilla-mirror-477b5db0a414f597e4aa8731811aeea7a0d03609.tar.gz