From d6ac5bf56d40512ac0634d7a5bee6f7328b7d41f Mon Sep 17 00:00:00 2001 From: nyamatongwe Date: Sat, 19 Jan 2013 11:40:47 +1100 Subject: Support the three Unicode line ends NEL, LS, and PS in CellBuffer, Document, Editor and the message interface. Will only be turned on for lexers that support Unicode line ends. --- src/CellBuffer.cxx | 115 ++++++++++++++++++++++++++++++++++++++++++++++++---- src/CellBuffer.h | 5 +++ src/Document.cxx | 34 +++++++++++++++- src/Document.h | 4 ++ src/Editor.cxx | 18 ++++++++ src/UniConversion.h | 13 ++++++ 6 files changed, 181 insertions(+), 8 deletions(-) (limited to 'src') diff --git a/src/CellBuffer.cxx b/src/CellBuffer.cxx index 11b8b4acd..7bb96ca76 100644 --- a/src/CellBuffer.cxx +++ b/src/CellBuffer.cxx @@ -16,6 +16,7 @@ #include "SplitVector.h" #include "Partitioning.h" #include "CellBuffer.h" +#include "UniConversion.h" #ifdef SCI_NAMESPACE using namespace Scintilla; @@ -331,6 +332,7 @@ void UndoHistory::CompletedRedoStep() { CellBuffer::CellBuffer() { readOnly = false; + utf8LineEnds = 0; collectingUndo = true; } @@ -458,6 +460,13 @@ void CellBuffer::Allocate(int newSize) { style.ReAllocate(newSize); } +void CellBuffer::SetLineEndTypes(int utf8LineEnds_) { + if (utf8LineEnds != utf8LineEnds_) { + utf8LineEnds = utf8LineEnds_; + ResetLineEnds(); + } +} + void CellBuffer::SetPerLine(PerLine *pl) { lv.SetPerLine(pl); } @@ -501,11 +510,64 @@ void CellBuffer::RemoveLine(int line) { lv.RemoveLine(line); } +bool CellBuffer::UTF8LineEndOverlaps(int position) const { + unsigned char bytes[] = { + static_cast(substance.ValueAt(position-2)), + static_cast(substance.ValueAt(position-1)), + static_cast(substance.ValueAt(position)), + static_cast(substance.ValueAt(position+1)), + }; + return UTF8IsSeparator(bytes) || UTF8IsSeparator(bytes+1) || UTF8IsNEL(bytes+1); +} + +void CellBuffer::ResetLineEnds() { + // Reinitialize line data -- too much work to preserve + lv.Init(); + + int position = 0; + int length = Length(); + int lineInsert = 1; + bool atLineStart = true; + lv.InsertText(lineInsert-1, length); + unsigned char chBeforePrev = 0; + unsigned char chPrev = 0; + unsigned char ch = ' '; + for (int i = 0; i < length; i++) { + ch = substance.ValueAt(position + i); + if (ch == '\r') { + InsertLine(lineInsert, (position + i) + 1, atLineStart); + lineInsert++; + } else if (ch == '\n') { + if (chPrev == '\r') { + // Patch up what was end of line + lv.SetLineStart(lineInsert - 1, (position + i) + 1); + } else { + InsertLine(lineInsert, (position + i) + 1, atLineStart); + lineInsert++; + } + } else if (utf8LineEnds) { + unsigned char back3[3] = {chBeforePrev, chPrev, ch}; + if (UTF8IsSeparator(back3) || UTF8IsNEL(back3+1)) { + InsertLine(lineInsert, (position + i) + 1, atLineStart); + lineInsert++; + } + } + chBeforePrev = chPrev; + chPrev = ch; + } +} + void CellBuffer::BasicInsertString(int position, const char *s, int insertLength) { if (insertLength == 0) return; PLATFORM_ASSERT(insertLength > 0); + unsigned char chAfter = substance.ValueAt(position); + bool breakingUTF8LineEnd = false; + if (utf8LineEnds && UTF8IsTrailByte(chAfter)) { + breakingUTF8LineEnd = UTF8LineEndOverlaps(position); + } + substance.InsertFromArray(position, s, 0, insertLength); style.InsertValue(position, insertLength, 0); @@ -513,14 +575,17 @@ void CellBuffer::BasicInsertString(int position, const char *s, int insertLength bool atLineStart = lv.LineStart(lineInsert-1) == position; // Point all the lines after the insertion point further along in the buffer lv.InsertText(lineInsert-1, insertLength); - char chPrev = substance.ValueAt(position - 1); - char chAfter = substance.ValueAt(position + insertLength); + unsigned char chBeforePrev = substance.ValueAt(position - 2); + unsigned char chPrev = substance.ValueAt(position - 1); if (chPrev == '\r' && chAfter == '\n') { // Splitting up a crlf pair at position InsertLine(lineInsert, position, false); lineInsert++; } - char ch = ' '; + if (breakingUTF8LineEnd) { + RemoveLine(lineInsert); + } + unsigned char ch = ' '; for (int i = 0; i < insertLength; i++) { ch = s[i]; if (ch == '\r') { @@ -534,7 +599,14 @@ void CellBuffer::BasicInsertString(int position, const char *s, int insertLength InsertLine(lineInsert, (position + i) + 1, atLineStart); lineInsert++; } + } else if (utf8LineEnds) { + unsigned char back3[3] = {chBeforePrev, chPrev, ch}; + if (UTF8IsSeparator(back3) || UTF8IsNEL(back3+1)) { + InsertLine(lineInsert, (position + i) + 1, atLineStart); + lineInsert++; + } } + chBeforePrev = chPrev; chPrev = ch; } // Joining two lines where last insertion is cr and following substance starts with lf @@ -543,6 +615,22 @@ void CellBuffer::BasicInsertString(int position, const char *s, int insertLength // End of line already in buffer so drop the newly created one RemoveLine(lineInsert - 1); } + } else if (utf8LineEnds && !UTF8IsAscii(chAfter)) { + // May have end of UTF-8 line end in buffer and start in insertion + for (int j = 0; j < UTF8SeparatorLength-1; j++) { + unsigned char chAt = substance.ValueAt(position + insertLength + j); + unsigned char back3[3] = {chBeforePrev, chPrev, chAt}; + if (UTF8IsSeparator(back3)) { + InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart); + lineInsert++; + } + if ((j == 0) && UTF8IsNEL(back3+1)) { + InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart); + lineInsert++; + } + chBeforePrev = chPrev; + chPrev = chAt; + } } } @@ -560,9 +648,9 @@ void CellBuffer::BasicDeleteChars(int position, int deleteLength) { int lineRemove = lv.LineFromPosition(position) + 1; lv.InsertText(lineRemove-1, - (deleteLength)); - char chPrev = substance.ValueAt(position - 1); - char chBefore = chPrev; - char chNext = substance.ValueAt(position); + unsigned char chPrev = substance.ValueAt(position - 1); + unsigned char chBefore = chPrev; + unsigned char chNext = substance.ValueAt(position); bool ignoreNL = false; if (chPrev == '\r' && chNext == '\n') { // Move back one @@ -570,8 +658,13 @@ void CellBuffer::BasicDeleteChars(int position, int deleteLength) { lineRemove++; ignoreNL = true; // First \n is not real deletion } + if (utf8LineEnds && UTF8IsTrailByte(chNext)) { + if (UTF8LineEndOverlaps(position)) { + RemoveLine(lineRemove); + } + } - char ch = chNext; + unsigned char ch = chNext; for (int i = 0; i < deleteLength; i++) { chNext = substance.ValueAt(position + i + 1); if (ch == '\r') { @@ -584,6 +677,14 @@ void CellBuffer::BasicDeleteChars(int position, int deleteLength) { } else { RemoveLine(lineRemove); } + } else if (utf8LineEnds) { + if (!UTF8IsAscii(ch)) { + unsigned char next3[3] = {ch, chNext, + static_cast(substance.ValueAt(position + i + 2))}; + if (UTF8IsSeparator(next3) || UTF8IsNEL(next3)) { + RemoveLine(lineRemove); + } + } } ch = chNext; diff --git a/src/CellBuffer.h b/src/CellBuffer.h index 388b9027b..bfbb121de 100644 --- a/src/CellBuffer.h +++ b/src/CellBuffer.h @@ -136,12 +136,15 @@ private: SplitVector substance; SplitVector style; bool readOnly; + int utf8LineEnds; bool collectingUndo; UndoHistory uh; LineVector lv; + bool UTF8LineEndOverlaps(int position) const; + void ResetLineEnds(); /// Actions without undo void BasicInsertString(int position, const char *s, int insertLength); void BasicDeleteChars(int position, int deleteLength); @@ -162,6 +165,8 @@ public: int Length() const; void Allocate(int newSize); + int GetLineEndTypes() const { return utf8LineEnds; } + void SetLineEndTypes(int utf8LineEnds_); void SetPerLine(PerLine *pl); int Lines() const; int LineStart(int line) const; diff --git a/src/Document.cxx b/src/Document.cxx index 2036f383c..b75c754ac 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -87,6 +87,7 @@ Document::Document() { eolMode = SC_EOL_LF; #endif dbcsCodePage = 0; + lineEndBitSet = SC_LINE_END_TYPE_DEFAULT; stylingBits = 5; stylingBitsMask = 0x1F; stylingMask = 0; @@ -157,12 +158,29 @@ bool Document::SetDBCSCodePage(int dbcsCodePage_) { if (dbcsCodePage != dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; SetCaseFolder(NULL); + cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); return true; } else { return false; } } +bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) { + if (lineEndBitSet != lineEndBitSet_) { + lineEndBitSet = lineEndBitSet_; + int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); + if (lineEndBitSetActive != cb.GetLineEndTypes()) { + ModifiedAt(0); + cb.SetLineEndTypes(lineEndBitSetActive); + return true; + } else { + return false; + } + } else { + return false; + } +} + void Document::InsertLine(int line) { for (int j=0; j(cb.CharAt(position-3)), + static_cast(cb.CharAt(position-2)), + static_cast(cb.CharAt(position-1)), + }; + if (UTF8IsSeparator(bytes)) { + return position - UTF8SeparatorLength; + } + if (UTF8IsNEL(bytes+1)) { + return position - UTF8NELLength; + } + } + position--; // Back over CR or LF // When line terminator is CR+LF, may need to go back one more if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) { position--; diff --git a/src/Document.h b/src/Document.h index 592d2ecb9..16804d3a1 100644 --- a/src/Document.h +++ b/src/Document.h @@ -241,6 +241,7 @@ public: int eolMode; /// Can also be SC_CP_UTF8 to enable UTF-8 mode int dbcsCodePage; + int lineEndBitSet; int tabInChars; int indentInChars; int actualIndentInChars; @@ -259,6 +260,9 @@ public: virtual void Init(); int LineEndTypesSupported() const; bool SetDBCSCodePage(int dbcsCodePage_); + int GetLineEndTypesAllowed() { return cb.GetLineEndTypes(); } + bool SetLineEndTypesAllowed(int lineEndBitSet_); + int GetLineEndTypesActive() { return cb.GetLineEndTypes(); } virtual void InsertLine(int line); virtual void RemoveLine(int line); diff --git a/src/Editor.cxx b/src/Editor.cxx index e4ae6060e..f150aa202 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -7784,6 +7784,21 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { pdoc->eolMode = wParam; break; + case SCI_SETLINEENDTYPESALLOWED: + if (pdoc->SetLineEndTypesAllowed(wParam)) { + cs.Clear(); + cs.InsertLines(0, pdoc->LinesTotal() - 1); + SetAnnotationHeights(0, pdoc->LinesTotal()); + InvalidateStyleRedraw(); + } + break; + + case SCI_GETLINEENDTYPESALLOWED: + return pdoc->GetLineEndTypesAllowed(); + + case SCI_GETLINEENDTYPESACTIVE: + return pdoc->GetLineEndTypesActive(); + case SCI_STARTSTYLING: pdoc->StartStyling(wParam, static_cast(lParam)); break; @@ -8080,6 +8095,9 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { case SCI_SETCODEPAGE: if (ValidCodePage(wParam)) { if (pdoc->SetDBCSCodePage(wParam)) { + cs.Clear(); + cs.InsertLines(0, pdoc->LinesTotal() - 1); + SetAnnotationHeights(0, pdoc->LinesTotal()); InvalidateStyleRedraw(); } } diff --git a/src/UniConversion.h b/src/UniConversion.h index 704f16239..70e8a9517 100644 --- a/src/UniConversion.h +++ b/src/UniConversion.h @@ -26,3 +26,16 @@ inline bool UTF8IsAscii(int ch) { enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 }; int UTF8Classify(const unsigned char *us, int len); + +// Line separator is U+2028 \xe2\x80\xa8 +// Paragraph separator is U+2029 \xe2\x80\xa9 +const int UTF8SeparatorLength = 3; +inline bool UTF8IsSeparator(const unsigned char *us) { + return (us[0] == 0xe2) && (us[1] == 0x80) && ((us[2] == 0xa8) || (us[2] == 0xa9)); +} + +// NEL is U+0085 \xc2\x85 +const int UTF8NELLength = 2; +inline bool UTF8IsNEL(const unsigned char *us) { + return (us[0] == 0xc2) && (us[1] == 0x85); +} -- cgit v1.2.3