diff options
author | Neil <nyamatongwe@gmail.com> | 2018-07-10 15:06:50 +1000 |
---|---|---|
committer | Neil <nyamatongwe@gmail.com> | 2018-07-10 15:06:50 +1000 |
commit | 72b5df15f33da27c59efd54eb0c84e173ca8c692 (patch) | |
tree | a65cbcf60c89542255a27672302e5de5e715624e | |
parent | 34540c84e31840787054652b72be7709d79eb1a2 (diff) | |
download | scintilla-mirror-72b5df15f33da27c59efd54eb0c84e173ca8c692.tar.gz |
Backport: Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added.
Converted instances of C++17 std::string_view to C++11.
Also used const_casts where appropriate to fix compile errors.
Backport of changeset 7063:0d5edc93e280.
-rw-r--r-- | doc/ScintillaDoc.html | 126 | ||||
-rw-r--r-- | doc/ScintillaHistory.html | 5 | ||||
-rw-r--r-- | include/Scintilla.h | 12 | ||||
-rw-r--r-- | include/Scintilla.iface | 30 | ||||
-rw-r--r-- | src/CellBuffer.cxx | 334 | ||||
-rw-r--r-- | src/CellBuffer.h | 9 | ||||
-rw-r--r-- | src/Document.cxx | 22 | ||||
-rw-r--r-- | src/Document.h | 5 | ||||
-rw-r--r-- | src/Editor.cxx | 28 | ||||
-rw-r--r-- | src/UniConversion.cxx | 16 | ||||
-rw-r--r-- | src/UniConversion.h | 1 | ||||
-rw-r--r-- | test/simpleTests.py | 95 | ||||
-rw-r--r-- | test/unit/testCellBuffer.cxx | 288 |
13 files changed, 923 insertions, 48 deletions
diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html index 41ece6fa9..69cbd9e35 100644 --- a/doc/ScintillaDoc.html +++ b/doc/ScintillaDoc.html @@ -267,136 +267,140 @@ <tr> <td>○ <a class="toc" href="#SelectionAndInformation">Selection and information</a></td> - <td>○ <a class="toc" href="#MultipleSelectionAndVirtualSpace">Multiple Selection and Virtual Space</a></td> + <td>○ <a class="toc" href="#ByCharacterOrCodeUnit">By character or UTF-16 code unit</a></td> - <td>○ <a class="toc" href="#ScrollingAndAutomaticScrolling">Scrolling and automatic + <td>○ <a class="toc" href="#MultipleSelectionAndVirtualSpace">Multiple Selection and Virtual Space</a></td> scrolling</a></td> </tr> <tr> + <td>○ <a class="toc" href="#ScrollingAndAutomaticScrolling">Scrolling and automatic + <td>○ <a class="toc" href="#WhiteSpace">White space</a></td> <td>○ <a class="toc" href="#Cursor">Cursor</a></td> - <td>○ <a class="toc" href="#MouseCapture">Mouse capture</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#MouseCapture">Mouse capture</a></td> + <td>○ <a class="toc" href="#LineEndings">Line endings</a></td> <td>○ <a class="toc" href="#Words">Words</a></td> - <td>○ <a class="toc" href="#Styling">Styling</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#Styling">Styling</a></td> + <td>○ <a class="toc" href="#StyleDefinition">Style definition</a></td> <td>○ <a class="toc" href="#CaretAndSelectionStyles">Caret, selection, and hotspot styles</a></td> - <td>○ <a class="toc" href="#CharacterRepresentations">Character representations</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#CharacterRepresentations">Character representations</a></td> + <td>○ <a class="toc" href="#Margins">Margins</a></td> <td>○ <a class="toc" href="#Annotations">Annotations</a></td> - <td>○ <a class="toc" href="#OtherSettings">Other settings</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#OtherSettings">Other settings</a></td> + <td>○ <a class="toc" href="#BraceHighlighting">Brace highlighting</a></td> 
<td>○ <a class="toc" href="#TabsAndIndentationGuides">Tabs and Indentation Guides</a></td> - <td>○ <a class="toc" href="#Markers">Markers</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#Markers">Markers</a></td> + <td>○ <a class="toc" href="#Indicators">Indicators</a></td> <td>○ <a class="toc" href="#Autocompletion">Autocompletion</a></td> - <td>○ <a class="toc" href="#UserLists">User lists</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#UserLists">User lists</a></td> + <td>○ <a class="toc" href="#CallTips">Call tips</a></td> <td>○ <a class="toc" href="#KeyboardCommands">Keyboard commands</a></td> - <td>○ <a class="toc" href="#KeyBindings">Key bindings</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#KeyBindings">Key bindings</a></td> + <td>○ <a class="toc" href="#PopupEditMenu">Popup edit menu</a></td> <td>○ <a class="toc" href="#MacroRecording">Macro recording</a></td> - <td>○ <a class="toc" href="#Printing">Printing</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#Printing">Printing</a></td> + <td>○ <a class="toc" href="#DirectAccess">Direct access</a></td> <td>○ <a class="toc" href="#MultipleViews">Multiple views</a></td> - <td>○ <a class="toc" href="#BackgroundLoadSave">Background loading and saving</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#BackgroundLoadSave">Background loading and saving</a></td> + <td>○ <a class="toc" href="#Folding">Folding</a></td> <td>○ <a class="toc" href="#LineWrapping">Line wrapping</a></td> - <td>○ <a class="toc" href="#Zooming">Zooming</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#Zooming">Zooming</a></td> + <td>○ <a class="toc" href="#LongLines">Long lines</a></td> <td>○ <a class="toc" href="#Accessibility">Accessibility</a></td> - <td>○ <a class="toc" href="#Lexer">Lexer</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#Lexer">Lexer</a></td> + <td>○ <a class="toc" href="#LexerObjects">Lexer objects</a></td> <td>○ <a class="toc" href="#Notifications">Notifications</a></td> - <td>○ <a class="toc" 
href="#Images">Images</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#Images">Images</a></td> + <td>○ <a class="toc" href="#GTK">GTK+</a></td> <td>○ <a class="toc" href="#ProvisionalMessages"><span class="provisional">Provisional messages</span></a></td> - <td>○ <a class="toc" href="#DeprecatedMessages">Deprecated messages</a></td> - </tr> <tr> + <td>○ <a class="toc" href="#DeprecatedMessages">Deprecated messages</a></td> + <td>○ <a class="toc" href="#EditMessagesNeverSupportedByScintilla">Edit messages never supported by Scintilla</a></td> <td>○ <a class="toc" href="#RemovedFeatures">Removed features</a></td> + </tr> + <tr> <td>○ <a class="toc" href="#BuildingScintilla">Building Scintilla</a></td> </tr> @@ -1226,8 +1230,6 @@ struct Sci_TextToFind { <a class="message" href="#SCI_MOVECARETINSIDEVIEW">SCI_MOVECARETINSIDEVIEW</a><br /> <a class="message" href="#SCI_POSITIONBEFORE">SCI_POSITIONBEFORE(int pos) → position</a><br /> <a class="message" href="#SCI_POSITIONAFTER">SCI_POSITIONAFTER(int pos) → position</a><br /> - <a class="message" href="#SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) → position</a><br /> - <a class="message" href="#SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) → int</a><br /> <a class="message" href="#SCI_TEXTWIDTH">SCI_TEXTWIDTH(int style, const char *text) → int</a><br /> <a class="message" href="#SCI_TEXTHEIGHT">SCI_TEXTHEIGHT(int line) → int</a><br /> <a class="message" href="#SCI_CHOOSECARETX">SCI_CHOOSECARETX</a><br /> @@ -1445,15 +1447,6 @@ struct Sci_TextToFind { If called with a position within a multi byte character will return the position of the start/end of that character.</p> - <p><b id="SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) → position</b><br /> - Count a number of whole characters before or after the argument position and return that position. - The minimum position returned is 0 and the maximum is the last position in the document. 
- If the position goes past the document end then 0 is returned. - </p> - - <p><b id="SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) → int</b><br /> - Returns the number of whole characters between two positions..</p> - <p><b id="SCI_TEXTWIDTH">SCI_TEXTWIDTH(int style, const char *text) → int</b><br /> This returns the pixel width of a string drawn in the given <code class="parameter">style</code> which can be used, for example, to decide how wide to make the line number margin in order to display a @@ -1524,6 +1517,61 @@ struct Sci_TextToFind { When this option is turned off, mouse selections will always stick to the mode the selection was started in. It is off by default.</p> + <h2 id="ByCharacterOrCodeUnit">By character or UTF-16 code unit</h2> + + <p>Most Scintilla APIs use byte positions but some applications want to use positions based on counting + (UTF-32) characters or (UTF-16) code units + or need to communicate with other code written in terms of characters or code units. 
+ With only byte positions, this may require examining many bytes to count characters or code units in the document + but this may be sped up in some cases by indexing the line starts by character or code unit.</p> + + <code> + <a class="message" href="#SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) → position</a><br /> + <a class="message" href="#SCI_POSITIONRELATIVECODEUNITS">SCI_POSITIONRELATIVECODEUNITS(int pos, int relative) → position</a><br /> + <a class="message" href="#SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) → int</a><br /> + <a class="message" href="#SCI_COUNTCODEUNITS">SCI_COUNTCODEUNITS(int start, int end) → int</a><br /> + <a class="message" href="#SCI_GETLINECHARACTERINDEX">SCI_GETLINECHARACTERINDEX → int</a><br /> + <a class="message" href="#SCI_ALLOCATELINECHARACTERINDEX">SCI_ALLOCATELINECHARACTERINDEX(int lineCharacterIndex)</a><br /> + <a class="message" href="#SCI_RELEASELINECHARACTERINDEX">SCI_RELEASELINECHARACTERINDEX(int lineCharacterIndex)</a><br /> + <a class="message" href="#SCI_LINEFROMINDEXPOSITION">SCI_LINEFROMINDEXPOSITION(int pos, int lineCharacterIndex) → int</a><br /> + <a class="message" href="#SCI_INDEXPOSITIONFROMLINE">SCI_INDEXPOSITIONFROMLINE(int line, int lineCharacterIndex) → int</a><br /> + </code> + + <p><b id="SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) → position</b><br /> + Count a number of whole characters before or after the argument position and return that position. + The minimum position returned is 0 and the maximum is the last position in the document. + If the position goes past the document end then 0 is returned. 
+ </p> + + <p><b id="SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) → int</b><br /> + Returns the number of whole characters between two positions.</p> + + <p><b id="SCI_POSITIONRELATIVECODEUNITS">SCI_POSITIONRELATIVECODEUNITS(int pos, int relative) → position</b><br /> + <b id="SCI_COUNTCODEUNITS">SCI_COUNTCODEUNITS(int start, int end) → int</b><br /> + These are the UTF-16 versions of <code>SCI_POSITIONRELATIVE</code> and <code>SCI_COUNTCHARACTERS</code> + working in terms of UTF-16 code units.</p> + + <p><b id="SCI_GETLINECHARACTERINDEX">SCI_GETLINECHARACTERINDEX → int</b><br /> + Returns which, if any, indexes are active. It may be <code>SC_LINECHARACTERINDEX_NONE(0)</code> or one or more + of <code>SC_LINECHARACTERINDEX_UTF32(1)</code> if whole characters are indexed or + <code>SC_LINECHARACTERINDEX_UTF16(2)</code> if UTF-16 code units are indexed. + Character indexes are currently only supported for UTF-8 documents.</p> + + <p><b id="SCI_ALLOCATELINECHARACTERINDEX">SCI_ALLOCATELINECHARACTERINDEX(int lineCharacterIndex)</b><br /> + <b id="SCI_RELEASELINECHARACTERINDEX">SCI_RELEASELINECHARACTERINDEX(int lineCharacterIndex)</b><br /> + Allocate or release one or more indexes using the same enumeration as <code>SCI_GETLINECHARACTERINDEX</code>. + Different aspects of an application may need indexes for different periods and should allocate for those periods. + Indexes use additional memory so releasing them can help minimize memory but they also take time to recalculate. + Scintilla may also allocate indexes to support features like accessibility or input method editors. 
+ Only one index of each type is created for a document at a time.</p> + + <p><b id="SCI_LINEFROMINDEXPOSITION">SCI_LINEFROMINDEXPOSITION(int pos, int lineCharacterIndex) → int</b><br /> + <b id="SCI_INDEXPOSITIONFROMLINE">SCI_INDEXPOSITIONFROMLINE(int line, int lineCharacterIndex) → int</b><br /> + The document line of a particular character or code unit may be found by calling <code>SCI_LINEFROMINDEXPOSITION</code> with one of + <code>SC_LINECHARACTERINDEX_UTF32(1)</code> or <code>SC_LINECHARACTERINDEX_UTF16(2)</code>. + The inverse action, finds the starting position of a document line either in characters or code units from the document start by calling + <code>SCI_INDEXPOSITIONFROMLINE</code> with the same <code class="parameter">lineCharacterIndex</code> argument.</p> + <h2 id="MultipleSelectionAndVirtualSpace">Multiple Selection and Virtual Space</h2> <code> diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html index 14b30a121..8f4d17920 100644 --- a/doc/ScintillaHistory.html +++ b/doc/ScintillaHistory.html @@ -544,6 +544,11 @@ Released 19 June 2018. </li> <li> + Optional indexing of line starts in UTF-8 documents by UTF-32 code points and UTF-16 code units added. + This can improve performance for clients that provide UTF-32 or UTF-16 interfaces or that need to interoperate + with UTF-32 or UTF-16 components. + </li> + <li> Lexers added for SAS and Stata. 
<a href="https://sourceforge.net/p/scintilla/feature-requests/1185/">Feature #1185.</a> </li> diff --git a/include/Scintilla.h b/include/Scintilla.h index 70f17918b..305e64c88 100644 --- a/include/Scintilla.h +++ b/include/Scintilla.h @@ -365,6 +365,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam, #define SCI_GETLINEINDENTPOSITION 2128 #define SCI_GETCOLUMN 2129 #define SCI_COUNTCHARACTERS 2633 +#define SCI_COUNTCODEUNITS 2715 #define SCI_SETHSCROLLBAR 2130 #define SCI_GETHSCROLLBAR 2131 #define SC_IV_NONE 0 @@ -755,6 +756,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam, #define SCI_POSITIONBEFORE 2417 #define SCI_POSITIONAFTER 2418 #define SCI_POSITIONRELATIVE 2670 +#define SCI_POSITIONRELATIVECODEUNITS 2716 #define SCI_COPYRANGE 2419 #define SCI_COPYTEXT 2420 #define SC_SEL_STREAM 0 @@ -1108,6 +1110,16 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam, #define SCN_AUTOCCOMPLETED 2030 #define SCN_MARGINRIGHTCLICK 2031 #define SCN_AUTOCSELECTIONCHANGE 2032 +#ifndef SCI_DISABLE_PROVISIONAL +#define SC_LINECHARACTERINDEX_NONE 0 +#define SC_LINECHARACTERINDEX_UTF32 1 +#define SC_LINECHARACTERINDEX_UTF16 2 +#define SCI_GETLINECHARACTERINDEX 2710 +#define SCI_ALLOCATELINECHARACTERINDEX 2711 +#define SCI_RELEASELINECHARACTERINDEX 2712 +#define SCI_LINEFROMINDEXPOSITION 2713 +#define SCI_INDEXPOSITIONFROMLINE 2714 +#endif /* --Autogenerated -- end of section automatically generated from Scintilla.iface */ /* These structures are defined to be exactly the same shape as the Win32 diff --git a/include/Scintilla.iface b/include/Scintilla.iface index 4146d162d..4dc08d4e1 100644 --- a/include/Scintilla.iface +++ b/include/Scintilla.iface @@ -862,6 +862,9 @@ get int GetColumn=2129(position pos,) # Count characters between two positions. fun int CountCharacters=2633(position start, position end) +# Count code units between two positions. 
+fun int CountCodeUnits=2715(position start, position end) + # Show or hide the horizontal scroll bar. set void SetHScrollBar=2130(bool visible,) # Is the horizontal scroll bar visible? @@ -1966,6 +1969,11 @@ fun position PositionAfter=2418(position pos,) # of characters. Returned value is always between 0 and last position in document. fun position PositionRelative=2670(position pos, int relative) +# Given a valid document position, return a position that differs in a number +# of UTF-16 code units. Returned value is always between 0 and last position in document. +# The result may point half way (2 bytes) inside a non-BMP character. +fun position PositionRelativeCodeUnits=2716(position pos, int relative) + # Copy a range of text to the clipboard. Positions are clipped into the document. fun void CopyRange=2419(position start, position end) @@ -4932,10 +4940,28 @@ evt void AutoCCompleted=2030(string text, int position, int ch, CompletionMethod evt void MarginRightClick=2031(int modifiers, int position, int margin) evt void AutoCSelectionChange=2032(int listType, string text, int position) -# There are no provisional APIs currently. - cat Provisional +enu LineCharacterIndexType=SC_LINECHARACTERINDEX_ +val SC_LINECHARACTERINDEX_NONE=0 +val SC_LINECHARACTERINDEX_UTF32=1 +val SC_LINECHARACTERINDEX_UTF16=2 + +# Retrieve line character index state. +get int GetLineCharacterIndex=2710(,) + +# Request line character index be created or its use count increased. +fun void AllocateLineCharacterIndex=2711(int lineCharacterIndex,) + +# Decrease use count of line character index and remove if 0. +fun void ReleaseLineCharacterIndex=2712(int lineCharacterIndex,) + +# Retrieve the document line containing a position measured in index units. +fun int LineFromIndexPosition=2713(position posUTF32, int lineCharacterIndex) + +# Retrieve the position measured in index units at the start of a document line. 
+fun position IndexPositionFromLine=2714(int line, int lineCharacterIndex) + cat Deprecated # Divide each styling byte into lexical class bits (default: 5) and indicator diff --git a/src/CellBuffer.cxx b/src/CellBuffer.cxx index 5229dee61..2b1ba74f8 100644 --- a/src/CellBuffer.cxx +++ b/src/CellBuffer.cxx @@ -7,6 +7,7 @@ #include <cstddef> #include <cstdlib> +#include <cassert> #include <cstring> #include <cstdio> #include <cstdarg> @@ -27,17 +28,53 @@ namespace Scintilla { +struct CountWidths { + // Measures the number of characters in a string divided into those + // from the Base Multilingual Plane and those from other planes. + Sci::Position countBasePlane; + Sci::Position countOtherPlanes; + CountWidths(Sci::Position countBasePlane_=0, Sci::Position countOtherPlanes_=0) noexcept : + countBasePlane(countBasePlane_), + countOtherPlanes(countOtherPlanes_) { + } + CountWidths operator-() const noexcept { + return CountWidths(-countBasePlane , -countOtherPlanes); + } + Sci::Position WidthUTF32() const noexcept { + // All code points take one code unit in UTF-32. 
+ return countBasePlane + countOtherPlanes; + } + Sci::Position WidthUTF16() const noexcept { + // UTF-16 takes 2 code units for other planes + return countBasePlane + 2 * countOtherPlanes; + } + void CountChar(int lenChar) noexcept { + if (lenChar == 4) { + countOtherPlanes++; + } else { + countBasePlane++; + } + } +}; + class ILineVector { public: virtual void Init() = 0; virtual void SetPerLine(PerLine *pl) = 0; virtual void InsertText(Sci::Line line, Sci::Position delta) = 0; virtual void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) = 0; - virtual void SetLineStart(Sci::Line line, Sci::Position position) = 0; + virtual void SetLineStart(Sci::Line line, Sci::Position position) noexcept = 0; virtual void RemoveLine(Sci::Line line) = 0; virtual Sci::Line Lines() const noexcept = 0; virtual Sci::Line LineFromPosition(Sci::Position pos) const noexcept = 0; virtual Sci::Position LineStart(Sci::Line line) const noexcept = 0; + virtual void InsertCharacters(Sci::Line line, CountWidths delta) = 0; + virtual void SetLineCharactersWidth(Sci::Line line, CountWidths width) = 0; + virtual int LineCharacterIndex() const noexcept = 0; + virtual bool AllocateLineCharacterIndex(int lineCharacterIndex, Sci::Line lines) = 0; + virtual bool ReleaseLineCharacterIndex(int lineCharacterIndex) = 0; + virtual Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept = 0; + virtual Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept = 0; virtual ~ILineVector() {} }; @@ -46,9 +83,58 @@ public: using namespace Scintilla; template <typename POS> +class LineStartIndex { +public: + int refCount; + Partitioning<POS> starts; + + LineStartIndex() : refCount(0), starts(4) { + // Minimal initial allocation + } + // Deleted so LineStartIndex objects can not be copied. 
+ LineStartIndex(const LineStartIndex &) = delete; + LineStartIndex(LineStartIndex &&) = delete; + void operator=(const LineStartIndex &) = delete; + void operator=(LineStartIndex &&) = delete; + virtual ~LineStartIndex() { + starts.DeleteAll(); + } + bool Allocate(Sci::Line lines) { + refCount++; + Sci::Position length = starts.PositionFromPartition(starts.Partitions()); + for (Sci::Line line = starts.Partitions(); line < lines; line++) { + // Produce an ascending sequence that will be filled in with correct widths later + length++; + starts.InsertPartition(static_cast<POS>(line), static_cast<POS>(length)); + } + return refCount == 1; + } + bool Release() { + if (refCount == 1) { + starts.DeleteAll(); + } + refCount--; + return refCount == 0; + } + bool Active() const noexcept { + return refCount > 0; + } + Sci::Position LineWidth(Sci::Line line) const noexcept { + return starts.PositionFromPartition(static_cast<POS>(line) + 1) - + starts.PositionFromPartition(static_cast<POS>(line)); + } + void SetLineWidth(Sci::Line line, Sci::Position width) { + const Sci::Position widthCurrent = LineWidth(line); + starts.InsertText(static_cast<POS>(line), static_cast<POS>(width - widthCurrent)); + } +}; + +template <typename POS> class LineVector : public ILineVector { Partitioning<POS> starts; PerLine *perLine; + LineStartIndex<POS> startsUTF16; + LineStartIndex<POS> startsUTF32; public: LineVector() : starts(256), perLine(0) { Init(); @@ -65,7 +151,9 @@ public: if (perLine) { perLine->Init(); } - } + startsUTF32.starts.DeleteAll(); + startsUTF16.starts.DeleteAll(); + } void SetPerLine(PerLine *pl) override { perLine = pl; } @@ -73,18 +161,33 @@ public: starts.InsertText(static_cast<POS>(line), static_cast<POS>(delta)); } void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) override { - starts.InsertPartition(static_cast<POS>(line), static_cast<POS>(position)); + const POS lineAsPos = static_cast<POS>(line); + starts.InsertPartition(lineAsPos, 
static_cast<POS>(position)); + if (startsUTF32.Active()) { + startsUTF32.starts.InsertPartition(lineAsPos, + static_cast<POS>(startsUTF32.starts.PositionFromPartition(lineAsPos - 1) + 1)); + } + if (startsUTF16.Active()) { + startsUTF16.starts.InsertPartition(lineAsPos, + static_cast<POS>(startsUTF16.starts.PositionFromPartition(lineAsPos - 1) + 1)); + } if (perLine) { if ((line > 0) && lineStart) line--; perLine->InsertLine(line); } } - void SetLineStart(Sci::Line line, Sci::Position position) override { + void SetLineStart(Sci::Line line, Sci::Position position) noexcept override { starts.SetPartitionStartPosition(static_cast<POS>(line), static_cast<POS>(position)); } void RemoveLine(Sci::Line line) override { starts.RemovePartition(static_cast<POS>(line)); + if (startsUTF32.Active()) { + startsUTF32.starts.RemovePartition(static_cast<POS>(line)); + } + if (startsUTF16.Active()) { + startsUTF16.starts.RemovePartition(static_cast<POS>(line)); + } if (perLine) { perLine->RemoveLine(line); } @@ -98,6 +201,71 @@ public: Sci::Position LineStart(Sci::Line line) const noexcept override { return starts.PositionFromPartition(static_cast<POS>(line)); } + void InsertCharacters(Sci::Line line, CountWidths delta) override { + if (startsUTF32.Active()) { + startsUTF32.starts.InsertText(static_cast<POS>(line), static_cast<POS>(delta.WidthUTF32())); + } + if (startsUTF16.Active()) { + startsUTF16.starts.InsertText(static_cast<POS>(line), static_cast<POS>(delta.WidthUTF16())); + } + } + void SetLineCharactersWidth(Sci::Line line, CountWidths width) override { + if (startsUTF32.Active()) { + assert(startsUTF32.starts.Partitions() == starts.Partitions()); + startsUTF32.SetLineWidth(line, width.WidthUTF32()); + } + if (startsUTF16.Active()) { + assert(startsUTF16.starts.Partitions() == starts.Partitions()); + startsUTF16.SetLineWidth(line, width.WidthUTF16()); + } + } + + int LineCharacterIndex() const noexcept override { + int retVal = 0; + if (startsUTF32.Active()) { + retVal |= 
SC_LINECHARACTERINDEX_UTF32; + } + if (startsUTF16.Active()) { + retVal |= SC_LINECHARACTERINDEX_UTF16; + } + return retVal; + } + bool AllocateLineCharacterIndex(int lineCharacterIndex, Sci::Line lines) override { + bool changed = false; + if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF32) != 0) { + changed = startsUTF32.Allocate(lines) || changed; + assert(startsUTF32.starts.Partitions() == starts.Partitions()); + } + if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF16) != 0) { + changed = startsUTF16.Allocate(lines) || changed; + assert(startsUTF16.starts.Partitions() == starts.Partitions()); + } + return changed; + } + bool ReleaseLineCharacterIndex(int lineCharacterIndex) override { + bool changed = false; + if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF32) != 0) { + changed = startsUTF32.Release() || changed; + } + if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF16) != 0) { + changed = startsUTF16.Release() || changed; + } + return changed; + } + Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept override { + if (lineCharacterIndex == SC_LINECHARACTERINDEX_UTF32) { + return startsUTF32.starts.PositionFromPartition(static_cast<POS>(line)); + } else { + return startsUTF16.starts.PositionFromPartition(static_cast<POS>(line)); + } + } + Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept override { + if (lineCharacterIndex == SC_LINECHARACTERINDEX_UTF32) { + return static_cast<Sci::Line>(startsUTF32.starts.PartitionFromPosition(static_cast<POS>(pos))); + } else { + return static_cast<Sci::Line>(startsUTF16.starts.PartitionFromPosition(static_cast<POS>(pos))); + } + } }; Action::Action() { @@ -363,6 +531,7 @@ void UndoHistory::CompletedRedoStep() { CellBuffer::CellBuffer(bool hasStyles_, bool largeDocument_) : hasStyles(hasStyles_), largeDocument(largeDocument_) { readOnly = false; + utf8Substance = false; utf8LineEnds = 0; collectingUndo = true; if (largeDocument) @@ -504,10 
+673,19 @@ void CellBuffer::Allocate(Sci::Position newSize) { } } +void CellBuffer::SetUTF8Substance(bool utf8Substance_) { + if (utf8Substance != utf8Substance_) { + utf8Substance = utf8Substance_; + ResetLineEnds(); + } +} + void CellBuffer::SetLineEndTypes(int utf8LineEnds_) { if (utf8LineEnds != utf8LineEnds_) { + const int indexes = plv->LineCharacterIndex(); utf8LineEnds = utf8LineEnds_; ResetLineEnds(); + AllocateLineCharacterIndex(indexes); } } @@ -534,6 +712,23 @@ void CellBuffer::SetPerLine(PerLine *pl) { plv->SetPerLine(pl); } +int CellBuffer::LineCharacterIndex() const noexcept { + return plv->LineCharacterIndex(); +} + +void CellBuffer::AllocateLineCharacterIndex(int lineCharacterIndex) { + if (utf8Substance) { + if (plv->AllocateLineCharacterIndex(lineCharacterIndex, Lines())) { + // Changed so recalculate whole file + RecalculateIndexLineStarts(0, Lines() - 1); + } + } +} + +void CellBuffer::ReleaseLineCharacterIndex(int lineCharacterIndex) { + plv->ReleaseLineCharacterIndex(lineCharacterIndex); +} + Sci::Line CellBuffer::Lines() const noexcept { return plv->Lines(); } @@ -551,6 +746,14 @@ Sci::Line CellBuffer::LineFromPosition(Sci::Position pos) const noexcept { return plv->LineFromPosition(pos); } +Sci::Position CellBuffer::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept { + return plv->IndexLineStart(line, lineCharacterIndex); +} + +Sci::Line CellBuffer::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept { + return plv->LineFromPositionIndex(pos, lineCharacterIndex); +} + bool CellBuffer::IsReadOnly() const { return readOnly; } @@ -611,6 +814,37 @@ bool CellBuffer::UTF8LineEndOverlaps(Sci::Position position) const { return UTF8IsSeparator(bytes) || UTF8IsSeparator(bytes+1) || UTF8IsNEL(bytes+1); } +bool CellBuffer::UTF8IsCharacterBoundary(Sci::Position position) const { + assert(position >= 0 && position <= Length()); + if (position > 0) { + std::string back; + for (int i = 0; i < UTF8MaxBytes; 
i++) { + const Sci::Position posBack = position - i; + if (posBack < 0) { + return false; + } + back.insert(0, 1, substance.ValueAt(posBack)); + if (!UTF8IsTrailByte(back.front())) { + if (i > 0) { + // Have reached a non-trail + const int cla = UTF8Classify(reinterpret_cast<const unsigned char*>(back.data()), back.size()); + if ((cla & UTF8MaskInvalid) || (cla != i)) { + return false; + } + } + break; + } + } + } + if (position < Length()) { + const unsigned char fore = substance.ValueAt(position); + if (UTF8IsTrailByte(fore)) { + return false; + } + } + return true; +} + void CellBuffer::ResetLineEnds() { // Reinitialize line data -- too much work to preserve plv->Init(); @@ -647,6 +881,38 @@ void CellBuffer::ResetLineEnds() { } } +namespace { + +CountWidths CountCharacterWidthsUTF8(const char *s, size_t len) noexcept { + CountWidths cw; + size_t remaining = len; + while (remaining > 0) { + const int utf8Status = UTF8Classify(reinterpret_cast<const unsigned char*>(s), remaining); // bound by the bytes still unread, not the original length + const int lenChar = utf8Status & UTF8MaskWidth; + cw.CountChar(lenChar); + s += lenChar; + remaining -= lenChar; + } + return cw; +} + +} + +void CellBuffer::RecalculateIndexLineStarts(Sci::Line lineFirst, Sci::Line lineLast) { + std::string text; + Sci::Position posLineEnd = LineStart(lineFirst); + for (Sci::Line line = lineFirst; line <= lineLast; line++) { + // Find line start and end, retrieve text of line, count characters and update line width + const Sci::Position posLineStart = posLineEnd; + posLineEnd = LineStart(line+1); + const Sci::Position width = posLineEnd - posLineStart; + text.resize(width); + GetCharRange(const_cast<char *>(text.data()), posLineStart, width); + const CountWidths cw = CountCharacterWidthsUTF8(text.data(), text.size()); + plv->SetLineCharactersWidth(line, cw); + } +} + void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::Position insertLength) { if (insertLength == 0) return; @@ -658,12 +924,25 @@ void
CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P breakingUTF8LineEnd = UTF8LineEndOverlaps(position); } + const Sci::Line linePosition = plv->LineFromPosition(position); + Sci::Line lineInsert = linePosition + 1; + + // A simple insertion is one that inserts valid text on a single line at a character boundary + bool simpleInsertion = false; + + // Check for breaking apart a UTF-8 sequence and inserting invalid UTF-8 + if (utf8Substance && (plv->LineCharacterIndex() != SC_LINECHARACTERINDEX_NONE)) { + // Actually, don't need to check that whole insertion is valid just that there + // are no potential fragments at ends. + simpleInsertion = UTF8IsCharacterBoundary(position) && + UTF8IsValid(s, insertLength); + } + substance.InsertFromArray(position, s, 0, insertLength); if (hasStyles) { style.InsertValue(position, insertLength, 0); } - Sci::Line lineInsert = plv->LineFromPosition(position) + 1; const bool atLineStart = plv->LineStart(lineInsert-1) == position; // Point all the lines after the insertion point further along in the buffer plv->InsertText(lineInsert-1, insertLength); @@ -683,6 +962,7 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P if (ch == '\r') { InsertLine(lineInsert, (position + i) + 1, atLineStart); lineInsert++; + simpleInsertion = false; } else if (ch == '\n') { if (chPrev == '\r') { // Patch up what was end of line @@ -691,11 +971,13 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P InsertLine(lineInsert, (position + i) + 1, atLineStart); lineInsert++; } + simpleInsertion = false; } else if (utf8LineEnds) { const unsigned char back3[3] = {chBeforePrev, chPrev, ch}; if (UTF8IsSeparator(back3) || UTF8IsNEL(back3+1)) { InsertLine(lineInsert, (position + i) + 1, atLineStart); lineInsert++; + simpleInsertion = false; } } chBeforePrev = chPrev; @@ -706,6 +988,7 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P if (ch == 
'\r') { // End of line already in buffer so drop the newly created one RemoveLine(lineInsert - 1); + simpleInsertion = false; } } else if (utf8LineEnds && !UTF8IsAscii(chAfter)) { // May have end of UTF-8 line end in buffer and start in insertion @@ -715,21 +998,31 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P if (UTF8IsSeparator(back3)) { InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart); lineInsert++; + simpleInsertion = false; } if ((j == 0) && UTF8IsNEL(back3+1)) { InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart); lineInsert++; + simpleInsertion = false; } chBeforePrev = chPrev; chPrev = chAt; } } + if (simpleInsertion) { + const CountWidths cw = CountCharacterWidthsUTF8(s, insertLength); + plv->InsertCharacters(linePosition, cw); + } else { + RecalculateIndexLineStarts(linePosition, lineInsert - 1); + } } void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLength) { if (deleteLength == 0) return; + Sci::Line lineRecalculateStart = INVALID_POSITION; + if ((position == 0) && (deleteLength == substance.Length())) { // If whole buffer is being deleted, faster to reinitialise lines data // than to delete each line. 
@@ -738,11 +1031,37 @@ void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLe // Have to fix up line positions before doing deletion as looking at text in buffer // to work out which lines have been removed - Sci::Line lineRemove = plv->LineFromPosition(position) + 1; + const Sci::Line linePosition = plv->LineFromPosition(position); + Sci::Line lineRemove = linePosition + 1; + plv->InsertText(lineRemove-1, - (deleteLength)); const unsigned char chPrev = substance.ValueAt(position - 1); const unsigned char chBefore = chPrev; unsigned char chNext = substance.ValueAt(position); + + // Check for breaking apart a UTF-8 sequence + // Needs further checks that text is UTF-8 or that some other break apart is occurring + if (utf8Substance && (plv->LineCharacterIndex() != SC_LINECHARACTERINDEX_NONE)) { + const Sci::Position posEnd = position + deleteLength; + const Sci::Line lineEndRemove = plv->LineFromPosition(posEnd); + const bool simpleDeletion = + (linePosition == lineEndRemove) && + UTF8IsCharacterBoundary(position) && UTF8IsCharacterBoundary(posEnd); + if (simpleDeletion) { + std::string text(deleteLength, '\0'); + GetCharRange(const_cast<char *>(text.data()), position, deleteLength); + if (UTF8IsValid(text.data(), text.size())) { + // Everything is good + const CountWidths cw = CountCharacterWidthsUTF8(text.data(), text.size()); + plv->InsertCharacters(linePosition, -cw); + } else { + lineRecalculateStart = linePosition; + } + } else { + lineRecalculateStart = linePosition; + } + } + bool ignoreNL = false; if (chPrev == '\r' && chNext == '\n') { // Move back one @@ -791,6 +1110,9 @@ void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLe } } substance.DeleteRange(position, deleteLength); + if (lineRecalculateStart >= 0) { + RecalculateIndexLineStarts(lineRecalculateStart, lineRecalculateStart); + } if (hasStyles) { style.DeleteRange(position, deleteLength); } diff --git a/src/CellBuffer.h b/src/CellBuffer.h index 
f360b2a23..b9f2406f1 100644 --- a/src/CellBuffer.h +++ b/src/CellBuffer.h @@ -113,6 +113,7 @@ private: SplitVector<char> substance; SplitVector<char> style; bool readOnly; + bool utf8Substance; int utf8LineEnds; bool collectingUndo; @@ -121,7 +122,9 @@ private: std::unique_ptr<ILineVector> plv; bool UTF8LineEndOverlaps(Sci::Position position) const; + bool UTF8IsCharacterBoundary(Sci::Position position) const; void ResetLineEnds(); + void RecalculateIndexLineStarts(Sci::Line lineFirst, Sci::Line lineLast); /// Actions without undo void BasicInsertString(Sci::Position position, const char *s, Sci::Position insertLength); void BasicDeleteChars(Sci::Position position, Sci::Position deleteLength); @@ -148,13 +151,19 @@ public: Sci::Position Length() const noexcept; void Allocate(Sci::Position newSize); + void SetUTF8Substance(bool utf8Substance_); int GetLineEndTypes() const { return utf8LineEnds; } void SetLineEndTypes(int utf8LineEnds_); bool ContainsLineEnd(const char *s, Sci::Position length) const; void SetPerLine(PerLine *pl); + int LineCharacterIndex() const noexcept; + void AllocateLineCharacterIndex(int lineCharacterIndex); + void ReleaseLineCharacterIndex(int lineCharacterIndex); Sci::Line Lines() const noexcept; Sci::Position LineStart(Sci::Line line) const noexcept; + Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept; Sci::Line LineFromPosition(Sci::Position pos) const noexcept; + Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept; void InsertLine(Sci::Line line, Sci::Position position, bool lineStart); void RemoveLine(Sci::Line line); const char *InsertString(Sci::Position position, const char *s, Sci::Position insertLength, bool &startSequence); diff --git a/src/Document.cxx b/src/Document.cxx index 99c15e3ef..681b3c371 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -122,6 +122,7 @@ Document::Document(int options) : decorations = DecorationListCreate(IsLarge()); 
cb.SetPerLine(this); + cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); } Document::~Document() { @@ -197,6 +198,7 @@ bool Document::SetDBCSCodePage(int dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; SetCaseFolder(nullptr); cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); + cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); return true; } else { return false; @@ -423,6 +425,14 @@ Sci::Position Document::VCHomePosition(Sci::Position position) const { return startText; } +Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const { + return cb.IndexLineStart(line, lineCharacterIndex); +} + +Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const { + return cb.LineFromPositionIndex(pos, lineCharacterIndex); +} + int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { const int prev = Levels()->SetLevel(static_cast<Sci::Line>(line), level, LinesTotal()); if (prev != level) { @@ -2108,6 +2118,18 @@ const char *Document::SubstituteByPosition(const char *text, Sci::Position *leng return 0; } +int Document::LineCharacterIndex() const { + return cb.LineCharacterIndex(); +} + +void Document::AllocateLineCharacterIndex(int lineCharacterIndex) { + return cb.AllocateLineCharacterIndex(lineCharacterIndex); +} + +void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) { + return cb.ReleaseLineCharacterIndex(lineCharacterIndex); +} + Sci::Line Document::LinesTotal() const noexcept { return cb.Lines(); } diff --git a/src/Document.h b/src/Document.h index 184da2e96..97fc7e880 100644 --- a/src/Document.h +++ b/src/Document.h @@ -389,6 +389,8 @@ public: bool IsLineEndPosition(Sci::Position position) const; bool IsPositionInLineEnd(Sci::Position position) const; Sci::Position VCHomePosition(Sci::Position position) const; + Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const; + Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const; int SCI_METHOD 
SetLevel(Sci_Position line, int level) override; int SCI_METHOD GetLevel(Sci_Position line) const override; @@ -414,6 +416,9 @@ public: void SetCaseFolder(CaseFolder *pcf_); Sci::Position FindText(Sci::Position minPos, Sci::Position maxPos, const char *search, int flags, Sci::Position *length); const char *SubstituteByPosition(const char *text, Sci::Position *length); + int LineCharacterIndex() const; + void AllocateLineCharacterIndex(int lineCharacterIndex); + void ReleaseLineCharacterIndex(int lineCharacterIndex); Sci::Line LinesTotal() const noexcept; void SetDefaultCharClasses(bool includeWordClass); diff --git a/src/Editor.cxx b/src/Editor.cxx index 6fc49d971..53ec6794f 100644 --- a/src/Editor.cxx +++ b/src/Editor.cxx @@ -6017,6 +6017,11 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { static_cast<Sci::Position>(wParam), lParam), static_cast<Sci::Position>(0), pdoc->Length()); + case SCI_POSITIONRELATIVECODEUNITS: + return Sci::clamp(pdoc->GetRelativePositionUTF16( + static_cast<Sci::Position>(wParam), lParam), + static_cast<Sci::Position>(0), pdoc->Length()); + case SCI_LINESCROLL: ScrollTo(topLine + static_cast<Sci::Line>(lParam)); HorizontalScrollTo(xOffset + static_cast<int>(wParam) * static_cast<int>(vs.spaceWidth)); @@ -6773,6 +6778,23 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { case SCI_GETIMEINTERACTION: return imeInteraction; + case SCI_GETLINECHARACTERINDEX: + return pdoc->LineCharacterIndex(); + + case SCI_ALLOCATELINECHARACTERINDEX: + pdoc->AllocateLineCharacterIndex(static_cast<int>(wParam)); + break; + + case SCI_RELEASELINECHARACTERINDEX: + pdoc->ReleaseLineCharacterIndex(static_cast<int>(wParam)); + break; + + case SCI_LINEFROMINDEXPOSITION: + return pdoc->LineFromPositionIndex(static_cast<Sci::Position>(wParam), static_cast<int>(lParam)); + + case SCI_INDEXPOSITIONFROMLINE: + return pdoc->IndexLineStart(static_cast<Sci::Line>(wParam), static_cast<int>(lParam)); + // Marker 
definition and setting case SCI_MARKERDEFINE: if (wParam <= MARKER_MAX) { @@ -7384,7 +7406,7 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { return pdoc->decorations->AllOnFor(static_cast<Sci::Position>(wParam)); case SCI_INDICATORVALUEAT: - return pdoc->decorations->ValueAt(static_cast<int>(wParam), static_cast<Sci::Position>(lParam)); + return pdoc->decorations->ValueAt(static_cast<int>(wParam), lParam); case SCI_INDICATORSTART: return pdoc->decorations->Start(static_cast<int>(wParam), lParam); @@ -8178,6 +8200,10 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) { case SCI_COUNTCHARACTERS: return pdoc->CountCharacters(static_cast<Sci::Position>(wParam), lParam); + //return pdoc->CountCharacters(static_cast<Sci::Position>(wParam), static_cast<Sci::Position>(lParam)); + + case SCI_COUNTCODEUNITS: + return pdoc->CountUTF16(static_cast<Sci::Position>(wParam), lParam); default: return DefWndProc(iMessage, wParam, lParam); diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx index 58e899faa..6cd6a8ba9 100644 --- a/src/UniConversion.cxx +++ b/src/UniConversion.cxx @@ -327,6 +327,22 @@ int UTF8DrawBytes(const unsigned char *us, int len) noexcept { return (utf8StatusNext & UTF8MaskInvalid) ? 
1 : (utf8StatusNext & UTF8MaskWidth); } +bool UTF8IsValid(const char *s, size_t len) noexcept { + const unsigned char *us = reinterpret_cast<const unsigned char *>(s); + size_t remaining = len; + while (remaining > 0) { + const int utf8Status = UTF8Classify(us, remaining); + if (utf8Status & UTF8MaskInvalid) { + return false; + } else { + const int lenChar = utf8Status & UTF8MaskWidth; + us += lenChar; + remaining -= lenChar; + } + } + return remaining == 0; +} + // Replace invalid bytes in UTF-8 with the replacement character std::string FixInvalidUTF8(const std::string &text) { std::string result; diff --git a/src/UniConversion.h b/src/UniConversion.h index 0eb9f5378..4bb8875d0 100644 --- a/src/UniConversion.h +++ b/src/UniConversion.h @@ -21,6 +21,7 @@ size_t UTF16Length(const char *s, size_t len); size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen); size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen); unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept; +bool UTF8IsValid(const char *s, size_t len) noexcept; std::string FixInvalidUTF8(const std::string &text); extern const unsigned char UTF8BytesOfLead[256]; diff --git a/test/simpleTests.py b/test/simpleTests.py index 3ff283dad..b1e8efdb7 100644 --- a/test/simpleTests.py +++ b/test/simpleTests.py @@ -1631,6 +1631,76 @@ class TestStyleAttributes(unittest.TestCase): self.ed.StyleSetHotSpot(self.ed.STYLE_DEFAULT, 1) self.assertEquals(self.ed.StyleGetHotSpot(self.ed.STYLE_DEFAULT), 1) +class TestIndices(unittest.TestCase): + def setUp(self): + self.xite = Xite.xiteFrame + self.ed = self.xite.ed + self.ed.ClearAll() + self.ed.EmptyUndoBuffer() + self.ed.SetCodePage(65001) + # Text includes one non-BMP character + t = "aå\U00010348flﬔ-\n" + self.tv = t.encode("UTF-8") + + def tearDown(self): + self.ed.SetCodePage(0) + + def testAllocation(self): + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + 
self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_UTF32) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testUTF32(self): + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + self.ed.SetContents(self.tv) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testUTF16(self): + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + t = "aå\U00010348flﬔ-" + tv = t.encode("UTF-8") + self.ed.SetContents(self.tv) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testBoth(self): + # Set text before turning indices on + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + self.ed.SetContents(self.tv) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7) + 
self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8) + # Test the inverse: position->line + self.assertEquals(self.ed.LineFromIndexPosition(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.LineFromIndexPosition(7, self.ed.SC_LINECHARACTERINDEX_UTF32), 1) + self.assertEquals(self.ed.LineFromIndexPosition(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.LineFromIndexPosition(8, self.ed.SC_LINECHARACTERINDEX_UTF16), 1) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + + def testMaintenance(self): + # Set text after turning indices on + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.ed.SetContents(self.tv) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7) + self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0) + self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8) + self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16) + self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE) + class TestCharacterNavigation(unittest.TestCase): def setUp(self): self.xite = Xite.xiteFrame @@ -1677,6 +1747,31 @@ class TestCharacterNavigation(unittest.TestCase): self.assert_(after < previous) previous = after + def testRelativeNonBOM(self): + # \x61 \xF0\x90\x8D\x88 \xef\xac\x82 \xef\xac\x94 \x2d + t = "a\U00010348flﬔ-" 
+ tv = t.encode("UTF-8") + self.ed.SetContents(tv) + self.assertEquals(self.ed.PositionRelative(1, 2), 8) + self.assertEquals(self.ed.CountCharacters(1, 8), 2) + self.assertEquals(self.ed.CountCodeUnits(1, 8), 3) + self.assertEquals(self.ed.PositionRelative(8, -2), 1) + self.assertEquals(self.ed.PositionRelativeCodeUnits(8, -3), 1) + pos = 0 + previous = 0 + for i in range(1, len(t)): + after = self.ed.PositionRelative(pos, i) + self.assert_(after > pos) + self.assert_(after > previous) + previous = after + pos = len(t) + previous = pos + for i in range(1, len(t)-1): + after = self.ed.PositionRelative(pos, -i) + self.assert_(after < pos) + self.assert_(after <= previous) + previous = after + def testLineEnd(self): t = "a\r\nb\nc" tv = t.encode("UTF-8") diff --git a/test/unit/testCellBuffer.cxx b/test/unit/testCellBuffer.cxx index cef88cb17..ab0b8aca9 100644 --- a/test/unit/testCellBuffer.cxx +++ b/test/unit/testCellBuffer.cxx @@ -9,6 +9,7 @@ #include "Platform.h" +#include "Scintilla.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" @@ -144,3 +145,290 @@ TEST_CASE("CellBuffer") { } } + +TEST_CASE("CharacterIndex") { + + CellBuffer cb(true, false); + + SECTION("Setup") { + REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0); + cb.SetUTF8Substance(true); + + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16); + REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_UTF16); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0); + + cb.ReleaseLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16); + REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE); + } + + SECTION("Insertion") { + cb.SetUTF8Substance(true); + + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + 
bool startSequence = false; + cb.InsertString(0, "a", 1, startSequence); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1); + + const char *hwair = "\xF0\x90\x8D\x88"; + cb.InsertString(0, hwair, strlen(hwair), startSequence); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + } + + SECTION("Deletion") { + cb.SetUTF8Substance(true); + + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + const char *hwair = "a\xF0\x90\x8D\x88z"; + cb.InsertString(0, hwair, strlen(hwair), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + + cb.DeleteChars(5, 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + + cb.DeleteChars(1, 4, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1); + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1); + } + + SECTION("Insert Complex") { + cb.SetUTF8Substance(true); + cb.SetLineEndTypes(1); + 
cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + // 3 lines of text containing 8 bytes + const char *data = "a\n\xF0\x90\x8D\x88\nz"; + cb.InsertString(0, data, strlen(data), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 6); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 5); + + // Insert a new line at end -> "a\n\xF0\x90\x8D\x88\nz\n" 4 lines + // Last line empty + cb.InsertString(strlen(data), "\n", 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 7); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 6); + + // Insert a new line before end -> "a\n\xF0\x90\x8D\x88\nz\n\n" 5 lines + cb.InsertString(strlen(data), "\n", 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(3, 
SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 8); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 8); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 7); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 7); + + // Insert a valid 3-byte UTF-8 character at start -> + // "\xE2\x82\xACa\n\xF0\x90\x8D\x88\nz\n\n" 5 lines + + const char *euro = "\xE2\x82\xAC"; + cb.InsertString(0, euro, strlen(euro), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 8); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 9); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 9); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 5); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7); + REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 8); + REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 8); + + // Insert a lone lead byte implying a 3 byte character at start of line 2 -> + // "\xE2\x82\xACa\n\EF\xF0\x90\x8D\x88\nz\n\n" 5 lines + // Should be treated as a single byte character + + const char *lead = "\xEF"; + cb.InsertString(5, lead, strlen(lead), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 
3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 9); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 8); + + // Insert an ASCII lead byte inside the 3-byte initial character -> + // "\xE2!\x82\xACa\n\EF\xF0\x90\x8D\x88\nz\n\n" 5 lines + // It should b treated as a single character and should cause the + // byte before and the 2 bytes after also be each treated as singles + // so 3 more characters on line 0. + + const char *ascii = "!"; + cb.InsertString(1, ascii, strlen(ascii), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 6); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 10); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 9); + + // Insert a NEL after the '!' 
to trigger the utf8 line end case -> + // "\xE2!\xC2\x85 \x82\xACa\n \EF\xF0\x90\x8D\x88\n z\n\n" 5 lines + + const char *nel = "\xC2\x85"; + cb.InsertString(2, nel, strlen(nel), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 11); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 7); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 10); + } + + SECTION("Delete Multiple lines") { + cb.SetUTF8Substance(true); + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + // 3 lines of text containing 8 bytes + const char *data = "a\n\xF0\x90\x8D\x88\nz\nc"; + cb.InsertString(0, data, strlen(data), startSequence); + + // Delete first 2 new lines -> "az\nc" + cb.DeleteChars(1, strlen(data) - 4, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + } + + SECTION("Delete Complex") { + cb.SetUTF8Substance(true); + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + // 3 lines of text containing 8 bytes + const char *data = "a\n\xF0\x90\x8D\x88\nz"; + cb.InsertString(0, data, strlen(data), startSequence); + + // Delete lead byte from character on line 1 -> + // "a\n\x90\x8D\x88\nz" + // 
line 1 becomes 4 single byte characters + cb.DeleteChars(2, 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7); + + // Delete first new line -> + // "a\x90\x8D\x88\nz" + // Only 2 lines with line 0 containing 5 single byte characters + cb.DeleteChars(1, 1, startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 5); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 5); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6); + + // Restore lead byte from character on line 0 making a 4-byte character -> + // "a\xF0\x90\x8D\x88\nz" + + const char *lead4 = "\xF0"; + cb.InsertString(1, lead4, strlen(lead4), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4); + } + + SECTION("Insert separates new line bytes") { + cb.SetUTF8Substance(true); + cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32); + + bool startSequence = false; + 
// 2 lines of text containing 4 bytes + const char *data = "a\r\nb"; + cb.InsertString(0, data, strlen(data), startSequence); + + // 3 lines of text containing 5 bytes -> + // "a\r!\nb" + const char *ascii = "!"; + cb.InsertString(2, ascii, strlen(ascii), startSequence); + + REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0); + REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2); + REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4); + REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 5); + } +} |