aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
committerNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
commit72b5df15f33da27c59efd54eb0c84e173ca8c692 (patch)
treea65cbcf60c89542255a27672302e5de5e715624e
parent34540c84e31840787054652b72be7709d79eb1a2 (diff)
downloadscintilla-mirror-72b5df15f33da27c59efd54eb0c84e173ca8c692.tar.gz
Backport: Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added. Converted instances of C++17 std::string_view to C++11. Also used const_casts where appropriate to fix compile errors. Backport of changeset 7063:0d5edc93e280.
-rw-r--r--doc/ScintillaDoc.html126
-rw-r--r--doc/ScintillaHistory.html5
-rw-r--r--include/Scintilla.h12
-rw-r--r--include/Scintilla.iface30
-rw-r--r--src/CellBuffer.cxx334
-rw-r--r--src/CellBuffer.h9
-rw-r--r--src/Document.cxx22
-rw-r--r--src/Document.h5
-rw-r--r--src/Editor.cxx28
-rw-r--r--src/UniConversion.cxx16
-rw-r--r--src/UniConversion.h1
-rw-r--r--test/simpleTests.py95
-rw-r--r--test/unit/testCellBuffer.cxx288
13 files changed, 923 insertions, 48 deletions
diff --git a/doc/ScintillaDoc.html b/doc/ScintillaDoc.html
index 41ece6fa9..69cbd9e35 100644
--- a/doc/ScintillaDoc.html
+++ b/doc/ScintillaDoc.html
@@ -267,136 +267,140 @@
<tr>
<td>&cir; <a class="toc" href="#SelectionAndInformation">Selection and information</a></td>
- <td>&cir; <a class="toc" href="#MultipleSelectionAndVirtualSpace">Multiple Selection and Virtual Space</a></td>
+ <td>&cir; <a class="toc" href="#ByCharacterOrCodeUnit">By character or UTF-16 code unit</a></td>
- <td>&cir; <a class="toc" href="#ScrollingAndAutomaticScrolling">Scrolling and automatic
+ <td>&cir; <a class="toc" href="#MultipleSelectionAndVirtualSpace">Multiple Selection and Virtual Space</a></td>
scrolling</a></td>
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#ScrollingAndAutomaticScrolling">Scrolling and automatic
+
<td>&cir; <a class="toc" href="#WhiteSpace">White space</a></td>
<td>&cir; <a class="toc" href="#Cursor">Cursor</a></td>
- <td>&cir; <a class="toc" href="#MouseCapture">Mouse capture</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#MouseCapture">Mouse capture</a></td>
+
<td>&cir; <a class="toc" href="#LineEndings">Line endings</a></td>
<td>&cir; <a class="toc" href="#Words">Words</a></td>
- <td>&cir; <a class="toc" href="#Styling">Styling</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#Styling">Styling</a></td>
+
<td>&cir; <a class="toc" href="#StyleDefinition">Style definition</a></td>
<td>&cir; <a class="toc" href="#CaretAndSelectionStyles">Caret, selection, and hotspot styles</a></td>
- <td>&cir; <a class="toc" href="#CharacterRepresentations">Character representations</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#CharacterRepresentations">Character representations</a></td>
+
<td>&cir; <a class="toc" href="#Margins">Margins</a></td>
<td>&cir; <a class="toc" href="#Annotations">Annotations</a></td>
- <td>&cir; <a class="toc" href="#OtherSettings">Other settings</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#OtherSettings">Other settings</a></td>
+
<td>&cir; <a class="toc" href="#BraceHighlighting">Brace highlighting</a></td>
<td>&cir; <a class="toc" href="#TabsAndIndentationGuides">Tabs and Indentation
Guides</a></td>
- <td>&cir; <a class="toc" href="#Markers">Markers</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#Markers">Markers</a></td>
+
<td>&cir; <a class="toc" href="#Indicators">Indicators</a></td>
<td>&cir; <a class="toc" href="#Autocompletion">Autocompletion</a></td>
- <td>&cir; <a class="toc" href="#UserLists">User lists</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#UserLists">User lists</a></td>
+
<td>&cir; <a class="toc" href="#CallTips">Call tips</a></td>
<td>&cir; <a class="toc" href="#KeyboardCommands">Keyboard commands</a></td>
- <td>&cir; <a class="toc" href="#KeyBindings">Key bindings</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#KeyBindings">Key bindings</a></td>
+
<td>&cir; <a class="toc" href="#PopupEditMenu">Popup edit menu</a></td>
<td>&cir; <a class="toc" href="#MacroRecording">Macro recording</a></td>
- <td>&cir; <a class="toc" href="#Printing">Printing</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#Printing">Printing</a></td>
+
<td>&cir; <a class="toc" href="#DirectAccess">Direct access</a></td>
<td>&cir; <a class="toc" href="#MultipleViews">Multiple views</a></td>
- <td>&cir; <a class="toc" href="#BackgroundLoadSave">Background loading and saving</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#BackgroundLoadSave">Background loading and saving</a></td>
+
<td>&cir; <a class="toc" href="#Folding">Folding</a></td>
<td>&cir; <a class="toc" href="#LineWrapping">Line wrapping</a></td>
- <td>&cir; <a class="toc" href="#Zooming">Zooming</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#Zooming">Zooming</a></td>
+
<td>&cir; <a class="toc" href="#LongLines">Long lines</a></td>
<td>&cir; <a class="toc" href="#Accessibility">Accessibility</a></td>
- <td>&cir; <a class="toc" href="#Lexer">Lexer</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#Lexer">Lexer</a></td>
+
<td>&cir; <a class="toc" href="#LexerObjects">Lexer objects</a></td>
<td>&cir; <a class="toc" href="#Notifications">Notifications</a></td>
- <td>&cir; <a class="toc" href="#Images">Images</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#Images">Images</a></td>
+
<td>&cir; <a class="toc" href="#GTK">GTK+</a></td>
<td>&cir; <a class="toc" href="#ProvisionalMessages"><span class="provisional">Provisional messages</span></a></td>
- <td>&cir; <a class="toc" href="#DeprecatedMessages">Deprecated messages</a></td>
-
</tr>
<tr>
+ <td>&cir; <a class="toc" href="#DeprecatedMessages">Deprecated messages</a></td>
+
<td>&cir; <a class="toc" href="#EditMessagesNeverSupportedByScintilla">Edit messages never
supported by Scintilla</a></td>
<td>&cir; <a class="toc" href="#RemovedFeatures">Removed features</a></td>
+ </tr>
+ <tr>
<td>&cir; <a class="toc" href="#BuildingScintilla">Building Scintilla</a></td>
</tr>
@@ -1226,8 +1230,6 @@ struct Sci_TextToFind {
<a class="message" href="#SCI_MOVECARETINSIDEVIEW">SCI_MOVECARETINSIDEVIEW</a><br />
<a class="message" href="#SCI_POSITIONBEFORE">SCI_POSITIONBEFORE(int pos) &rarr; position</a><br />
<a class="message" href="#SCI_POSITIONAFTER">SCI_POSITIONAFTER(int pos) &rarr; position</a><br />
- <a class="message" href="#SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) &rarr; position</a><br />
- <a class="message" href="#SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) &rarr; int</a><br />
<a class="message" href="#SCI_TEXTWIDTH">SCI_TEXTWIDTH(int style, const char *text) &rarr; int</a><br />
<a class="message" href="#SCI_TEXTHEIGHT">SCI_TEXTHEIGHT(int line) &rarr; int</a><br />
<a class="message" href="#SCI_CHOOSECARETX">SCI_CHOOSECARETX</a><br />
@@ -1445,15 +1447,6 @@ struct Sci_TextToFind {
If called with a position within a multi byte character will return the position
of the start/end of that character.</p>
- <p><b id="SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) &rarr; position</b><br />
- Count a number of whole characters before or after the argument position and return that position.
- The minimum position returned is 0 and the maximum is the last position in the document.
- If the position goes past the document end then 0 is returned.
- </p>
-
- <p><b id="SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) &rarr; int</b><br />
- Returns the number of whole characters between two positions..</p>
-
<p><b id="SCI_TEXTWIDTH">SCI_TEXTWIDTH(int style, const char *text) &rarr; int</b><br />
This returns the pixel width of a string drawn in the given <code class="parameter">style</code> which can
be used, for example, to decide how wide to make the line number margin in order to display a
@@ -1524,6 +1517,61 @@ struct Sci_TextToFind {
When this option is turned off, mouse selections will always stick to the mode the selection was started in. It
is off by default.</p>
+ <h2 id="ByCharacterOrCodeUnit">By character or UTF-16 code unit</h2>
+
+ <p>Most Scintilla APIs use byte positions but some applications want to use positions based on counting
+ (UTF-32) characters or (UTF-16) code units
+ or need to communicate with other code written in terms of characters or code units.
+ With only byte positions, this may require examining many bytes to count characters or code units in the document
+ but this may be sped up in some cases by indexing the line starts by character or code unit.</p>
+
+ <code>
+ <a class="message" href="#SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) &rarr; position</a><br />
+ <a class="message" href="#SCI_POSITIONRELATIVECODEUNITS">SCI_POSITIONRELATIVECODEUNITS(int pos, int relative) &rarr; position</a><br />
+ <a class="message" href="#SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) &rarr; int</a><br />
+ <a class="message" href="#SCI_COUNTCODEUNITS">SCI_COUNTCODEUNITS(int start, int end) &rarr; int</a><br />
+ <a class="message" href="#SCI_GETLINECHARACTERINDEX">SCI_GETLINECHARACTERINDEX &rarr; int</a><br />
+ <a class="message" href="#SCI_ALLOCATELINECHARACTERINDEX">SCI_ALLOCATELINECHARACTERINDEX(int lineCharacterIndex)</a><br />
+ <a class="message" href="#SCI_RELEASELINECHARACTERINDEX">SCI_RELEASELINECHARACTERINDEX(int lineCharacterIndex)</a><br />
+ <a class="message" href="#SCI_LINEFROMINDEXPOSITION">SCI_LINEFROMINDEXPOSITION(int pos, int lineCharacterIndex) &rarr; int</a><br />
+ <a class="message" href="#SCI_INDEXPOSITIONFROMLINE">SCI_INDEXPOSITIONFROMLINE(int line, int lineCharacterIndex) &rarr; int</a><br />
+ </code>
+
+ <p><b id="SCI_POSITIONRELATIVE">SCI_POSITIONRELATIVE(int pos, int relative) &rarr; position</b><br />
+ Count a number of whole characters before or after the argument position and return that position.
+ The minimum position returned is 0 and the maximum is the last position in the document.
+ If the position goes past the document end then 0 is returned.
+ </p>
+
+ <p><b id="SCI_COUNTCHARACTERS">SCI_COUNTCHARACTERS(int start, int end) &rarr; int</b><br />
+ Returns the number of whole characters between two positions.</p>
+
+ <p><b id="SCI_POSITIONRELATIVECODEUNITS">SCI_POSITIONRELATIVECODEUNITS(int pos, int relative) &rarr; position</b><br />
+ <b id="SCI_COUNTCODEUNITS">SCI_COUNTCODEUNITS(int start, int end) &rarr; int</b><br />
+ These are the UTF-16 versions of <code>SCI_POSITIONRELATIVE</code> and <code>SCI_COUNTCHARACTERS</code>
+ working in terms of UTF-16 code units.</p>
+
+ <p><b id="SCI_GETLINECHARACTERINDEX">SCI_GETLINECHARACTERINDEX &rarr; int</b><br />
+ Returns which if any indexes are active. It may be <code>SC_LINECHARACTERINDEX_NONE(0)</code> or one or more
+ of <code>SC_LINECHARACTERINDEX_UTF32(1)</code> if whole characters are indexed or
+ <code>SC_LINECHARACTERINDEX_UTF16(2)</code> if UTF-16 code units are indexed.
+ Character indexes are currently only supported for UTF-8 documents.</p>
+
+ <p><b id="SCI_ALLOCATELINECHARACTERINDEX">SCI_ALLOCATELINECHARACTERINDEX(int lineCharacterIndex)</b><br />
+ <b id="SCI_RELEASELINECHARACTERINDEX">SCI_RELEASELINECHARACTERINDEX(int lineCharacterIndex)</b><br />
+ Allocate or release one or more indexes using the same enumeration as <code>SCI_GETLINECHARACTERINDEX</code>.
+ Different aspects of an application may need indexes for different periods and should allocate for those periods.
+ Indexes use additional memory so releasing them can help minimize memory but they also take time to recalculate.
+ Scintilla may also allocate indexes to support features like accessibility or input method editors.
+ Only one index of each type is created for a document at a time.</p>
+
+ <p><b id="SCI_LINEFROMINDEXPOSITION">SCI_LINEFROMINDEXPOSITION(int pos, int lineCharacterIndex) &rarr; int</b><br />
+ <b id="SCI_INDEXPOSITIONFROMLINE">SCI_INDEXPOSITIONFROMLINE(int line, int lineCharacterIndex) &rarr; int</b><br />
+ The document line of a particular character or code unit may be found by calling <code>SCI_LINEFROMINDEXPOSITION</code> with one of
+ <code>SC_LINECHARACTERINDEX_UTF32(1)</code> or <code>SC_LINECHARACTERINDEX_UTF16(2)</code>.
+ The inverse action, finding the starting position of a document line measured in characters or code units from the document start, is performed by calling
+ <code>SCI_INDEXPOSITIONFROMLINE</code> with the same <code class="parameter">lineCharacterIndex</code> argument.</p>
+
<h2 id="MultipleSelectionAndVirtualSpace">Multiple Selection and Virtual Space</h2>
<code>
diff --git a/doc/ScintillaHistory.html b/doc/ScintillaHistory.html
index 14b30a121..8f4d17920 100644
--- a/doc/ScintillaHistory.html
+++ b/doc/ScintillaHistory.html
@@ -544,6 +544,11 @@
Released 19 June 2018.
</li>
<li>
+ Optional indexing of line starts in UTF-8 documents by UTF-32 code points and UTF-16 code units added.
+ This can improve performance for clients that provide UTF-32 or UTF-16 interfaces or that need to interoperate
+ with UTF-32 or UTF-16 components.
+ </li>
+ <li>
Lexers added for SAS and Stata.
<a href="https://sourceforge.net/p/scintilla/feature-requests/1185/">Feature #1185.</a>
</li>
diff --git a/include/Scintilla.h b/include/Scintilla.h
index 70f17918b..305e64c88 100644
--- a/include/Scintilla.h
+++ b/include/Scintilla.h
@@ -365,6 +365,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,
#define SCI_GETLINEINDENTPOSITION 2128
#define SCI_GETCOLUMN 2129
#define SCI_COUNTCHARACTERS 2633
+#define SCI_COUNTCODEUNITS 2715
#define SCI_SETHSCROLLBAR 2130
#define SCI_GETHSCROLLBAR 2131
#define SC_IV_NONE 0
@@ -755,6 +756,7 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,
#define SCI_POSITIONBEFORE 2417
#define SCI_POSITIONAFTER 2418
#define SCI_POSITIONRELATIVE 2670
+#define SCI_POSITIONRELATIVECODEUNITS 2716
#define SCI_COPYRANGE 2419
#define SCI_COPYTEXT 2420
#define SC_SEL_STREAM 0
@@ -1108,6 +1110,16 @@ typedef sptr_t (*SciFnDirect)(sptr_t ptr, unsigned int iMessage, uptr_t wParam,
#define SCN_AUTOCCOMPLETED 2030
#define SCN_MARGINRIGHTCLICK 2031
#define SCN_AUTOCSELECTIONCHANGE 2032
+#ifndef SCI_DISABLE_PROVISIONAL
+#define SC_LINECHARACTERINDEX_NONE 0
+#define SC_LINECHARACTERINDEX_UTF32 1
+#define SC_LINECHARACTERINDEX_UTF16 2
+#define SCI_GETLINECHARACTERINDEX 2710
+#define SCI_ALLOCATELINECHARACTERINDEX 2711
+#define SCI_RELEASELINECHARACTERINDEX 2712
+#define SCI_LINEFROMINDEXPOSITION 2713
+#define SCI_INDEXPOSITIONFROMLINE 2714
+#endif
/* --Autogenerated -- end of section automatically generated from Scintilla.iface */
/* These structures are defined to be exactly the same shape as the Win32
diff --git a/include/Scintilla.iface b/include/Scintilla.iface
index 4146d162d..4dc08d4e1 100644
--- a/include/Scintilla.iface
+++ b/include/Scintilla.iface
@@ -862,6 +862,9 @@ get int GetColumn=2129(position pos,)
# Count characters between two positions.
fun int CountCharacters=2633(position start, position end)
+# Count code units between two positions.
+fun int CountCodeUnits=2715(position start, position end)
+
# Show or hide the horizontal scroll bar.
set void SetHScrollBar=2130(bool visible,)
# Is the horizontal scroll bar visible?
@@ -1966,6 +1969,11 @@ fun position PositionAfter=2418(position pos,)
# of characters. Returned value is always between 0 and last position in document.
fun position PositionRelative=2670(position pos, int relative)
+# Given a valid document position, return a position that differs in a number
+# of UTF-16 code units. Returned value is always between 0 and last position in document.
+# The result may point half way (2 bytes) inside a non-BMP character.
+fun position PositionRelativeCodeUnits=2716(position pos, int relative)
+
# Copy a range of text to the clipboard. Positions are clipped into the document.
fun void CopyRange=2419(position start, position end)
@@ -4932,10 +4940,28 @@ evt void AutoCCompleted=2030(string text, int position, int ch, CompletionMethod
evt void MarginRightClick=2031(int modifiers, int position, int margin)
evt void AutoCSelectionChange=2032(int listType, string text, int position)
-# There are no provisional APIs currently.
-
cat Provisional
+enu LineCharacterIndexType=SC_LINECHARACTERINDEX_
+val SC_LINECHARACTERINDEX_NONE=0
+val SC_LINECHARACTERINDEX_UTF32=1
+val SC_LINECHARACTERINDEX_UTF16=2
+
+# Retrieve line character index state.
+get int GetLineCharacterIndex=2710(,)
+
+# Request line character index be created or its use count increased.
+fun void AllocateLineCharacterIndex=2711(int lineCharacterIndex,)
+
+# Decrease use count of line character index and remove if 0.
+fun void ReleaseLineCharacterIndex=2712(int lineCharacterIndex,)
+
+# Retrieve the document line containing a position measured in index units.
+fun int LineFromIndexPosition=2713(position posUTF32, int lineCharacterIndex)
+
+# Retrieve the position measured in index units at the start of a document line.
+fun position IndexPositionFromLine=2714(int line, int lineCharacterIndex)
+
cat Deprecated
# Divide each styling byte into lexical class bits (default: 5) and indicator
diff --git a/src/CellBuffer.cxx b/src/CellBuffer.cxx
index 5229dee61..2b1ba74f8 100644
--- a/src/CellBuffer.cxx
+++ b/src/CellBuffer.cxx
@@ -7,6 +7,7 @@
#include <cstddef>
#include <cstdlib>
+#include <cassert>
#include <cstring>
#include <cstdio>
#include <cstdarg>
@@ -27,17 +28,53 @@
namespace Scintilla {
+struct CountWidths {
+ // Measures the number of characters in a string divided into those
+ // from the Base Multilingual Plane and those from other planes.
+ // Keeping the two counts separate lets one value yield both a UTF-32
+ // width and a UTF-16 width.
+ Sci::Position countBasePlane;
+ Sci::Position countOtherPlanes;
+ // Both counts default to zero so a default-constructed value is an empty measurement.
+ CountWidths(Sci::Position countBasePlane_=0, Sci::Position countOtherPlanes_=0) noexcept :
+ countBasePlane(countBasePlane_),
+ countOtherPlanes(countOtherPlanes_) {
+ }
+ // Negate both counts, producing a delta that represents removing these characters.
+ CountWidths operator-() const noexcept {
+ return CountWidths(-countBasePlane , -countOtherPlanes);
+ }
+ // Width of the measured text in UTF-32 code units.
+ Sci::Position WidthUTF32() const noexcept {
+ // All code points take one code unit in UTF-32.
+ return countBasePlane + countOtherPlanes;
+ }
+ // Width of the measured text in UTF-16 code units.
+ Sci::Position WidthUTF16() const noexcept {
+ // UTF-16 takes 2 code units for other planes
+ return countBasePlane + 2 * countOtherPlanes;
+ }
+ // Add one character given its length in UTF-8 bytes: a 4 byte sequence is
+ // counted as outside the base plane, any other length as inside it.
+ void CountChar(int lenChar) noexcept {
+ if (lenChar == 4) {
+ countOtherPlanes++;
+ } else {
+ countBasePlane++;
+ }
+ }
+};
+
class ILineVector {
public:
virtual void Init() = 0;
virtual void SetPerLine(PerLine *pl) = 0;
virtual void InsertText(Sci::Line line, Sci::Position delta) = 0;
virtual void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) = 0;
- virtual void SetLineStart(Sci::Line line, Sci::Position position) = 0;
+ virtual void SetLineStart(Sci::Line line, Sci::Position position) noexcept = 0;
virtual void RemoveLine(Sci::Line line) = 0;
virtual Sci::Line Lines() const noexcept = 0;
virtual Sci::Line LineFromPosition(Sci::Position pos) const noexcept = 0;
virtual Sci::Position LineStart(Sci::Line line) const noexcept = 0;
+ virtual void InsertCharacters(Sci::Line line, CountWidths delta) = 0;
+ virtual void SetLineCharactersWidth(Sci::Line line, CountWidths width) = 0;
+ virtual int LineCharacterIndex() const noexcept = 0;
+ virtual bool AllocateLineCharacterIndex(int lineCharacterIndex, Sci::Line lines) = 0;
+ virtual bool ReleaseLineCharacterIndex(int lineCharacterIndex) = 0;
+ virtual Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept = 0;
+ virtual Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept = 0;
virtual ~ILineVector() {}
};
@@ -46,9 +83,58 @@ public:
using namespace Scintilla;
template <typename POS>
+class LineStartIndex {
+ // Reference-counted table of line starts measured in some unit other than bytes.
+ // The index is considered active while at least one reference is held.
+public:
+ // Number of Allocate calls not yet balanced by a Release.
+ int refCount;
+ // Start of each line expressed in this index's unit.
+ Partitioning<POS> starts;
+
+ LineStartIndex() : refCount(0), starts(4) {
+ // Minimal initial allocation
+ }
+ // Deleted so LineStartIndex objects can not be copied.
+ LineStartIndex(const LineStartIndex &) = delete;
+ LineStartIndex(LineStartIndex &&) = delete;
+ void operator=(const LineStartIndex &) = delete;
+ void operator=(LineStartIndex &&) = delete;
+ virtual ~LineStartIndex() {
+ starts.DeleteAll();
+ }
+ // Take a reference and make sure there is a partition for each of the given lines.
+ // Returns true when this call took the first reference, so the caller knows the
+ // placeholder widths written here still have to be recalculated.
+ bool Allocate(Sci::Line lines) {
+ refCount++;
+ Sci::Position length = starts.PositionFromPartition(starts.Partitions());
+ for (Sci::Line line = starts.Partitions(); line < lines; line++) {
+ // Produce an ascending sequence that will be filled in with correct widths later
+ length++;
+ starts.InsertPartition(static_cast<POS>(line), static_cast<POS>(length));
+ }
+ return refCount == 1;
+ }
+ // Drop a reference, discarding the stored starts when the last one goes.
+ // Returns true when this call released the last reference.
+ bool Release() {
+ if (refCount <= 0) {
+ // Unbalanced release: nothing is held, so keep the count at zero. Letting it go
+ // negative would make the next Allocate report no change and leave the index
+ // inactive even though partitions had been added.
+ return false;
+ }
+ if (refCount == 1) {
+ starts.DeleteAll();
+ }
+ refCount--;
+ return refCount == 0;
+ }
+ // True while at least one reference is held.
+ bool Active() const noexcept {
+ return refCount > 0;
+ }
+ // Width of a line in this index's unit: the distance between its start and the next start.
+ Sci::Position LineWidth(Sci::Line line) const noexcept {
+ return starts.PositionFromPartition(static_cast<POS>(line) + 1) -
+ starts.PositionFromPartition(static_cast<POS>(line));
+ }
+ // Set the width of a line by inserting the difference from its current width,
+ // which moves the starts of all following lines by the same amount.
+ void SetLineWidth(Sci::Line line, Sci::Position width) {
+ const Sci::Position widthCurrent = LineWidth(line);
+ starts.InsertText(static_cast<POS>(line), static_cast<POS>(width - widthCurrent));
+ }
+};
+
+template <typename POS>
class LineVector : public ILineVector {
Partitioning<POS> starts;
PerLine *perLine;
+ LineStartIndex<POS> startsUTF16;
+ LineStartIndex<POS> startsUTF32;
public:
LineVector() : starts(256), perLine(0) {
Init();
@@ -65,7 +151,9 @@ public:
if (perLine) {
perLine->Init();
}
- }
+ startsUTF32.starts.DeleteAll();
+ startsUTF16.starts.DeleteAll();
+ }
void SetPerLine(PerLine *pl) override {
perLine = pl;
}
@@ -73,18 +161,33 @@ public:
starts.InsertText(static_cast<POS>(line), static_cast<POS>(delta));
}
void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) override {
- starts.InsertPartition(static_cast<POS>(line), static_cast<POS>(position));
+ const POS lineAsPos = static_cast<POS>(line);
+ starts.InsertPartition(lineAsPos, static_cast<POS>(position));
+ if (startsUTF32.Active()) {
+ startsUTF32.starts.InsertPartition(lineAsPos,
+ static_cast<POS>(startsUTF32.starts.PositionFromPartition(lineAsPos - 1) + 1));
+ }
+ if (startsUTF16.Active()) {
+ startsUTF16.starts.InsertPartition(lineAsPos,
+ static_cast<POS>(startsUTF16.starts.PositionFromPartition(lineAsPos - 1) + 1));
+ }
if (perLine) {
if ((line > 0) && lineStart)
line--;
perLine->InsertLine(line);
}
}
- void SetLineStart(Sci::Line line, Sci::Position position) override {
+ void SetLineStart(Sci::Line line, Sci::Position position) noexcept override {
starts.SetPartitionStartPosition(static_cast<POS>(line), static_cast<POS>(position));
}
void RemoveLine(Sci::Line line) override {
starts.RemovePartition(static_cast<POS>(line));
+ if (startsUTF32.Active()) {
+ startsUTF32.starts.RemovePartition(static_cast<POS>(line));
+ }
+ if (startsUTF16.Active()) {
+ startsUTF16.starts.RemovePartition(static_cast<POS>(line));
+ }
if (perLine) {
perLine->RemoveLine(line);
}
@@ -98,6 +201,71 @@ public:
Sci::Position LineStart(Sci::Line line) const noexcept override {
return starts.PositionFromPartition(static_cast<POS>(line));
}
+ void InsertCharacters(Sci::Line line, CountWidths delta) override {
+ if (startsUTF32.Active()) {
+ startsUTF32.starts.InsertText(static_cast<POS>(line), static_cast<POS>(delta.WidthUTF32()));
+ }
+ if (startsUTF16.Active()) {
+ startsUTF16.starts.InsertText(static_cast<POS>(line), static_cast<POS>(delta.WidthUTF16()));
+ }
+ }
+ void SetLineCharactersWidth(Sci::Line line, CountWidths width) override {
+ if (startsUTF32.Active()) {
+ assert(startsUTF32.starts.Partitions() == starts.Partitions());
+ startsUTF32.SetLineWidth(line, width.WidthUTF32());
+ }
+ if (startsUTF16.Active()) {
+ assert(startsUTF16.starts.Partitions() == starts.Partitions());
+ startsUTF16.SetLineWidth(line, width.WidthUTF16());
+ }
+ }
+
+ int LineCharacterIndex() const noexcept override {
+ int retVal = 0;
+ if (startsUTF32.Active()) {
+ retVal |= SC_LINECHARACTERINDEX_UTF32;
+ }
+ if (startsUTF16.Active()) {
+ retVal |= SC_LINECHARACTERINDEX_UTF16;
+ }
+ return retVal;
+ }
+ bool AllocateLineCharacterIndex(int lineCharacterIndex, Sci::Line lines) override {
+ bool changed = false;
+ if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF32) != 0) {
+ changed = startsUTF32.Allocate(lines) || changed;
+ assert(startsUTF32.starts.Partitions() == starts.Partitions());
+ }
+ if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF16) != 0) {
+ changed = startsUTF16.Allocate(lines) || changed;
+ assert(startsUTF16.starts.Partitions() == starts.Partitions());
+ }
+ return changed;
+ }
+ bool ReleaseLineCharacterIndex(int lineCharacterIndex) override {
+ bool changed = false;
+ if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF32) != 0) {
+ changed = startsUTF32.Release() || changed;
+ }
+ if ((lineCharacterIndex & SC_LINECHARACTERINDEX_UTF16) != 0) {
+ changed = startsUTF16.Release() || changed;
+ }
+ return changed;
+ }
+ Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept override {
+ if (lineCharacterIndex == SC_LINECHARACTERINDEX_UTF32) {
+ return startsUTF32.starts.PositionFromPartition(static_cast<POS>(line));
+ } else {
+ return startsUTF16.starts.PositionFromPartition(static_cast<POS>(line));
+ }
+ }
+ Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept override {
+ if (lineCharacterIndex == SC_LINECHARACTERINDEX_UTF32) {
+ return static_cast<Sci::Line>(startsUTF32.starts.PartitionFromPosition(static_cast<POS>(pos)));
+ } else {
+ return static_cast<Sci::Line>(startsUTF16.starts.PartitionFromPosition(static_cast<POS>(pos)));
+ }
+ }
};
Action::Action() {
@@ -363,6 +531,7 @@ void UndoHistory::CompletedRedoStep() {
CellBuffer::CellBuffer(bool hasStyles_, bool largeDocument_) :
hasStyles(hasStyles_), largeDocument(largeDocument_) {
readOnly = false;
+ utf8Substance = false;
utf8LineEnds = 0;
collectingUndo = true;
if (largeDocument)
@@ -504,10 +673,19 @@ void CellBuffer::Allocate(Sci::Position newSize) {
}
}
+// Record whether the buffer contents are to be treated as UTF-8.
+// Only a change of state has any effect, in which case ResetLineEnds is called.
+void CellBuffer::SetUTF8Substance(bool utf8Substance_) {
+ if (utf8Substance != utf8Substance_) {
+ utf8Substance = utf8Substance_;
+ ResetLineEnds();
+ }
+}
+
void CellBuffer::SetLineEndTypes(int utf8LineEnds_) {
if (utf8LineEnds != utf8LineEnds_) {
+ const int indexes = plv->LineCharacterIndex();
utf8LineEnds = utf8LineEnds_;
ResetLineEnds();
+ AllocateLineCharacterIndex(indexes);
}
}
@@ -534,6 +712,23 @@ void CellBuffer::SetPerLine(PerLine *pl) {
plv->SetPerLine(pl);
}
+// Return the line character index state reported by the line vector.
+int CellBuffer::LineCharacterIndex() const noexcept {
+ return plv->LineCharacterIndex();
+}
+
+// Request the line character indexes named by lineCharacterIndex.
+// Nothing is allocated unless utf8Substance is set. When the line vector reports
+// that the request changed its state, the widths of every line are recalculated.
+void CellBuffer::AllocateLineCharacterIndex(int lineCharacterIndex) {
+ if (utf8Substance) {
+ if (plv->AllocateLineCharacterIndex(lineCharacterIndex, Lines())) {
+ // Changed so recalculate whole file
+ RecalculateIndexLineStarts(0, Lines() - 1);
+ }
+ }
+}
+
+// Forward a release of the given line character indexes to the line vector.
+// The line vector's result is not used here.
+void CellBuffer::ReleaseLineCharacterIndex(int lineCharacterIndex) {
+ plv->ReleaseLineCharacterIndex(lineCharacterIndex);
+}
+
Sci::Line CellBuffer::Lines() const noexcept {
return plv->Lines();
}
@@ -551,6 +746,14 @@ Sci::Line CellBuffer::LineFromPosition(Sci::Position pos) const noexcept {
return plv->LineFromPosition(pos);
}
+// Return the start of a line as held in the index selected by lineCharacterIndex,
+// forwarding to the line vector.
+Sci::Position CellBuffer::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept {
+ return plv->IndexLineStart(line, lineCharacterIndex);
+}
+
+// Return the line containing pos, where pos is measured in the units of the index
+// selected by lineCharacterIndex, forwarding to the line vector.
+Sci::Line CellBuffer::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept {
+ return plv->LineFromPositionIndex(pos, lineCharacterIndex);
+}
+
bool CellBuffer::IsReadOnly() const {
return readOnly;
}
@@ -611,6 +814,37 @@ bool CellBuffer::UTF8LineEndOverlaps(Sci::Position position) const {
return UTF8IsSeparator(bytes) || UTF8IsSeparator(bytes+1) || UTF8IsNEL(bytes+1);
}
+// Report whether position may be treated as a character boundary in the bytes of substance.
+// Walks back from position over trail bytes; if a non-trail byte is found at least one
+// byte back, the bytes collected are classified and must be valid with a result equal to
+// the distance walked. Finally the byte at position, when inside the document, must not
+// be a trail byte.
+bool CellBuffer::UTF8IsCharacterBoundary(Sci::Position position) const {
+ assert(position >= 0 && position <= Length());
+ if (position > 0) {
+ // Bytes from the current scan position up to and including position, in document order.
+ std::string back;
+ for (int i = 0; i < UTF8MaxBytes; i++) {
+ const Sci::Position posBack = position - i;
+ if (posBack < 0) {
+ // Reached the document start having seen only trail bytes.
+ return false;
+ }
+ back.insert(0, 1, substance.ValueAt(posBack));
+ if (!UTF8IsTrailByte(back.front())) {
+ if (i > 0) {
+ // Have reached a non-trail
+ // NOTE(review): cla == i presumably means the sequence starting i bytes back
+ // has width i and so ends exactly at position -- confirm against UTF8Classify.
+ const int cla = UTF8Classify(reinterpret_cast<const unsigned char*>(back.data()), back.size());
+ if ((cla & UTF8MaskInvalid) || (cla != i)) {
+ return false;
+ }
+ }
+ break;
+ }
+ }
+ }
+ if (position < Length()) {
+ // A trail byte at position means position is inside a sequence or at a stray trail byte.
+ const unsigned char fore = substance.ValueAt(position);
+ if (UTF8IsTrailByte(fore)) {
+ return false;
+ }
+ }
+ return true;
+}
+
void CellBuffer::ResetLineEnds() {
// Reinitialize line data -- too much work to preserve
plv->Init();
@@ -647,6 +881,38 @@ void CellBuffer::ResetLineEnds() {
}
}
+namespace {
+
+// Count the characters in the len bytes of UTF-8 text starting at s, split into
+// base plane and other plane counts. Each step classifies the sequence at the
+// current position and advances by the width held in the UTF8MaskWidth bits.
+CountWidths CountCharacterWidthsUTF8(const char *s, size_t len) noexcept {
+ CountWidths cw;
+ size_t remaining = len;
+ while (remaining > 0) {
+ // Classify against the bytes still unread rather than the original length:
+ // s has advanced, so passing len would overstate what is available and allow
+ // a sequence truncated at the end of the text to be examined past s + len.
+ // NOTE(review): relies on UTF8Classify never reporting a width greater than
+ // the length it is given -- confirm in UniConversion.
+ const int utf8Status = UTF8Classify(reinterpret_cast<const unsigned char*>(s), remaining);
+ const int lenChar = utf8Status & UTF8MaskWidth;
+ cw.CountChar(lenChar);
+ s += lenChar;
+ remaining -= lenChar;
+ }
+ return cw;
+}
+
+}
+
+// Recompute the character widths held for lines lineFirst to lineLast inclusive.
+// For each line the bytes are copied out of the buffer, counted with
+// CountCharacterWidthsUTF8 and the result passed to the line vector.
+void CellBuffer::RecalculateIndexLineStarts(Sci::Line lineFirst, Sci::Line lineLast) {
+ // Scratch buffer reused for every line.
+ std::string text;
+ Sci::Position posLineEnd = LineStart(lineFirst);
+ for (Sci::Line line = lineFirst; line <= lineLast; line++) {
+ // Find line start and end, retrieve text of line, count characters and update line width
+ // The end of the previous line is the start of this one.
+ const Sci::Position posLineStart = posLineEnd;
+ posLineEnd = LineStart(line+1);
+ // Width here is the byte length of the line, from its start to the next line's start.
+ const Sci::Position width = posLineEnd - posLineStart;
+ text.resize(width);
+ // const_cast because data() returns a const pointer here; the string's own storage is filled.
+ GetCharRange(const_cast<char *>(text.data()), posLineStart, width);
+ const CountWidths cw = CountCharacterWidthsUTF8(text.data(), text.size());
+ plv->SetLineCharactersWidth(line, cw);
+ }
+}
+
void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
if (insertLength == 0)
return;
@@ -658,12 +924,25 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
breakingUTF8LineEnd = UTF8LineEndOverlaps(position);
}
+ const Sci::Line linePosition = plv->LineFromPosition(position);
+ Sci::Line lineInsert = linePosition + 1;
+
+ // A simple insertion is one that inserts valid text on a single line at a character boundary
+ bool simpleInsertion = false;
+
+ // Check for breaking apart a UTF-8 sequence and inserting invalid UTF-8
+ if (utf8Substance && (plv->LineCharacterIndex() != SC_LINECHARACTERINDEX_NONE)) {
+ // Actually, don't need to check that whole insertion is valid just that there
+ // are no potential fragments at ends.
+ simpleInsertion = UTF8IsCharacterBoundary(position) &&
+ UTF8IsValid(s, insertLength);
+ }
+
substance.InsertFromArray(position, s, 0, insertLength);
if (hasStyles) {
style.InsertValue(position, insertLength, 0);
}
- Sci::Line lineInsert = plv->LineFromPosition(position) + 1;
const bool atLineStart = plv->LineStart(lineInsert-1) == position;
// Point all the lines after the insertion point further along in the buffer
plv->InsertText(lineInsert-1, insertLength);
@@ -683,6 +962,7 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
if (ch == '\r') {
InsertLine(lineInsert, (position + i) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
} else if (ch == '\n') {
if (chPrev == '\r') {
// Patch up what was end of line
@@ -691,11 +971,13 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
InsertLine(lineInsert, (position + i) + 1, atLineStart);
lineInsert++;
}
+ simpleInsertion = false;
} else if (utf8LineEnds) {
const unsigned char back3[3] = {chBeforePrev, chPrev, ch};
if (UTF8IsSeparator(back3) || UTF8IsNEL(back3+1)) {
InsertLine(lineInsert, (position + i) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
}
}
chBeforePrev = chPrev;
@@ -706,6 +988,7 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
if (ch == '\r') {
// End of line already in buffer so drop the newly created one
RemoveLine(lineInsert - 1);
+ simpleInsertion = false;
}
} else if (utf8LineEnds && !UTF8IsAscii(chAfter)) {
// May have end of UTF-8 line end in buffer and start in insertion
@@ -715,21 +998,31 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
if (UTF8IsSeparator(back3)) {
InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
}
if ((j == 0) && UTF8IsNEL(back3+1)) {
InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
}
chBeforePrev = chPrev;
chPrev = chAt;
}
}
+ if (simpleInsertion) {
+ const CountWidths cw = CountCharacterWidthsUTF8(s, insertLength);
+ plv->InsertCharacters(linePosition, cw);
+ } else {
+ RecalculateIndexLineStarts(linePosition, lineInsert - 1);
+ }
}
void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLength) {
if (deleteLength == 0)
return;
+ Sci::Line lineRecalculateStart = INVALID_POSITION;
+
if ((position == 0) && (deleteLength == substance.Length())) {
// If whole buffer is being deleted, faster to reinitialise lines data
// than to delete each line.
@@ -738,11 +1031,37 @@ void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLe
// Have to fix up line positions before doing deletion as looking at text in buffer
// to work out which lines have been removed
- Sci::Line lineRemove = plv->LineFromPosition(position) + 1;
+ const Sci::Line linePosition = plv->LineFromPosition(position);
+ Sci::Line lineRemove = linePosition + 1;
+
plv->InsertText(lineRemove-1, - (deleteLength));
const unsigned char chPrev = substance.ValueAt(position - 1);
const unsigned char chBefore = chPrev;
unsigned char chNext = substance.ValueAt(position);
+
+ // Check for breaking apart a UTF-8 sequence
+ // Needs further checks that text is UTF-8 or that some other break apart is occurring
+ if (utf8Substance && (plv->LineCharacterIndex() != SC_LINECHARACTERINDEX_NONE)) {
+ const Sci::Position posEnd = position + deleteLength;
+ const Sci::Line lineEndRemove = plv->LineFromPosition(posEnd);
+ const bool simpleDeletion =
+ (linePosition == lineEndRemove) &&
+ UTF8IsCharacterBoundary(position) && UTF8IsCharacterBoundary(posEnd);
+ if (simpleDeletion) {
+ std::string text(deleteLength, '\0');
+ GetCharRange(const_cast<char *>(text.data()), position, deleteLength);
+ if (UTF8IsValid(text.data(), text.size())) {
+ // Everything is good
+ const CountWidths cw = CountCharacterWidthsUTF8(text.data(), text.size());
+ plv->InsertCharacters(linePosition, -cw);
+ } else {
+ lineRecalculateStart = linePosition;
+ }
+ } else {
+ lineRecalculateStart = linePosition;
+ }
+ }
+
bool ignoreNL = false;
if (chPrev == '\r' && chNext == '\n') {
// Move back one
@@ -791,6 +1110,9 @@ void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLe
}
}
substance.DeleteRange(position, deleteLength);
+ if (lineRecalculateStart >= 0) {
+ RecalculateIndexLineStarts(lineRecalculateStart, lineRecalculateStart);
+ }
if (hasStyles) {
style.DeleteRange(position, deleteLength);
}
diff --git a/src/CellBuffer.h b/src/CellBuffer.h
index f360b2a23..b9f2406f1 100644
--- a/src/CellBuffer.h
+++ b/src/CellBuffer.h
@@ -113,6 +113,7 @@ private:
SplitVector<char> substance;
SplitVector<char> style;
bool readOnly;
+ bool utf8Substance;
int utf8LineEnds;
bool collectingUndo;
@@ -121,7 +122,9 @@ private:
std::unique_ptr<ILineVector> plv;
bool UTF8LineEndOverlaps(Sci::Position position) const;
+ bool UTF8IsCharacterBoundary(Sci::Position position) const;
void ResetLineEnds();
+ void RecalculateIndexLineStarts(Sci::Line lineFirst, Sci::Line lineLast);
/// Actions without undo
void BasicInsertString(Sci::Position position, const char *s, Sci::Position insertLength);
void BasicDeleteChars(Sci::Position position, Sci::Position deleteLength);
@@ -148,13 +151,19 @@ public:
Sci::Position Length() const noexcept;
void Allocate(Sci::Position newSize);
+ void SetUTF8Substance(bool utf8Substance_);
int GetLineEndTypes() const { return utf8LineEnds; }
void SetLineEndTypes(int utf8LineEnds_);
bool ContainsLineEnd(const char *s, Sci::Position length) const;
void SetPerLine(PerLine *pl);
+ int LineCharacterIndex() const noexcept;
+ void AllocateLineCharacterIndex(int lineCharacterIndex);
+ void ReleaseLineCharacterIndex(int lineCharacterIndex);
Sci::Line Lines() const noexcept;
Sci::Position LineStart(Sci::Line line) const noexcept;
+ Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept;
Sci::Line LineFromPosition(Sci::Position pos) const noexcept;
+ Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept;
void InsertLine(Sci::Line line, Sci::Position position, bool lineStart);
void RemoveLine(Sci::Line line);
const char *InsertString(Sci::Position position, const char *s, Sci::Position insertLength, bool &startSequence);
diff --git a/src/Document.cxx b/src/Document.cxx
index 99c15e3ef..681b3c371 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -122,6 +122,7 @@ Document::Document(int options) :
decorations = DecorationListCreate(IsLarge());
cb.SetPerLine(this);
+ cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
}
Document::~Document() {
@@ -197,6 +198,7 @@ bool Document::SetDBCSCodePage(int dbcsCodePage_) {
dbcsCodePage = dbcsCodePage_;
SetCaseFolder(nullptr);
cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
+ cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
return true;
} else {
return false;
@@ -423,6 +425,14 @@ Sci::Position Document::VCHomePosition(Sci::Position position) const {
return startText;
}
+Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const {
+ return cb.IndexLineStart(line, lineCharacterIndex);
+}
+
+Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const {
+ return cb.LineFromPositionIndex(pos, lineCharacterIndex);
+}
+
int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
const int prev = Levels()->SetLevel(static_cast<Sci::Line>(line), level, LinesTotal());
if (prev != level) {
@@ -2108,6 +2118,18 @@ const char *Document::SubstituteByPosition(const char *text, Sci::Position *leng
return 0;
}
+int Document::LineCharacterIndex() const {
+ return cb.LineCharacterIndex();
+}
+
+void Document::AllocateLineCharacterIndex(int lineCharacterIndex) {
+ return cb.AllocateLineCharacterIndex(lineCharacterIndex);
+}
+
+void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) {
+ return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
+}
+
Sci::Line Document::LinesTotal() const noexcept {
return cb.Lines();
}
diff --git a/src/Document.h b/src/Document.h
index 184da2e96..97fc7e880 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -389,6 +389,8 @@ public:
bool IsLineEndPosition(Sci::Position position) const;
bool IsPositionInLineEnd(Sci::Position position) const;
Sci::Position VCHomePosition(Sci::Position position) const;
+ Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const;
+ Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const;
int SCI_METHOD SetLevel(Sci_Position line, int level) override;
int SCI_METHOD GetLevel(Sci_Position line) const override;
@@ -414,6 +416,9 @@ public:
void SetCaseFolder(CaseFolder *pcf_);
Sci::Position FindText(Sci::Position minPos, Sci::Position maxPos, const char *search, int flags, Sci::Position *length);
const char *SubstituteByPosition(const char *text, Sci::Position *length);
+ int LineCharacterIndex() const;
+ void AllocateLineCharacterIndex(int lineCharacterIndex);
+ void ReleaseLineCharacterIndex(int lineCharacterIndex);
Sci::Line LinesTotal() const noexcept;
void SetDefaultCharClasses(bool includeWordClass);
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 6fc49d971..53ec6794f 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -6017,6 +6017,11 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
static_cast<Sci::Position>(wParam), lParam),
static_cast<Sci::Position>(0), pdoc->Length());
+ case SCI_POSITIONRELATIVECODEUNITS:
+ return Sci::clamp(pdoc->GetRelativePositionUTF16(
+ static_cast<Sci::Position>(wParam), lParam),
+ static_cast<Sci::Position>(0), pdoc->Length());
+
case SCI_LINESCROLL:
ScrollTo(topLine + static_cast<Sci::Line>(lParam));
HorizontalScrollTo(xOffset + static_cast<int>(wParam) * static_cast<int>(vs.spaceWidth));
@@ -6773,6 +6778,23 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
case SCI_GETIMEINTERACTION:
return imeInteraction;
+ case SCI_GETLINECHARACTERINDEX:
+ return pdoc->LineCharacterIndex();
+
+ case SCI_ALLOCATELINECHARACTERINDEX:
+ pdoc->AllocateLineCharacterIndex(static_cast<int>(wParam));
+ break;
+
+ case SCI_RELEASELINECHARACTERINDEX:
+ pdoc->ReleaseLineCharacterIndex(static_cast<int>(wParam));
+ break;
+
+ case SCI_LINEFROMINDEXPOSITION:
+ return pdoc->LineFromPositionIndex(static_cast<Sci::Position>(wParam), static_cast<int>(lParam));
+
+ case SCI_INDEXPOSITIONFROMLINE:
+ return pdoc->IndexLineStart(static_cast<Sci::Line>(wParam), static_cast<int>(lParam));
+
// Marker definition and setting
case SCI_MARKERDEFINE:
if (wParam <= MARKER_MAX) {
@@ -7384,7 +7406,7 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
return pdoc->decorations->AllOnFor(static_cast<Sci::Position>(wParam));
case SCI_INDICATORVALUEAT:
- return pdoc->decorations->ValueAt(static_cast<int>(wParam), static_cast<Sci::Position>(lParam));
+ return pdoc->decorations->ValueAt(static_cast<int>(wParam), lParam);
case SCI_INDICATORSTART:
return pdoc->decorations->Start(static_cast<int>(wParam), lParam);
@@ -8178,6 +8200,10 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
case SCI_COUNTCHARACTERS:
return pdoc->CountCharacters(static_cast<Sci::Position>(wParam), lParam);
+ //return pdoc->CountCharacters(static_cast<Sci::Position>(wParam), static_cast<Sci::Position>(lParam));
+
+ case SCI_COUNTCODEUNITS:
+ return pdoc->CountUTF16(static_cast<Sci::Position>(wParam), lParam);
default:
return DefWndProc(iMessage, wParam, lParam);
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 58e899faa..6cd6a8ba9 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -327,6 +327,22 @@ int UTF8DrawBytes(const unsigned char *us, int len) noexcept {
return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
}
+bool UTF8IsValid(const char *s, size_t len) noexcept {
+ const unsigned char *us = reinterpret_cast<const unsigned char *>(s);
+ size_t remaining = len;
+ while (remaining > 0) {
+ const int utf8Status = UTF8Classify(us, remaining);
+ if (utf8Status & UTF8MaskInvalid) {
+ return false;
+ } else {
+ const int lenChar = utf8Status & UTF8MaskWidth;
+ us += lenChar;
+ remaining -= lenChar;
+ }
+ }
+ return remaining == 0;
+}
+
// Replace invalid bytes in UTF-8 with the replacement character
std::string FixInvalidUTF8(const std::string &text) {
std::string result;
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 0eb9f5378..4bb8875d0 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -21,6 +21,7 @@ size_t UTF16Length(const char *s, size_t len);
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept;
+bool UTF8IsValid(const char *s, size_t len) noexcept;
std::string FixInvalidUTF8(const std::string &text);
extern const unsigned char UTF8BytesOfLead[256];
diff --git a/test/simpleTests.py b/test/simpleTests.py
index 3ff283dad..b1e8efdb7 100644
--- a/test/simpleTests.py
+++ b/test/simpleTests.py
@@ -1631,6 +1631,76 @@ class TestStyleAttributes(unittest.TestCase):
self.ed.StyleSetHotSpot(self.ed.STYLE_DEFAULT, 1)
self.assertEquals(self.ed.StyleGetHotSpot(self.ed.STYLE_DEFAULT), 1)
+class TestIndices(unittest.TestCase):
+ def setUp(self):
+ self.xite = Xite.xiteFrame
+ self.ed = self.xite.ed
+ self.ed.ClearAll()
+ self.ed.EmptyUndoBuffer()
+ self.ed.SetCodePage(65001)
+ # Text includes one non-BMP character
+ t = "aå\U00010348flﬔ-\n"
+ self.tv = t.encode("UTF-8")
+
+ def tearDown(self):
+ self.ed.SetCodePage(0)
+
+ def testAllocation(self):
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+ self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32)
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_UTF32)
+ self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32)
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+
+ def testUTF32(self):
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+ self.ed.SetContents(self.tv)
+ self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32)
+ self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0)
+ self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7)
+ self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32)
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+
+ def testUTF16(self):
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+ t = "aå\U00010348flﬔ-"
+ tv = t.encode("UTF-8")
+ self.ed.SetContents(self.tv)
+ self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF16)
+ self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0)
+ self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8)
+ self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF16)
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+
+ def testBoth(self):
+ # Set text before turning indices on
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+ self.ed.SetContents(self.tv)
+ self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16)
+ self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0)
+ self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7)
+ self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0)
+ self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8)
+ # Test the inverse: position->line
+ self.assertEquals(self.ed.LineFromIndexPosition(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0)
+ self.assertEquals(self.ed.LineFromIndexPosition(7, self.ed.SC_LINECHARACTERINDEX_UTF32), 1)
+ self.assertEquals(self.ed.LineFromIndexPosition(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0)
+ self.assertEquals(self.ed.LineFromIndexPosition(8, self.ed.SC_LINECHARACTERINDEX_UTF16), 1)
+ self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16)
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+
+ def testMaintenance(self):
+ # Set text after turning indices on
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+ self.ed.AllocateLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16)
+ self.ed.SetContents(self.tv)
+ self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF32), 0)
+ self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF32), 7)
+ self.assertEquals(self.ed.IndexPositionFromLine(0, self.ed.SC_LINECHARACTERINDEX_UTF16), 0)
+ self.assertEquals(self.ed.IndexPositionFromLine(1, self.ed.SC_LINECHARACTERINDEX_UTF16), 8)
+ self.ed.ReleaseLineCharacterIndex(self.ed.SC_LINECHARACTERINDEX_UTF32+self.ed.SC_LINECHARACTERINDEX_UTF16)
+ self.assertEquals(self.ed.GetLineCharacterIndex(), self.ed.SC_LINECHARACTERINDEX_NONE)
+
class TestCharacterNavigation(unittest.TestCase):
def setUp(self):
self.xite = Xite.xiteFrame
@@ -1677,6 +1747,31 @@ class TestCharacterNavigation(unittest.TestCase):
self.assert_(after < previous)
previous = after
+ def testRelativeNonBOM(self):
+ # \x61 \xF0\x90\x8D\x88 \xef\xac\x82 \xef\xac\x94 \x2d
+ t = "a\U00010348flﬔ-"
+ tv = t.encode("UTF-8")
+ self.ed.SetContents(tv)
+ self.assertEquals(self.ed.PositionRelative(1, 2), 8)
+ self.assertEquals(self.ed.CountCharacters(1, 8), 2)
+ self.assertEquals(self.ed.CountCodeUnits(1, 8), 3)
+ self.assertEquals(self.ed.PositionRelative(8, -2), 1)
+ self.assertEquals(self.ed.PositionRelativeCodeUnits(8, -3), 1)
+ pos = 0
+ previous = 0
+ for i in range(1, len(t)):
+ after = self.ed.PositionRelative(pos, i)
+ self.assert_(after > pos)
+ self.assert_(after > previous)
+ previous = after
+ pos = len(t)
+ previous = pos
+ for i in range(1, len(t)-1):
+ after = self.ed.PositionRelative(pos, -i)
+ self.assert_(after < pos)
+ self.assert_(after <= previous)
+ previous = after
+
def testLineEnd(self):
t = "a\r\nb\nc"
tv = t.encode("UTF-8")
diff --git a/test/unit/testCellBuffer.cxx b/test/unit/testCellBuffer.cxx
index cef88cb17..ab0b8aca9 100644
--- a/test/unit/testCellBuffer.cxx
+++ b/test/unit/testCellBuffer.cxx
@@ -9,6 +9,7 @@
#include "Platform.h"
+#include "Scintilla.h"
#include "Position.h"
#include "SplitVector.h"
#include "Partitioning.h"
@@ -144,3 +145,290 @@ TEST_CASE("CellBuffer") {
}
}
+
+TEST_CASE("CharacterIndex") {
+
+ CellBuffer cb(true, false);
+
+ SECTION("Setup") {
+ REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0);
+ cb.SetUTF8Substance(true);
+
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16);
+ REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_UTF16);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 0);
+
+ cb.ReleaseLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16);
+ REQUIRE(cb.LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE);
+ }
+
+ SECTION("Insertion") {
+ cb.SetUTF8Substance(true);
+
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ cb.InsertString(0, "a", 1, startSequence);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1);
+
+ const char *hwair = "\xF0\x90\x8D\x88";
+ cb.InsertString(0, hwair, strlen(hwair), startSequence);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ }
+
+ SECTION("Deletion") {
+ cb.SetUTF8Substance(true);
+
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ const char *hwair = "a\xF0\x90\x8D\x88z";
+ cb.InsertString(0, hwair, strlen(hwair), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+
+ cb.DeleteChars(5, 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+
+ cb.DeleteChars(1, 4, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 1);
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 1);
+ }
+
+ SECTION("Insert Complex") {
+ cb.SetUTF8Substance(true);
+ cb.SetLineEndTypes(1);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 3 lines of text containing 8 bytes
+ const char *data = "a\n\xF0\x90\x8D\x88\nz";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 6);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 5);
+
+ // Insert a new line at end -> "a\n\xF0\x90\x8D\x88\nz\n" 4 lines
+ // Last line empty
+ cb.InsertString(strlen(data), "\n", 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 7);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 6);
+
+ // Insert a new line before end -> "a\n\xF0\x90\x8D\x88\nz\n\n" 5 lines
+ cb.InsertString(strlen(data), "\n", 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 8);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 8);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 7);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 7);
+
+ // Insert a valid 3-byte UTF-8 character at start ->
+ // "\xE2\x82\xACa\n\xF0\x90\x8D\x88\nz\n\n" 5 lines
+
+ const char *euro = "\xE2\x82\xAC";
+ cb.InsertString(0, euro, strlen(euro), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 8);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF16) == 9);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF16) == 9);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 5);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7);
+ REQUIRE(cb.IndexLineStart(4, SC_LINECHARACTERINDEX_UTF32) == 8);
+ REQUIRE(cb.IndexLineStart(5, SC_LINECHARACTERINDEX_UTF32) == 8);
+
+ // Insert a lone lead byte implying a 3 byte character at start of line 1 ->
+ // "\xE2\x82\xACa\n\xEF\xF0\x90\x8D\x88\nz\n\n" 5 lines
+ // Should be treated as a single byte character
+
+ const char *lead = "\xEF";
+ cb.InsertString(5, lead, strlen(lead), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 9);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 8);
+
+ // Insert an ASCII lead byte inside the 3-byte initial character ->
+ // "\xE2!\x82\xACa\n\xEF\xF0\x90\x8D\x88\nz\n\n" 5 lines
+ // It should be treated as a single character and should cause the
+ // byte before and the 2 bytes after to each be treated as singles
+ // so 3 more characters on line 0.
+
+ const char *ascii = "!";
+ cb.InsertString(1, ascii, strlen(ascii), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 6);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 10);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 9);
+
+ // Insert a NEL after the '!' to trigger the utf8 line end case ->
+ // "\xE2!\xC2\x85 \x82\xACa\n \xEF\xF0\x90\x8D\x88\n z\n\n" 6 lines
+
+ const char *nel = "\xC2\x85";
+ cb.InsertString(2, nel, strlen(nel), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 7);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 11);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 7);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 10);
+ }
+
+ SECTION("Delete Multiple lines") {
+ cb.SetUTF8Substance(true);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 4 lines of text containing 10 bytes
+ const char *data = "a\n\xF0\x90\x8D\x88\nz\nc";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ // Delete first 2 new lines -> "az\nc"
+ cb.DeleteChars(1, strlen(data) - 4, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ }
+
+ SECTION("Delete Complex") {
+ cb.SetUTF8Substance(true);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 3 lines of text containing 8 bytes
+ const char *data = "a\n\xF0\x90\x8D\x88\nz";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ // Delete lead byte from character on line 1 ->
+ // "a\n\x90\x8D\x88\nz"
+ // line 1 becomes 4 single byte characters
+ cb.DeleteChars(2, 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 7);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF32) == 7);
+
+ // Delete first new line ->
+ // "a\x90\x8D\x88\nz"
+ // Only 2 lines with line 0 containing 5 single byte characters
+ cb.DeleteChars(1, 1, startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 5);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 6);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 5);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 6);
+
+ // Restore lead byte from character on line 0 making a 4-byte character ->
+ // "a\xF0\x90\x8D\x88\nz"
+
+ const char *lead4 = "\xF0";
+ cb.InsertString(1, lead4, strlen(lead4), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 4);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 5);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF32) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF32) == 3);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF32) == 4);
+ }
+
+ SECTION("Insert separates new line bytes") {
+ cb.SetUTF8Substance(true);
+ cb.AllocateLineCharacterIndex(SC_LINECHARACTERINDEX_UTF16 | SC_LINECHARACTERINDEX_UTF32);
+
+ bool startSequence = false;
+ // 2 lines of text containing 4 bytes
+ const char *data = "a\r\nb";
+ cb.InsertString(0, data, strlen(data), startSequence);
+
+ // 3 lines of text containing 5 bytes ->
+ // "a\r!\nb"
+ const char *ascii = "!";
+ cb.InsertString(2, ascii, strlen(ascii), startSequence);
+
+ REQUIRE(cb.IndexLineStart(0, SC_LINECHARACTERINDEX_UTF16) == 0);
+ REQUIRE(cb.IndexLineStart(1, SC_LINECHARACTERINDEX_UTF16) == 2);
+ REQUIRE(cb.IndexLineStart(2, SC_LINECHARACTERINDEX_UTF16) == 4);
+ REQUIRE(cb.IndexLineStart(3, SC_LINECHARACTERINDEX_UTF16) == 5);
+ }
+}