aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
committerNeil <nyamatongwe@gmail.com>2018-07-10 15:06:50 +1000
commit56e20ea0283d8018dee48d736ba9dfef3c84dc3f (patch)
tree21bdb500dfc092fadecb123b87e9799a2c46f6a9 /src
parentd27cbe587930d13d3f1802b271d0d13e7e3c6e38 (diff)
downloadscintilla-mirror-56e20ea0283d8018dee48d736ba9dfef3c84dc3f.tar.gz
Optional indexing of line starts in UTF-8 documents by UTF-32 code points and
UTF-16 code units added.
Diffstat (limited to 'src')
-rw-r--r--src/CellBuffer.cxx334
-rw-r--r--src/CellBuffer.h9
-rw-r--r--src/Document.cxx22
-rw-r--r--src/Document.h5
-rw-r--r--src/Editor.cxx26
-rw-r--r--src/UniConversion.cxx16
-rw-r--r--src/UniConversion.h4
7 files changed, 410 insertions, 6 deletions
diff --git a/src/CellBuffer.cxx b/src/CellBuffer.cxx
index e8c385f1f..ffe5fe8b3 100644
--- a/src/CellBuffer.cxx
+++ b/src/CellBuffer.cxx
@@ -7,6 +7,7 @@
#include <cstddef>
#include <cstdlib>
+#include <cassert>
#include <cstring>
#include <cstdio>
#include <cstdarg>
@@ -28,17 +29,53 @@
namespace Scintilla {
+struct CountWidths {
+	// Tally of the characters in a piece of text, held as two counters:
+	// characters from the Base Multilingual Plane and characters from every
+	// other plane. Keeping them apart lets one tally be reported either in
+	// UTF-32 code units or in UTF-16 code units.
+	Sci::Position countBasePlane;
+	Sci::Position countOtherPlanes;
+	CountWidths(Sci::Position countBasePlane_=0, Sci::Position countOtherPlanes_=0) noexcept :
+		countBasePlane(countBasePlane_), countOtherPlanes(countOtherPlanes_) {}
+	// Tally with both counters negated.
+	CountWidths operator-() const noexcept {
+		const Sci::Position negatedBase = -countBasePlane;
+		const Sci::Position negatedOther = -countOtherPlanes;
+		return CountWidths(negatedBase, negatedOther);
+	}
+	// Width in UTF-32 code units: every character counts once.
+	Sci::Position WidthUTF32() const noexcept {
+		return countOtherPlanes + countBasePlane;
+	}
+	// Width in UTF-16 code units: characters outside the base plane count twice.
+	Sci::Position WidthUTF16() const noexcept {
+		return 2 * countOtherPlanes + countBasePlane;
+	}
+	// Add one character whose UTF-8 encoding is lenChar bytes long.
+	// Only a length of 4 is counted against the other planes.
+	void CountChar(int lenChar) noexcept {
+		Sci::Position &counter = (lenChar == 4) ? countOtherPlanes : countBasePlane;
+		counter++;
+	}
+};
+
class ILineVector { // Abstract interface to the line start data; LineVector<POS> in this file implements it.
public:
virtual void Init() = 0;
virtual void SetPerLine(PerLine *pl) = 0;
virtual void InsertText(Sci::Line line, Sci::Position delta) = 0;
virtual void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) = 0;
- virtual void SetLineStart(Sci::Line line, Sci::Position position) = 0;
+ virtual void SetLineStart(Sci::Line line, Sci::Position position) noexcept = 0;
virtual void RemoveLine(Sci::Line line) = 0;
virtual Sci::Line Lines() const noexcept = 0;
virtual Sci::Line LineFromPosition(Sci::Position pos) const noexcept = 0;
virtual Sci::Position LineStart(Sci::Line line) const noexcept = 0;
+ virtual void InsertCharacters(Sci::Line line, CountWidths delta) = 0; // Apply the widths in delta to line in each active character index.
+ virtual void SetLineCharactersWidth(Sci::Line line, CountWidths width) = 0; // Set the width of line in each active character index.
+ virtual int LineCharacterIndex() const noexcept = 0; // Bit set of SC_LINECHARACTERINDEX_* flags for the active indexes.
+ virtual bool AllocateLineCharacterIndex(int lineCharacterIndex, Sci::Line lines) = 0; // Add a reference to each requested index; the implementation returns true when one becomes active.
+ virtual bool ReleaseLineCharacterIndex(int lineCharacterIndex) = 0; // Drop a reference to each requested index; the implementation returns true when one stops being active.
+ virtual Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept = 0; // Start of line expressed in the units of the chosen index.
+ virtual Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept = 0; // Line containing pos, with pos in the units of the chosen index.
virtual ~ILineVector() {}
};
@@ -47,9 +84,58 @@ public:
using namespace Scintilla;
template <typename POS>
+class LineStartIndex {
+	// Reference counted set of line starts measured in characters rather than
+	// document positions. LineVector keeps one of these for UTF-16 code units
+	// and one for UTF-32 code points. The index is maintained only while
+	// refCount is positive.
+public:
+	int refCount;
+	Partitioning<POS> starts;
+
+	LineStartIndex() : refCount(0), starts(4) {
+		// Minimal initial allocation
+	}
+	// Deleted so LineStartIndex objects can not be copied.
+	LineStartIndex(const LineStartIndex &) = delete;
+	LineStartIndex(LineStartIndex &&) = delete;
+	void operator=(const LineStartIndex &) = delete;
+	void operator=(LineStartIndex &&) = delete;
+	virtual ~LineStartIndex() {
+		starts.DeleteAll();
+	}
+	// Add a reference and make sure there is a partition for each of the
+	// first `lines` lines.
+	// Returns true when this call took the index from unused to used, which
+	// tells the caller that the real widths still have to be calculated.
+	bool Allocate(Sci::Line lines) {
+		refCount++;
+		Sci::Position length = starts.PositionFromPartition(starts.Partitions());
+		for (Sci::Line line = starts.Partitions(); line < lines; line++) {
+			// Produce an ascending sequence that will be filled in with correct widths later
+			length++;
+			starts.InsertPartition(static_cast<POS>(line), static_cast<POS>(length));
+		}
+		return refCount == 1;
+	}
+	// Drop a reference, discarding the line starts when the last one goes.
+	// Returns true when this call took the index from used to unused.
+	bool Release() {
+		if (refCount <= 0) {
+			// Release without a matching Allocate. Letting the count go
+			// negative would stop a later Allocate from ever reporting the
+			// index as newly active, so the widths would never be calculated.
+			return false;
+		}
+		refCount--;
+		if (refCount == 0) {
+			starts.DeleteAll();
+			return true;
+		}
+		return false;
+	}
+	// True while at least one reference is held.
+	bool Active() const noexcept {
+		return refCount > 0;
+	}
+	// Width of a line: distance between its start and the start of the next line.
+	Sci::Position LineWidth(Sci::Line line) const noexcept {
+		return starts.PositionFromPartition(static_cast<POS>(line) + 1) -
+			starts.PositionFromPartition(static_cast<POS>(line));
+	}
+	// Change the width of a line to `width` by inserting the difference from
+	// its current width, which moves the starts of all following lines.
+	void SetLineWidth(Sci::Line line, Sci::Position width) {
+		const Sci::Position widthCurrent = LineWidth(line);
+		starts.InsertText(static_cast<POS>(line), static_cast<POS>(width - widthCurrent));
+	}
+};
+
+template <typename POS>
class LineVector : public ILineVector {
Partitioning<POS> starts;
PerLine *perLine;
+ LineStartIndex<POS> startsUTF16;
+ LineStartIndex<POS> startsUTF32;
public:
LineVector() : starts(256), perLine(0) {
Init();
@@ -66,7 +152,9 @@ public:
if (perLine) {
perLine->Init();
}
- }
+ startsUTF32.starts.DeleteAll();
+ startsUTF16.starts.DeleteAll();
+ }
void SetPerLine(PerLine *pl) override { // Store the per-line data object that is told about line insertions and removals.
perLine = pl;
}
@@ -74,18 +162,33 @@ public:
starts.InsertText(static_cast<POS>(line), static_cast<POS>(delta));
}
void InsertLine(Sci::Line line, Sci::Position position, bool lineStart) override { // Insert a line starting at position and mirror the insertion in each active character index.
- starts.InsertPartition(static_cast<POS>(line), static_cast<POS>(position));
+ const POS lineAsPos = static_cast<POS>(line);
+ starts.InsertPartition(lineAsPos, static_cast<POS>(position));
+ if (startsUTF32.Active()) { // New index entry starts one unit after the start of the preceding line
+ startsUTF32.starts.InsertPartition(lineAsPos,
+ static_cast<POS>(startsUTF32.starts.PositionFromPartition(lineAsPos - 1) + 1)); // NOTE(review): looks like a placeholder width that callers correct afterwards - confirm
+ }
+ if (startsUTF16.Active()) { // Same treatment for the UTF-16 index
+ startsUTF16.starts.InsertPartition(lineAsPos,
+ static_cast<POS>(startsUTF16.starts.PositionFromPartition(lineAsPos - 1) + 1));
+ }
if (perLine) {
if ((line > 0) && lineStart) // For an insertion at a line start the per-line data is told about the preceding line
line--;
perLine->InsertLine(line);
}
}
- void SetLineStart(Sci::Line line, Sci::Position position) override {
+ void SetLineStart(Sci::Line line, Sci::Position position) noexcept override { // Move the start of line to position; the character indexes are not touched here.
starts.SetPartitionStartPosition(static_cast<POS>(line), static_cast<POS>(position));
}
void RemoveLine(Sci::Line line) override { // Remove line from the line starts, from each active character index and from the per-line data.
starts.RemovePartition(static_cast<POS>(line));
+ if (startsUTF32.Active()) { // Keep the UTF-32 index holding the same number of lines as starts
+ startsUTF32.starts.RemovePartition(static_cast<POS>(line));
+ }
+ if (startsUTF16.Active()) { // Keep the UTF-16 index holding the same number of lines as starts
+ startsUTF16.starts.RemovePartition(static_cast<POS>(line));
+ }
if (perLine) {
perLine->RemoveLine(line);
}
}
@@ -99,6 +202,71 @@ public:
Sci::Position LineStart(Sci::Line line) const noexcept override { // Document position at which line starts.
return starts.PositionFromPartition(static_cast<POS>(line));
}
+	// Apply a change in character count on `line` to every active index.
+	// The same delta is converted to UTF-32 or UTF-16 units as appropriate.
+	void InsertCharacters(Sci::Line line, CountWidths delta) override {
+		const POS lineAsPos = static_cast<POS>(line);
+		if (startsUTF32.Active()) {
+			const POS unitsUTF32 = static_cast<POS>(delta.WidthUTF32());
+			startsUTF32.starts.InsertText(lineAsPos, unitsUTF32);
+		}
+		if (startsUTF16.Active()) {
+			const POS unitsUTF16 = static_cast<POS>(delta.WidthUTF16());
+			startsUTF16.starts.InsertText(lineAsPos, unitsUTF16);
+		}
+	}
+	// Record the measured width of `line` in every active index.
+	// Each index is expected to hold the same number of lines as `starts`.
+	void SetLineCharactersWidth(Sci::Line line, CountWidths width) override {
+		if (startsUTF32.Active()) {
+			assert(startsUTF32.starts.Partitions() == starts.Partitions());
+			const Sci::Position unitsUTF32 = width.WidthUTF32();
+			startsUTF32.SetLineWidth(line, unitsUTF32);
+		}
+		if (startsUTF16.Active()) {
+			assert(startsUTF16.starts.Partitions() == starts.Partitions());
+			const Sci::Position unitsUTF16 = width.WidthUTF16();
+			startsUTF16.SetLineWidth(line, unitsUTF16);
+		}
+	}
+
+	// Bit set of SC_LINECHARACTERINDEX_* flags, one for each active index.
+	int LineCharacterIndex() const noexcept override {
+		const int flagUTF32 = startsUTF32.Active() ? SC_LINECHARACTERINDEX_UTF32 : 0;
+		const int flagUTF16 = startsUTF16.Active() ? SC_LINECHARACTERINDEX_UTF16 : 0;
+		return flagUTF32 | flagUTF16;
+	}
+	// Add a reference to each index named in lineCharacterIndex, sizing it for
+	// `lines` lines. Returns true when at least one index became active through
+	// this call.
+	bool AllocateLineCharacterIndex(int lineCharacterIndex, Sci::Line lines) override {
+		const bool wantUTF32 = (lineCharacterIndex & SC_LINECHARACTERINDEX_UTF32) != 0;
+		const bool wantUTF16 = (lineCharacterIndex & SC_LINECHARACTERINDEX_UTF16) != 0;
+		bool becameActive = false;
+		if (wantUTF32) {
+			if (startsUTF32.Allocate(lines)) {
+				becameActive = true;
+			}
+			assert(startsUTF32.starts.Partitions() == starts.Partitions());
+		}
+		if (wantUTF16) {
+			if (startsUTF16.Allocate(lines)) {
+				becameActive = true;
+			}
+			assert(startsUTF16.starts.Partitions() == starts.Partitions());
+		}
+		return becameActive;
+	}
+	// Drop a reference to each index named in lineCharacterIndex.
+	// Returns true when at least one index stopped being active through this call.
+	bool ReleaseLineCharacterIndex(int lineCharacterIndex) override {
+		const bool dropUTF32 = (lineCharacterIndex & SC_LINECHARACTERINDEX_UTF32) != 0;
+		const bool dropUTF16 = (lineCharacterIndex & SC_LINECHARACTERINDEX_UTF16) != 0;
+		bool becameInactive = false;
+		if (dropUTF32 && startsUTF32.Release()) {
+			becameInactive = true;
+		}
+		if (dropUTF16 && startsUTF16.Release()) {
+			becameInactive = true;
+		}
+		return becameInactive;
+	}
+	// Start of `line` in the units of the chosen index.
+	// Any value other than SC_LINECHARACTERINDEX_UTF32 selects the UTF-16 index.
+	Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept override {
+		const LineStartIndex<POS> &chosen =
+			(lineCharacterIndex == SC_LINECHARACTERINDEX_UTF32) ? startsUTF32 : startsUTF16;
+		return chosen.starts.PositionFromPartition(static_cast<POS>(line));
+	}
+	// Line that contains `pos`, where `pos` is in the units of the chosen index.
+	// Any value other than SC_LINECHARACTERINDEX_UTF32 selects the UTF-16 index.
+	Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept override {
+		const LineStartIndex<POS> &chosen =
+			(lineCharacterIndex == SC_LINECHARACTERINDEX_UTF32) ? startsUTF32 : startsUTF16;
+		return static_cast<Sci::Line>(chosen.starts.PartitionFromPosition(static_cast<POS>(pos)));
+	}
};
Action::Action() {
@@ -364,6 +532,7 @@ void UndoHistory::CompletedRedoStep() {
CellBuffer::CellBuffer(bool hasStyles_, bool largeDocument_) :
hasStyles(hasStyles_), largeDocument(largeDocument_) {
readOnly = false;
+ utf8Substance = false;
utf8LineEnds = 0;
collectingUndo = true;
if (largeDocument)
@@ -505,10 +674,19 @@ void CellBuffer::Allocate(Sci::Position newSize) {
}
}
+void CellBuffer::SetUTF8Substance(bool utf8Substance_) {
+	// Record whether the text is to be treated as UTF-8.
+	// A change of setting rebuilds the line end data; repeating the current
+	// setting does nothing.
+	if (utf8Substance == utf8Substance_) {
+		return;
+	}
+	utf8Substance = utf8Substance_;
+	ResetLineEnds();
+}
+
void CellBuffer::SetLineEndTypes(int utf8LineEnds_) {
if (utf8LineEnds != utf8LineEnds_) {
+ const int indexes = plv->LineCharacterIndex();
utf8LineEnds = utf8LineEnds_;
ResetLineEnds();
+ AllocateLineCharacterIndex(indexes);
}
}
@@ -535,6 +713,23 @@ void CellBuffer::SetPerLine(PerLine *pl) {
plv->SetPerLine(pl);
}
+int CellBuffer::LineCharacterIndex() const noexcept { // Bit set of the character indexes that are active on the line vector.
+ return plv->LineCharacterIndex();
+}
+
+void CellBuffer::AllocateLineCharacterIndex(int lineCharacterIndex) {
+	// Requests are ignored unless the text is being treated as UTF-8.
+	if (!utf8Substance) {
+		return;
+	}
+	const bool becameActive = plv->AllocateLineCharacterIndex(lineCharacterIndex, Lines());
+	if (becameActive) {
+		// An index was switched on by this call so every line must be measured
+		RecalculateIndexLineStarts(0, Lines() - 1);
+	}
+}
+
+void CellBuffer::ReleaseLineCharacterIndex(int lineCharacterIndex) { // Drop a reference to each requested index; the line vector's result is not used.
+ plv->ReleaseLineCharacterIndex(lineCharacterIndex);
+}
+
Sci::Line CellBuffer::Lines() const noexcept { // Number of lines reported by the line vector.
return plv->Lines();
}
@@ -552,6 +747,14 @@ Sci::Line CellBuffer::LineFromPosition(Sci::Position pos) const noexcept {
return plv->LineFromPosition(pos);
}
+Sci::Position CellBuffer::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept { // Start of line in the units of the chosen character index.
+ return plv->IndexLineStart(line, lineCharacterIndex);
+}
+
+Sci::Line CellBuffer::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept { // Line containing pos, with pos in the units of the chosen character index.
+ return plv->LineFromPositionIndex(pos, lineCharacterIndex);
+}
+
bool CellBuffer::IsReadOnly() const { // Current value of the readOnly flag.
return readOnly;
}
@@ -612,6 +815,37 @@ bool CellBuffer::UTF8LineEndOverlaps(Sci::Position position) const {
return UTF8IsSeparator(bytes) || UTF8IsSeparator(bytes+1) || UTF8IsNEL(bytes+1);
}
+bool CellBuffer::UTF8IsCharacterBoundary(Sci::Position position) const { // Decide from the surrounding bytes whether position lies between UTF-8 characters.
+ assert(position >= 0 && position <= Length());
+ if (position > 0) {
+ std::string back; // Bytes collected while walking backwards, kept in document order
+ for (int i = 0; i < UTF8MaxBytes; i++) { // NOTE(review): i == 0 reads the byte at position itself rather than position - 1 - confirm this is intended
+ const Sci::Position posBack = position - i;
+ if (posBack < 0) { // Ran off the start of the document while still on trail bytes
+ return false;
+ }
+ back.insert(0, 1, substance.ValueAt(posBack)); // Prepend so the earliest byte is at the front
+ if (!UTF8IsTrailByte(back.front())) {
+ if (i > 0) {
+ // Have reached a non-trail
+ const int cla = UTF8Classify(back);
+ if ((cla & UTF8MaskInvalid) || (cla != i)) { // Collected bytes must classify as valid with a result equal to the distance walked
+ return false;
+ }
+ }
+ break;
+ }
+ }
+ }
+ if (position < Length()) {
+ const unsigned char fore = substance.ValueAt(position);
+ if (UTF8IsTrailByte(fore)) { // A trail byte at position means position is inside a sequence
+ return false;
+ }
+ }
+ return true;
+}
+
void CellBuffer::ResetLineEnds() {
// Reinitialize line data -- too much work to preserve
plv->Init();
@@ -648,6 +882,38 @@ void CellBuffer::ResetLineEnds() {
}
}
+namespace {
+
+CountWidths CountCharacterWidthsUTF8(std::string_view sv) noexcept {
+	// Walk the text one classified character at a time and tally each one.
+	// Only the width bits of the classification are used, so the invalid flag
+	// does not stop the count.
+	CountWidths tally;
+	for (size_t bytesLeft = sv.length(); bytesLeft > 0; ) {
+		const int width = UTF8Classify(sv) & UTF8MaskWidth;
+		tally.CountChar(width);
+		sv.remove_prefix(width);
+		bytesLeft -= width;
+	}
+	return tally;
+}
+
+}
+
+void CellBuffer::RecalculateIndexLineStarts(Sci::Line lineFirst, Sci::Line lineLast) {
+	// Measure each line from lineFirst to lineLast inclusive and store its
+	// character widths in the active character indexes.
+	if (plv->LineCharacterIndex() == SC_LINECHARACTERINDEX_NONE) {
+		// With no index active the measured widths would be thrown away, and
+		// this is reached from every insertion that is not a simple one, so
+		// avoid copying and classifying the line text for nothing.
+		return;
+	}
+	std::string text;
+	Sci::Position posLineEnd = LineStart(lineFirst);
+	for (Sci::Line line = lineFirst; line <= lineLast; line++) {
+		// Find line start and end, retrieve text of line, count characters and update line width
+		const Sci::Position posLineStart = posLineEnd;
+		posLineEnd = LineStart(line+1);
+		const Sci::Position width = posLineEnd - posLineStart;
+		text.resize(width);
+		GetCharRange(text.data(), posLineStart, width);
+		const CountWidths cw = CountCharacterWidthsUTF8(text);
+		plv->SetLineCharactersWidth(line, cw);
+	}
+}
+
void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
if (insertLength == 0)
return;
@@ -659,12 +925,25 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
breakingUTF8LineEnd = UTF8LineEndOverlaps(position);
}
+ const Sci::Line linePosition = plv->LineFromPosition(position);
+ Sci::Line lineInsert = linePosition + 1;
+
+ // A simple insertion is one that inserts valid text on a single line at a character boundary
+ bool simpleInsertion = false;
+
+ // Check for breaking apart a UTF-8 sequence and inserting invalid UTF-8
+ if (utf8Substance && (plv->LineCharacterIndex() != SC_LINECHARACTERINDEX_NONE)) {
+ // Actually, don't need to check that whole insertion is valid just that there
+ // are no potential fragments at ends.
+ simpleInsertion = UTF8IsCharacterBoundary(position) &&
+ UTF8IsValid(std::string_view(s, insertLength));
+ }
+
substance.InsertFromArray(position, s, 0, insertLength);
if (hasStyles) {
style.InsertValue(position, insertLength, 0);
}
- Sci::Line lineInsert = plv->LineFromPosition(position) + 1;
const bool atLineStart = plv->LineStart(lineInsert-1) == position;
// Point all the lines after the insertion point further along in the buffer
plv->InsertText(lineInsert-1, insertLength);
@@ -684,6 +963,7 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
if (ch == '\r') {
InsertLine(lineInsert, (position + i) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
} else if (ch == '\n') {
if (chPrev == '\r') {
// Patch up what was end of line
@@ -692,11 +972,13 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
InsertLine(lineInsert, (position + i) + 1, atLineStart);
lineInsert++;
}
+ simpleInsertion = false;
} else if (utf8LineEnds) {
const unsigned char back3[3] = {chBeforePrev, chPrev, ch};
if (UTF8IsSeparator(back3) || UTF8IsNEL(back3+1)) {
InsertLine(lineInsert, (position + i) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
}
}
chBeforePrev = chPrev;
@@ -707,6 +989,7 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
if (ch == '\r') {
// End of line already in buffer so drop the newly created one
RemoveLine(lineInsert - 1);
+ simpleInsertion = false;
}
} else if (utf8LineEnds && !UTF8IsAscii(chAfter)) {
// May have end of UTF-8 line end in buffer and start in insertion
@@ -716,21 +999,31 @@ void CellBuffer::BasicInsertString(Sci::Position position, const char *s, Sci::P
if (UTF8IsSeparator(back3)) {
InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
}
if ((j == 0) && UTF8IsNEL(back3+1)) {
InsertLine(lineInsert, (position + insertLength + j) + 1, atLineStart);
lineInsert++;
+ simpleInsertion = false;
}
chBeforePrev = chPrev;
chPrev = chAt;
}
}
+ if (simpleInsertion) {
+ const CountWidths cw = CountCharacterWidthsUTF8(std::string_view(s, insertLength));
+ plv->InsertCharacters(linePosition, cw);
+ } else {
+ RecalculateIndexLineStarts(linePosition, lineInsert - 1);
+ }
}
void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLength) {
if (deleteLength == 0)
return;
+ Sci::Line lineRecalculateStart = INVALID_POSITION;
+
if ((position == 0) && (deleteLength == substance.Length())) {
// If whole buffer is being deleted, faster to reinitialise lines data
// than to delete each line.
@@ -739,11 +1032,37 @@ void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLe
// Have to fix up line positions before doing deletion as looking at text in buffer
// to work out which lines have been removed
- Sci::Line lineRemove = plv->LineFromPosition(position) + 1;
+ const Sci::Line linePosition = plv->LineFromPosition(position);
+ Sci::Line lineRemove = linePosition + 1;
+
plv->InsertText(lineRemove-1, - (deleteLength));
const unsigned char chPrev = substance.ValueAt(position - 1);
const unsigned char chBefore = chPrev;
unsigned char chNext = substance.ValueAt(position);
+
+ // Check for breaking apart a UTF-8 sequence
+ // Needs further checks that text is UTF-8 or that some other break apart is occurring
+ if (utf8Substance && (plv->LineCharacterIndex() != SC_LINECHARACTERINDEX_NONE)) {
+ const Sci::Position posEnd = position + deleteLength;
+ const Sci::Line lineEndRemove = plv->LineFromPosition(posEnd);
+ const bool simpleDeletion =
+ (linePosition == lineEndRemove) &&
+ UTF8IsCharacterBoundary(position) && UTF8IsCharacterBoundary(posEnd);
+ if (simpleDeletion) {
+ std::string text(deleteLength, '\0');
+ GetCharRange(text.data(), position, deleteLength);
+ if (UTF8IsValid(text)) {
+ // Everything is good
+ const CountWidths cw = CountCharacterWidthsUTF8(text);
+ plv->InsertCharacters(linePosition, -cw);
+ } else {
+ lineRecalculateStart = linePosition;
+ }
+ } else {
+ lineRecalculateStart = linePosition;
+ }
+ }
+
bool ignoreNL = false;
if (chPrev == '\r' && chNext == '\n') {
// Move back one
@@ -792,6 +1111,9 @@ void CellBuffer::BasicDeleteChars(Sci::Position position, Sci::Position deleteLe
}
}
substance.DeleteRange(position, deleteLength);
+ if (lineRecalculateStart >= 0) {
+ RecalculateIndexLineStarts(lineRecalculateStart, lineRecalculateStart);
+ }
if (hasStyles) {
style.DeleteRange(position, deleteLength);
}
diff --git a/src/CellBuffer.h b/src/CellBuffer.h
index f360b2a23..b9f2406f1 100644
--- a/src/CellBuffer.h
+++ b/src/CellBuffer.h
@@ -113,6 +113,7 @@ private:
SplitVector<char> substance;
SplitVector<char> style;
bool readOnly;
+ bool utf8Substance;
int utf8LineEnds;
bool collectingUndo;
@@ -121,7 +122,9 @@ private:
std::unique_ptr<ILineVector> plv;
bool UTF8LineEndOverlaps(Sci::Position position) const;
+ bool UTF8IsCharacterBoundary(Sci::Position position) const;
void ResetLineEnds();
+ void RecalculateIndexLineStarts(Sci::Line lineFirst, Sci::Line lineLast);
/// Actions without undo
void BasicInsertString(Sci::Position position, const char *s, Sci::Position insertLength);
void BasicDeleteChars(Sci::Position position, Sci::Position deleteLength);
@@ -148,13 +151,19 @@ public:
Sci::Position Length() const noexcept;
void Allocate(Sci::Position newSize);
+ void SetUTF8Substance(bool utf8Substance_);
int GetLineEndTypes() const { return utf8LineEnds; }
void SetLineEndTypes(int utf8LineEnds_);
bool ContainsLineEnd(const char *s, Sci::Position length) const;
void SetPerLine(PerLine *pl);
+ int LineCharacterIndex() const noexcept;
+ void AllocateLineCharacterIndex(int lineCharacterIndex);
+ void ReleaseLineCharacterIndex(int lineCharacterIndex);
Sci::Line Lines() const noexcept;
Sci::Position LineStart(Sci::Line line) const noexcept;
+ Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept;
Sci::Line LineFromPosition(Sci::Position pos) const noexcept;
+ Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept;
void InsertLine(Sci::Line line, Sci::Position position, bool lineStart);
void RemoveLine(Sci::Line line);
const char *InsertString(Sci::Position position, const char *s, Sci::Position insertLength, bool &startSequence);
diff --git a/src/Document.cxx b/src/Document.cxx
index f3d8557ac..e53663f3e 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -119,6 +119,7 @@ Document::Document(int options) :
decorations = DecorationListCreate(IsLarge());
cb.SetPerLine(this);
+ cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
}
Document::~Document() {
@@ -194,6 +195,7 @@ bool Document::SetDBCSCodePage(int dbcsCodePage_) {
dbcsCodePage = dbcsCodePage_;
SetCaseFolder(nullptr);
cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
+ cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
return true;
} else {
return false;
@@ -420,6 +422,14 @@ Sci::Position Document::VCHomePosition(Sci::Position position) const {
return startText;
}
+Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const { // Start of line in the units of the chosen character index, as reported by the cell buffer.
+ return cb.IndexLineStart(line, lineCharacterIndex);
+}
+
+Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const { // Line containing pos, with pos in the units of the chosen character index.
+ return cb.LineFromPositionIndex(pos, lineCharacterIndex);
+}
+
int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
const int prev = Levels()->SetLevel(static_cast<Sci::Line>(line), level, LinesTotal());
if (prev != level) {
@@ -2105,6 +2115,18 @@ const char *Document::SubstituteByPosition(const char *text, Sci::Position *leng
return 0;
}
+int Document::LineCharacterIndex() const { // Bit set of the character indexes that are active in the cell buffer.
+ return cb.LineCharacterIndex();
+}
+
+void Document::AllocateLineCharacterIndex(int lineCharacterIndex) {
+	// Pass the request on to the cell buffer; there is no result to hand back.
+	cb.AllocateLineCharacterIndex(lineCharacterIndex);
+}
+
+void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) {
+	// Pass the request on to the cell buffer; there is no result to hand back.
+	cb.ReleaseLineCharacterIndex(lineCharacterIndex);
+}
+
Sci::Line Document::LinesTotal() const noexcept { // Number of lines held by the cell buffer.
return cb.Lines();
}
diff --git a/src/Document.h b/src/Document.h
index e1613cb20..0ef967e09 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -389,6 +389,8 @@ public:
bool IsLineEndPosition(Sci::Position position) const;
bool IsPositionInLineEnd(Sci::Position position) const;
Sci::Position VCHomePosition(Sci::Position position) const;
+ Sci::Position IndexLineStart(Sci::Line line, int lineCharacterIndex) const;
+ Sci::Line LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const;
int SCI_METHOD SetLevel(Sci_Position line, int level) override;
int SCI_METHOD GetLevel(Sci_Position line) const override;
@@ -414,6 +416,9 @@ public:
void SetCaseFolder(CaseFolder *pcf_);
Sci::Position FindText(Sci::Position minPos, Sci::Position maxPos, const char *search, int flags, Sci::Position *length);
const char *SubstituteByPosition(const char *text, Sci::Position *length);
+ int LineCharacterIndex() const;
+ void AllocateLineCharacterIndex(int lineCharacterIndex);
+ void ReleaseLineCharacterIndex(int lineCharacterIndex);
Sci::Line LinesTotal() const noexcept;
void SetDefaultCharClasses(bool includeWordClass);
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 86c0536a1..3093e6c57 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -6020,6 +6020,11 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
static_cast<Sci::Position>(wParam), lParam),
0, pdoc->Length());
+ case SCI_POSITIONRELATIVECODEUNITS:
+ return std::clamp<Sci::Position>(pdoc->GetRelativePositionUTF16(
+ static_cast<Sci::Position>(wParam), lParam),
+ 0, pdoc->Length());
+
case SCI_LINESCROLL:
ScrollTo(topLine + static_cast<Sci::Line>(lParam));
HorizontalScrollTo(xOffset + static_cast<int>(wParam) * static_cast<int>(vs.spaceWidth));
@@ -6785,6 +6790,23 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
case SCI_GETBIDIRECTIONAL:
return static_cast<sptr_t>(bidirectional);
+ case SCI_GETLINECHARACTERINDEX:
+ return pdoc->LineCharacterIndex();
+
+ case SCI_ALLOCATELINECHARACTERINDEX:
+ pdoc->AllocateLineCharacterIndex(static_cast<int>(wParam));
+ break;
+
+ case SCI_RELEASELINECHARACTERINDEX:
+ pdoc->ReleaseLineCharacterIndex(static_cast<int>(wParam));
+ break;
+
+ case SCI_LINEFROMINDEXPOSITION:
+ return pdoc->LineFromPositionIndex(static_cast<Sci::Position>(wParam), static_cast<int>(lParam));
+
+ case SCI_INDEXPOSITIONFROMLINE:
+ return pdoc->IndexLineStart(static_cast<Sci::Line>(wParam), static_cast<int>(lParam));
+
// Marker definition and setting
case SCI_MARKERDEFINE:
if (wParam <= MARKER_MAX) {
@@ -8190,6 +8212,10 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
case SCI_COUNTCHARACTERS:
return pdoc->CountCharacters(static_cast<Sci::Position>(wParam), lParam);
+ //return pdoc->CountCharacters(static_cast<Sci::Position>(wParam), static_cast<Sci::Position>(lParam));
+
+ case SCI_COUNTCODEUNITS:
+ return pdoc->CountUTF16(static_cast<Sci::Position>(wParam), lParam);
default:
return DefWndProc(iMessage, wParam, lParam);
diff --git a/src/UniConversion.cxx b/src/UniConversion.cxx
index 3b7472638..58475687b 100644
--- a/src/UniConversion.cxx
+++ b/src/UniConversion.cxx
@@ -340,6 +340,22 @@ int UTF8DrawBytes(const unsigned char *us, int len) noexcept {
return (utf8StatusNext & UTF8MaskInvalid) ? 1 : (utf8StatusNext & UTF8MaskWidth);
}
+bool UTF8IsValid(std::string_view sv) noexcept {
+	// Classify one character at a time from the front of the text.
+	// The first character flagged invalid makes the whole text invalid;
+	// consuming every byte without such a flag makes it valid.
+	const unsigned char *cursor = reinterpret_cast<const unsigned char *>(sv.data());
+	for (size_t bytesLeft = sv.length(); bytesLeft > 0; ) {
+		const int status = UTF8Classify(cursor, bytesLeft);
+		if (status & UTF8MaskInvalid) {
+			return false;
+		}
+		const int width = status & UTF8MaskWidth;
+		cursor += width;
+		bytesLeft -= width;
+	}
+	return true;
+}
+
// Replace invalid bytes in UTF-8 with the replacement character
std::string FixInvalidUTF8(const std::string &text) {
std::string result;
diff --git a/src/UniConversion.h b/src/UniConversion.h
index 6d257cd8e..c676230da 100644
--- a/src/UniConversion.h
+++ b/src/UniConversion.h
@@ -22,6 +22,7 @@ size_t UTF16Length(std::string_view sv);
size_t UTF16FromUTF8(std::string_view sv, wchar_t *tbuf, size_t tlen);
size_t UTF32FromUTF8(std::string_view sv, unsigned int *tbuf, size_t tlen);
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept;
+bool UTF8IsValid(std::string_view sv) noexcept;
std::string FixInvalidUTF8(const std::string &text);
extern const unsigned char UTF8BytesOfLead[256];
@@ -49,6 +50,9 @@ inline constexpr bool UTF8IsAscii(int ch) noexcept {
enum { UTF8MaskWidth=0x7, UTF8MaskInvalid=0x8 };
int UTF8Classify(const unsigned char *us, size_t len) noexcept;
+inline int UTF8Classify(std::string_view sv) noexcept { // Convenience overload: classify the bytes viewed by sv using the pointer and length form.
+ return UTF8Classify(reinterpret_cast<const unsigned char *>(sv.data()), sv.length());
+}
// Similar to UTF8Classify but returns a length of 1 for invalid bytes
// instead of setting the invalid flag