// Scintilla source code edit control /** @file Document.cxx ** Text document that handles notifications, DBCS, styling, words and end of line. **/ // Copyright 1998-2011 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NO_CXX11_REGEX #include #endif #include "ScintillaTypes.h" #include "ILoader.h" #include "ILexer.h" #include "Debugging.h" #include "CharacterType.h" #include "CharacterCategoryMap.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" #include "RunStyles.h" #include "CellBuffer.h" #include "PerLine.h" #include "CharClassify.h" #include "Decoration.h" #include "CaseFolder.h" #include "Document.h" #include "RESearch.h" #include "UniConversion.h" #include "ElapsedPeriod.h" using namespace Scintilla; using namespace Scintilla::Internal; LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) { } LexInterface::~LexInterface() noexcept = default; void LexInterface::SetInstance(ILexer5 *instance_) noexcept { instance.reset(instance_); } void LexInterface::Colourise(Sci::Position start, Sci::Position end) { if (pdoc && instance && !performingStyle) { // Protect against reentrance, which may occur, for example, when // fold points are discovered while performing styling and the folding // code looks for child lines which may trigger styling. 
performingStyle = true; const Sci::Position lengthDoc = pdoc->Length(); if (end == -1) end = lengthDoc; const Sci::Position len = end - start; PLATFORM_ASSERT(len >= 0); PLATFORM_ASSERT(start + len <= lengthDoc); int styleStart = 0; if (start > 0) styleStart = pdoc->StyleAt(start - 1); if (len > 0) { instance->Lex(start, len, styleStart, pdoc); instance->Fold(start, len, styleStart, pdoc); } performingStyle = false; } } LineEndType LexInterface::LineEndTypesSupported() { if (instance) { return static_cast(instance->LineEndTypesSupported()); } return LineEndType::Default; } bool LexInterface::UseContainerLexing() const noexcept { return !instance; } ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { } void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { // Only adjust for multiple actions to avoid instability if (numberActions < 8) return; // Alpha value for exponential smoothing. // Most recent value contributes 25% to smoothed value. 
constexpr double alpha = 0.25; const double durationOne = durationOfActions / numberActions; duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, minDuration, maxDuration); } double ActionDuration::Duration() const noexcept { return duration; } size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept { return std::lround(secondsAllowed / Duration()); } CharacterExtracted::CharacterExtracted(const unsigned char *charBytes, size_t widthCharBytes) noexcept { const int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { // Treat as invalid and use up just one byte character = unicodeReplacementChar; widthBytes = 1; } else { character = UnicodeFromUTF8(charBytes); widthBytes = utf8status & UTF8MaskWidth; } } Document::Document(DocumentOption options) : cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)), durationStyleOneByte(0.000001, 0.0000001, 0.00001) { refCount = 0; #ifdef _WIN32 eolMode = EndOfLine::CrLf; #else eolMode = EndOfLine::Lf; #endif dbcsCodePage = CpUtf8; lineEndBitSet = LineEndType::Default; endStyled = 0; styleClock = 0; enteredModification = 0; enteredStyling = 0; enteredReadOnlyCount = 0; insertionSet = false; tabInChars = 8; indentInChars = 0; actualIndentInChars = 8; useTabs = true; tabIndents = true; backspaceUnindents = false; matchesValid = false; perLineData[ldMarkers] = std::make_unique(); perLineData[ldLevels] = std::make_unique(); perLineData[ldState] = std::make_unique(); perLineData[ldMargin] = std::make_unique(); perLineData[ldAnnotation] = std::make_unique(); perLineData[ldEOLAnnotation] = std::make_unique(); decorations = DecorationListCreate(IsLarge()); cb.SetPerLine(this); cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); } Document::~Document() { for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyDeleted(this, watcher.userData); } } // Increase reference count and return its previous value. 
int Document::AddRef() { return refCount++; } // Decrease reference count and return its previous value. // Delete the document if reference count reaches zero. int SCI_METHOD Document::Release() { const int curRefCount = --refCount; if (curRefCount == 0) delete this; return curRefCount; } void Document::Init() { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->Init(); } } void Document::InsertLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->InsertLine(line); } } void Document::InsertLines(Sci::Line line, Sci::Line lines) { for (const auto &pl : perLineData) { if (pl) pl->InsertLines(line, lines); } } void Document::RemoveLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->RemoveLine(line); } } LineMarkers *Document::Markers() const noexcept { return static_cast(perLineData[ldMarkers].get()); } LineLevels *Document::Levels() const noexcept { return static_cast(perLineData[ldLevels].get()); } LineState *Document::States() const noexcept { return static_cast(perLineData[ldState].get()); } LineAnnotation *Document::Margins() const noexcept { return static_cast(perLineData[ldMargin].get()); } LineAnnotation *Document::Annotations() const noexcept { return static_cast(perLineData[ldAnnotation].get()); } LineAnnotation *Document::EOLAnnotations() const noexcept { return static_cast(perLineData[ldEOLAnnotation].get()); } LineEndType Document::LineEndTypesSupported() const { if ((CpUtf8 == dbcsCodePage) && pli) return pli->LineEndTypesSupported(); else return LineEndType::Default; } bool Document::SetDBCSCodePage(int dbcsCodePage_) { if (dbcsCodePage != dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; SetCaseFolder(nullptr); cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); ModifiedAt(0); // Need to restyle whole document return true; } else { return false; } } bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) { if (lineEndBitSet 
!= lineEndBitSet_) { lineEndBitSet = lineEndBitSet_; const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); if (lineEndBitSetActive != cb.GetLineEndTypes()) { ModifiedAt(0); cb.SetLineEndTypes(lineEndBitSetActive); return true; } else { return false; } } else { return false; } } void Document::SetSavePoint() { cb.SetSavePoint(); NotifySavePoint(true); } void Document::TentativeUndo() { if (!TentativeActive()) return; CheckReadOnly(); if (enteredModification == 0) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.TentativeSteps(); //Platform::DebugPrintf("Steps=%d\n", steps); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( ModificationFlags::BeforeInsert | ModificationFlags::Undo, action)); } else if (action.at == ActionType::container) { DocModification dm(ModificationFlags::Container | ModificationFlags::Undo); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( ModificationFlags::BeforeDelete | ModificationFlags::Undo, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); } ModificationFlags modFlags = ModificationFlags::Undo; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { modFlags |= ModificationFlags::InsertText; } else if (action.at == ActionType::insert) { modFlags |= ModificationFlags::DeleteText; } if (steps > 1) modFlags |= ModificationFlags::MultiStepUndoRedo; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= ModificationFlags::LastStepInUndoRedo; if (multiLine) modFlags |= ModificationFlags::MultilineUndoRedo; } NotifyModified(DocModification(modFlags, 
action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); cb.TentativeCommit(); } enteredModification--; } } int Document::GetMark(Sci::Line line, bool includeChangeHistory) const { int marksHistory = 0; if (includeChangeHistory && (line < LinesTotal())) { int marksEdition = 0; const Sci::Position start = LineStart(line); const Sci::Position lineNext = LineStart(line + 1); for (Sci::Position position = start; position < lineNext;) { const int edition = EditionAt(position); if (edition) { marksEdition |= 1 << (edition-1); } position = EditionEndRun(position); } const Sci::Position lineEnd = LineEnd(line); for (Sci::Position position = start; position <= lineEnd;) { marksEdition |= EditionDeletesAt(position); position = EditionNextDelete(position); } /* Bits: RevertedToOrigin, Saved, Modified, RevertedToModified */ constexpr unsigned int editionShift = static_cast(MarkerOutline::HistoryRevertedToOrigin); marksHistory = marksEdition << editionShift; } return marksHistory | Markers()->MarkValue(line); } Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { return Markers()->MarkerNext(lineStart, mask); } int Document::AddMark(Sci::Line line, int markerNum) { if (line >= 0 && line < LinesTotal()) { const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); return prev; } else { return -1; } } void Document::AddMarkSet(Sci::Line line, int valueSet) { if (line < 0 || line >= LinesTotal()) { return; } unsigned int m = valueSet; for (int i = 0; m; i++, m >>= 1) { if (m & 1) Markers()->AddMark(line, i, LinesTotal()); } const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMark(Sci::Line line, int markerNum) { 
Markers()->DeleteMark(line, markerNum, false); const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMarkFromHandle(int markerHandle) { Markers()->DeleteMarkFromHandle(markerHandle); DocModification mh(ModificationFlags::ChangeMarker); mh.line = -1; NotifyModified(mh); } void Document::DeleteAllMarks(int markerNum) { bool someChanges = false; for (Sci::Line line = 0; line < LinesTotal(); line++) { if (Markers()->DeleteMark(line, markerNum, true)) someChanges = true; } if (someChanges) { DocModification mh(ModificationFlags::ChangeMarker); mh.line = -1; NotifyModified(mh); } } Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { return Markers()->LineFromHandle(markerHandle); } int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { return Markers()->NumberFromLine(line, which); } int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { return Markers()->HandleFromLine(line, which); } Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const { return cb.LineStart(line); } Range Document::LineRange(Sci::Line line) const noexcept { return {cb.LineStart(line), cb.LineStart(line + 1)}; } bool Document::IsLineStartPosition(Sci::Position position) const { return LineStart(LineFromPosition(position)) == position; } Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { if (line >= LinesTotal() - 1) { return LineStart(line + 1); } else { Sci::Position position = LineStart(line + 1); if (LineEndType::Unicode == cb.GetLineEndTypes()) { const unsigned char bytes[] = { cb.UCharAt(position-3), cb.UCharAt(position-2), cb.UCharAt(position-1), }; if (UTF8IsSeparator(bytes)) { return position - UTF8SeparatorLength; } if (UTF8IsNEL(bytes+1)) { return position - UTF8NELLength; } } position--; // Back over CR or LF // When line terminator is CR+LF, may need to go back one more if ((position > LineStart(line)) && 
(cb.CharAt(position - 1) == '\r')) { position--; } return position; } } void SCI_METHOD Document::SetErrorStatus(int status) { // Tell the watchers an error has occurred. for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast(status)); } } Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const { return cb.LineFromPosition(pos); } Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept { // Avoids casting in callers for this very common function return cb.LineFromPosition(pos); } Sci::Position Document::LineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)); } bool Document::IsLineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)) == position; } bool Document::IsPositionInLineEnd(Sci::Position position) const { return position >= LineEnd(LineFromPosition(position)); } Sci::Position Document::VCHomePosition(Sci::Position position) const { const Sci::Line line = SciLineFromPosition(position); const Sci::Position startPosition = LineStart(line); const Sci::Position endLine = LineEnd(line); Sci::Position startText = startPosition; while (startText < endLine && IsSpaceOrTab(cb.CharAt(startText))) startText++; if (position == startText) return startPosition; else return startText; } Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept { return cb.IndexLineStart(line, lineCharacterIndex); } Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept { return cb.LineFromPositionIndex(pos, lineCharacterIndex); } Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept { const Sci::Position posAfter = cb.LineStart(line) + length; if (posAfter >= LengthNoExcept()) { return LinesTotal(); } const Sci::Line lineAfter = 
SciLineFromPosition(posAfter); if (lineAfter > line) { return lineAfter; } else { // Want to make some progress so return next line return lineAfter + 1; } } int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { const int prev = Levels()->SetLevel(line, level, LinesTotal()); if (prev != level) { DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); mh.foldLevelNow = static_cast(level); mh.foldLevelPrev = static_cast(prev); NotifyModified(mh); } return prev; } int SCI_METHOD Document::GetLevel(Sci_Position line) const { return Levels()->GetLevel(line); } FoldLevel Document::GetFoldLevel(Sci_Position line) const noexcept { return Levels()->GetFoldLevel(line); } void Document::ClearLevels() { Levels()->ClearLevels(); } static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept { if (LevelIsWhitespace(levelTry)) return true; else return LevelNumber(levelStart) < LevelNumber(levelTry); } Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional level, Sci::Line lastLine) { const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent)); const Sci::Line maxLine = LinesTotal(); const Sci::Line lookLastLine = (lastLine != -1) ? 
std::min(LinesTotal() - 1, lastLine) : -1; Sci::Line lineMaxSubord = lineParent; while (lineMaxSubord < maxLine - 1) { EnsureStyledTo(LineStart(lineMaxSubord + 2)); if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1))) break; if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) break; lineMaxSubord++; } if (lineMaxSubord > lineParent) { if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) { // Have chewed up some whitespace that belongs to a parent so seek back if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) { lineMaxSubord--; } } } return lineMaxSubord; } Sci::Line Document::GetFoldParent(Sci::Line line) const noexcept { return Levels()->GetFoldParent(line); } void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) { const FoldLevel level = GetFoldLevel(line); const Sci::Line lookLastLine = std::max(line, lastLine) + 1; Sci::Line lookLine = line; FoldLevel lookLineLevel = level; FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel); while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) || (LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) { lookLineLevel = GetFoldLevel(--lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); } Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? 
lookLine : GetFoldParent(lookLine); if (beginFoldBlock == -1) { highlightDelimiter.Clear(); return; } Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine); Sci::Line firstChangeableLineBefore = -1; if (endFoldBlock < line) { lookLine = beginFoldBlock - 1; lookLineLevel = GetFoldLevel(lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) { if (LevelIsHeader(lookLineLevel)) { if (GetLastChild(lookLine, {}, lookLastLine) == line) { beginFoldBlock = lookLine; endFoldBlock = line; firstChangeableLineBefore = line - 1; } } if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum)) break; lookLineLevel = GetFoldLevel(--lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); } } if (firstChangeableLineBefore == -1) { for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel); lookLine >= beginFoldBlock; lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) { if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) { firstChangeableLineBefore = lookLine; break; } } } if (firstChangeableLineBefore == -1) firstChangeableLineBefore = beginFoldBlock - 1; Sci::Line firstChangeableLineAfter = -1; for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel); lookLine <= endFoldBlock; lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) { if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) { firstChangeableLineAfter = lookLine; break; } } if (firstChangeableLineAfter == -1) firstChangeableLineAfter = endFoldBlock + 1; highlightDelimiter.beginFoldBlock = beginFoldBlock; highlightDelimiter.endFoldBlock = endFoldBlock; 
highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore; highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter; } Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { return std::clamp(pos, 0, LengthNoExcept()); } bool Document::IsCrLf(Sci::Position pos) const noexcept { if (pos < 0) return false; if (pos >= (LengthNoExcept() - 1)) return false; return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); } int Document::LenChar(Sci::Position pos) const noexcept { if (pos < 0 || pos >= LengthNoExcept()) { // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. return 1; } else if (IsCrLf(pos)) { return 2; } const unsigned char leadByte = cb.UCharAt(pos); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return 1; } if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b < widthCharBytes; b++) { charBytes[b] = cb.UCharAt(pos + b); } const int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { // Treat as invalid and use up just one byte return 1; } else { return utf8status & UTF8MaskWidth; } } else { if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) { return 2; } else { return 1; } } } bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept { Sci::Position trail = pos; while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1))) trail--; start = (trail > 0) ? 
trail-1 : trail; const unsigned char leadByte = cb.UCharAt(start); const int widthCharBytes = UTF8BytesOfLead[leadByte]; if (widthCharBytes == 1) { return false; } else { const int trailBytes = widthCharBytes - 1; const Sci::Position len = pos - start; if (len > trailBytes) // pos too far from lead return false; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (Sci::Position b=1; b= LengthNoExcept()) return LengthNoExcept(); // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept()); if (checkLineEnd && IsCrLf(pos - 1)) { if (moveDir > 0) return pos + 1; else return pos - 1; } if (dbcsCodePage) { if (CpUtf8 == dbcsCodePage) { const unsigned char ch = cb.UCharAt(pos); // If ch is not a trail byte then pos is valid intercharacter position if (UTF8IsTrailByte(ch)) { Sci::Position startUTF = pos; Sci::Position endUTF = pos; if (InGoodUTF8(pos, startUTF, endUTF)) { // ch is a trail byte within a UTF-8 character if (moveDir > 0) pos = endUTF; else pos = startUTF; } // Else invalid UTF-8 so return position of isolated trail byte } } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); if (pos == posStartLine) return pos; // Step back until a non-lead-byte is found. Sci::Position posCheck = pos; while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1))) posCheck--; // Check from known start of character. while (posCheck < pos) { const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1; if (posCheck + mbsize == pos) { return pos; } else if (posCheck + mbsize > pos) { if (moveDir > 0) { return posCheck + mbsize; } else { return posCheck; } } posCheck += mbsize; } } } return pos; } // NextPosition moves between valid positions - it can not handle a position in the middle of a // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar. 
// A \r\n pair is treated as two characters. Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept { // If out of range, just return minimum/maximum value. const int increment = (moveDir > 0) ? 1 : -1; if (pos + increment <= 0) return 0; if (pos + increment >= cb.Length()) return cb.Length(); if (dbcsCodePage) { if (CpUtf8 == dbcsCodePage) { if (increment == 1) { // Simple forward movement case so can avoid some checks const unsigned char leadByte = cb.UCharAt(pos); if (UTF8IsAscii(leadByte)) { // Single byte character or invalid pos++; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b 0) { const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1; pos += mbsize; if (pos > cb.Length()) pos = cb.Length(); } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx // http://msdn.microsoft.com/en-us/library/cc194790.aspx if ((pos - 1) <= posStartLine) { return pos - 1; } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) { // Should actually be trail byte if (IsDBCSDualByteAt(pos - 2)) { return pos - 2; } else { // Invalid byte pair so treat as one byte wide return pos - 1; } } else { // Otherwise, step back until a non-lead-byte is found. Sci::Position posTemp = pos - 1; while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp))) ; // Now posTemp+1 must point to the beginning of a character, // so figure out whether we went back an even or an odd // number of bytes and go back 1 or 2 bytes, respectively. 
const Sci::Position widthLast = ((pos - posTemp) & 1) + 1; if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) { return pos - widthLast; } // Byte before pos may be valid character or may be an invalid second byte return pos - 1; } } } } else { pos += increment; } return pos; } bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { // Returns true if pos changed Sci::Position posNext = NextPosition(pos, moveDir); if (posNext == pos) { return false; } else { pos = posNext; return true; } } CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { if (position >= LengthNoExcept()) { return CharacterExtracted(unicodeReplacementChar, 0); } const unsigned char leadByte = cb.UCharAt(position); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return Sci::invalidPosition; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > Length())) return Sci::invalidPosition; } return pos; } Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { Sci::Position pos = positionStart; if (dbcsCodePage) { const int increment = (characterOffset > 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return Sci::invalidPosition; if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. 
characterOffset -= increment; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > LengthNoExcept())) return Sci::invalidPosition; } return pos; } int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { int bytesInCharacter = 1; const unsigned char leadByte = cb.UCharAt(position); int character = leadByte; if (dbcsCodePage && !UTF8IsAscii(leadByte)) { if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b= 0x81) && (uch <= 0x9F)) || ((uch >= 0xE0) && (uch <= 0xFC)); // Lead bytes F0 to FC may be a Microsoft addition. case 936: // GBK return (uch >= 0x81) && (uch <= 0xFE); case 949: // Korean Wansung KS C-5601-1987 return (uch >= 0x81) && (uch <= 0xFE); case 950: // Big5 return (uch >= 0x81) && (uch <= 0xFE); case 1361: // Korean Johab KS C-5601-1992 return ((uch >= 0x84) && (uch <= 0xD3)) || ((uch >= 0xD8) && (uch <= 0xDE)) || ((uch >= 0xE0) && (uch <= 0xF9)); } return false; } bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept { const unsigned char trail = ch; switch (dbcsCodePage) { case 932: // Shift_jis return (trail != 0x7F) && ((trail >= 0x40) && (trail <= 0xFC)); case 936: // GBK return (trail != 0x7F) && ((trail >= 0x40) && (trail <= 0xFE)); case 949: // Korean Wansung KS C-5601-1987 return ((trail >= 0x41) && (trail <= 0x5A)) || ((trail >= 0x61) && (trail <= 0x7A)) || ((trail >= 0x81) && (trail <= 0xFE)); case 950: // Big5 return ((trail >= 0x40) && (trail <= 0x7E)) || ((trail >= 0xA1) && (trail <= 0xFE)); case 1361: // Korean Johab KS C-5601-1992 return ((trail >= 0x31) && (trail <= 0x7E)) || ((trail >= 0x81) && (trail <= 0xFE)); } return false; } int Document::DBCSDrawBytes(std::string_view text) const noexcept { if (text.length() <= 1) { return static_cast(text.length()); } if (IsDBCSLeadByteNoExcept(text[0])) { return 
IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1; } else { return 1; } } bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept { return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1)); } // Need to break text into segments near end but taking into account the // encoding to not break inside a UTF-8 or DBCS character and also trying // to avoid breaking inside a pair of combining characters, or inside // ligatures. // TODO: implement grapheme cluster boundaries, // see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries. // // The segment length must always be long enough (more than 4 bytes) // so that there will be at least one whole character to make a segment. // For UTF-8, text must consist only of valid whole characters. // In preference order from best to worst: // 1) Break before or after spaces or controls // 2) Break at word and punctuation boundary for better kerning and ligature support // 3) Break after whole character, this may break combining characters size_t Document::SafeSegment(std::string_view text) const noexcept { // check space first as most written language use spaces. for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) { if (IsBreakSpace(*it)) { return it - text.begin(); } } if (!dbcsCodePage || dbcsCodePage == CpUtf8) { // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary. std::string_view::iterator it = text.end() - 1; const bool punctuation = IsPunctuation(*it); do { --it; if (punctuation != IsPunctuation(*it)) { return it - text.begin() + 1; } } while (it != text.begin()); it = text.end() - 1; if (dbcsCodePage) { // for UTF-8 go back to the start of last character. for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) { --it; } } return it - text.begin(); } { // forward iterate for DBCS to find word and punctuation boundary. 
size_t lastPunctuationBreak = 0; size_t lastEncodingAllowedBreak = 0; CharacterClass ccPrev = CharacterClass::space; for (size_t j = 0; j < text.length();) { const unsigned char ch = text[j]; lastEncodingAllowedBreak = j++; CharacterClass cc = CharacterClass::word; if (UTF8IsAscii(ch)) { if (IsPunctuation(ch)) { cc = CharacterClass::punctuation; } } else { j += IsDBCSLeadByteNoExcept(ch); } if (cc != ccPrev) { ccPrev = cc; lastPunctuationBreak = lastEncodingAllowedBreak; } } return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak; } } EncodingFamily Document::CodePageFamily() const noexcept { if (CpUtf8 == dbcsCodePage) return EncodingFamily::unicode; else if (dbcsCodePage) return EncodingFamily::dbcs; else return EncodingFamily::eightBit; } void Document::ModifiedAt(Sci::Position pos) noexcept { if (endStyled > pos) endStyled = pos; } void Document::CheckReadOnly() { if (cb.IsReadOnly() && enteredReadOnlyCount == 0) { enteredReadOnlyCount++; NotifyModifyAttempt(); enteredReadOnlyCount--; } } void Document::TrimReplacement(std::string_view &text, Range &range) const noexcept { while (!text.empty() && !range.Empty() && (text.front() == CharAt(range.start))) { text.remove_prefix(1); range.start++; } while (!text.empty() && !range.Empty() && (text.back() == CharAt(range.end-1))) { text.remove_suffix(1); range.end--; } } // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt. 
// SetStyleAt does not change the persistent state of a document bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { if (pos < 0) return false; if (len <= 0) return false; if ((pos + len) > LengthNoExcept()) return false; CheckReadOnly(); if (enteredModification != 0) { return false; } else { enteredModification++; if (!cb.IsReadOnly()) { NotifyModified( DocModification( ModificationFlags::BeforeDelete | ModificationFlags::User, pos, len, 0, nullptr)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.DeleteChars(pos, len, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); if ((pos < LengthNoExcept()) || (pos == 0)) ModifiedAt(pos); else ModifiedAt(pos-1); NotifyModified( DocModification( ModificationFlags::DeleteText | ModificationFlags::User | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), pos, len, LinesTotal() - prevLinesTotal, text)); } enteredModification--; } return !cb.IsReadOnly(); } /** * Insert a string with a length. 
*/ Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) { if (insertLength <= 0) { return 0; } CheckReadOnly(); // Application may change read only state here if (cb.IsReadOnly()) { return 0; } if (enteredModification != 0) { return 0; } enteredModification++; insertionSet = false; insertion.clear(); NotifyModified( DocModification( ModificationFlags::InsertCheck, position, insertLength, 0, s)); if (insertionSet) { s = insertion.c_str(); insertLength = insertion.length(); } NotifyModified( DocModification( ModificationFlags::BeforeInsert | ModificationFlags::User, position, insertLength, 0, HTTP/1.1 200 OK Connection: keep-alive Connection: keep-alive Content-Disposition: inline; filename="Document.cxx" Content-Disposition: inline; filename="Document.cxx" Content-Length: 105619 Content-Length: 105619 Content-Security-Policy: default-src 'none' Content-Security-Policy: default-src 'none' Content-Type: text/plain; charset=UTF-8 Content-Type: text/plain; charset=UTF-8 Date: Sat, 18 Oct 2025 01:01:41 UTC ETag: "4450954b4de10ada5d4f3ed642921053ebdf360e" ETag: "4450954b4de10ada5d4f3ed642921053ebdf360e" Expires: Tue, 16 Oct 2035 01:01:41 GMT Expires: Tue, 16 Oct 2035 01:01:41 GMT Last-Modified: Sat, 18 Oct 2025 01:01:41 GMT Last-Modified: Sat, 18 Oct 2025 01:01:41 GMT Server: OpenBSD httpd Server: OpenBSD httpd X-Content-Type-Options: nosniff X-Content-Type-Options: nosniff // Scintilla source code edit control /** @file Document.cxx ** Text document that handles notifications, DBCS, styling, words and end of line. **/ // Copyright 1998-2011 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. 
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NO_CXX11_REGEX #include #endif #include "ScintillaTypes.h" #include "ILoader.h" #include "ILexer.h" #include "Debugging.h" #include "CharacterType.h" #include "CharacterCategoryMap.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" #include "RunStyles.h" #include "CellBuffer.h" #include "PerLine.h" #include "CharClassify.h" #include "Decoration.h" #include "CaseFolder.h" #include "Document.h" #include "RESearch.h" #include "UniConversion.h" #include "ElapsedPeriod.h" using namespace Scintilla; using namespace Scintilla::Internal; LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) { } LexInterface::~LexInterface() noexcept = default; void LexInterface::SetInstance(ILexer5 *instance_) noexcept { instance.reset(instance_); } void LexInterface::Colourise(Sci::Position start, Sci::Position end) { if (pdoc && instance && !performingStyle) { // Protect against reentrance, which may occur, for example, when // fold points are discovered while performing styling and the folding // code looks for child lines which may trigger styling. 
performingStyle = true; const Sci::Position lengthDoc = pdoc->Length(); if (end == -1) end = lengthDoc; const Sci::Position len = end - start; PLATFORM_ASSERT(len >= 0); PLATFORM_ASSERT(start + len <= lengthDoc); int styleStart = 0; if (start > 0) styleStart = pdoc->StyleAt(start - 1); if (len > 0) { instance->Lex(start, len, styleStart, pdoc); instance->Fold(start, len, styleStart, pdoc); } performingStyle = false; } } LineEndType LexInterface::LineEndTypesSupported() { if (instance) { return static_cast(instance->LineEndTypesSupported()); } return LineEndType::Default; } bool LexInterface::UseContainerLexing() const noexcept { return !instance; } ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { } void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { // Only adjust for multiple actions to avoid instability if (numberActions < 8) return; // Alpha value for exponential smoothing. // Most recent value contributes 25% to smoothed value. 
constexpr double alpha = 0.25; const double durationOne = durationOfActions / numberActions; duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, minDuration, maxDuration); } double ActionDuration::Duration() const noexcept { return duration; } size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept { return std::lround(secondsAllowed / Duration()); } CharacterExtracted::CharacterExtracted(const unsigned char *charBytes, size_t widthCharBytes) noexcept { const int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { // Treat as invalid and use up just one byte character = unicodeReplacementChar; widthBytes = 1; } else { character = UnicodeFromUTF8(charBytes); widthBytes = utf8status & UTF8MaskWidth; } } Document::Document(DocumentOption options) : cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)), durationStyleOneByte(0.000001, 0.0000001, 0.00001) { refCount = 0; #ifdef _WIN32 eolMode = EndOfLine::CrLf; #else eolMode = EndOfLine::Lf; #endif dbcsCodePage = CpUtf8; lineEndBitSet = LineEndType::Default; endStyled = 0; styleClock = 0; enteredModification = 0; enteredStyling = 0; enteredReadOnlyCount = 0; insertionSet = false; tabInChars = 8; indentInChars = 0; actualIndentInChars = 8; useTabs = true; tabIndents = true; backspaceUnindents = false; matchesValid = false; perLineData[ldMarkers] = std::make_unique(); perLineData[ldLevels] = std::make_unique(); perLineData[ldState] = std::make_unique(); perLineData[ldMargin] = std::make_unique(); perLineData[ldAnnotation] = std::make_unique(); perLineData[ldEOLAnnotation] = std::make_unique(); decorations = DecorationListCreate(IsLarge()); cb.SetPerLine(this); cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); } Document::~Document() { for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyDeleted(this, watcher.userData); } } // Increase reference count and return its previous value. 
int Document::AddRef() { return refCount++; } // Decrease reference count and return its previous value. // Delete the document if reference count reaches zero. int SCI_METHOD Document::Release() { const int curRefCount = --refCount; if (curRefCount == 0) delete this; return curRefCount; } void Document::Init() { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->Init(); } } void Document::InsertLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->InsertLine(line); } } void Document::InsertLines(Sci::Line line, Sci::Line lines) { for (const auto &pl : perLineData) { if (pl) pl->InsertLines(line, lines); } } void Document::RemoveLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->RemoveLine(line); } } LineMarkers *Document::Markers() const noexcept { return static_cast(perLineData[ldMarkers].get()); } LineLevels *Document::Levels() const noexcept { return static_cast(perLineData[ldLevels].get()); } LineState *Document::States() const noexcept { return static_cast(perLineData[ldState].get()); } LineAnnotation *Document::Margins() const noexcept { return static_cast(perLineData[ldMargin].get()); } LineAnnotation *Document::Annotations() const noexcept { return static_cast(perLineData[ldAnnotation].get()); } LineAnnotation *Document::EOLAnnotations() const noexcept { return static_cast(perLineData[ldEOLAnnotation].get()); } LineEndType Document::LineEndTypesSupported() const { if ((CpUtf8 == dbcsCodePage) && pli) return pli->LineEndTypesSupported(); else return LineEndType::Default; } bool Document::SetDBCSCodePage(int dbcsCodePage_) { if (dbcsCodePage != dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; SetCaseFolder(nullptr); cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); cb.SetUTF8Substance(CpUtf8 == dbcsCodePage); ModifiedAt(0); // Need to restyle whole document return true; } else { return false; } } bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) { if (lineEndBitSet 
!= lineEndBitSet_) { lineEndBitSet = lineEndBitSet_; const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); if (lineEndBitSetActive != cb.GetLineEndTypes()) { ModifiedAt(0); cb.SetLineEndTypes(lineEndBitSetActive); return true; } else { return false; } } else { return false; } } void Document::SetSavePoint() { cb.SetSavePoint(); NotifySavePoint(true); } void Document::TentativeUndo() { if (!TentativeActive()) return; CheckReadOnly(); if (enteredModification == 0) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.TentativeSteps(); //Platform::DebugPrintf("Steps=%d\n", steps); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( ModificationFlags::BeforeInsert | ModificationFlags::Undo, action)); } else if (action.at == ActionType::container) { DocModification dm(ModificationFlags::Container | ModificationFlags::Undo); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( ModificationFlags::BeforeDelete | ModificationFlags::Undo, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); } ModificationFlags modFlags = ModificationFlags::Undo; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { modFlags |= ModificationFlags::InsertText; } else if (action.at == ActionType::insert) { modFlags |= ModificationFlags::DeleteText; } if (steps > 1) modFlags |= ModificationFlags::MultiStepUndoRedo; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= ModificationFlags::LastStepInUndoRedo; if (multiLine) modFlags |= ModificationFlags::MultilineUndoRedo; } NotifyModified(DocModification(modFlags, 
action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); cb.TentativeCommit(); } enteredModification--; } } int Document::GetMark(Sci::Line line, bool includeChangeHistory) const { int marksHistory = 0; if (includeChangeHistory && (line < LinesTotal())) { int marksEdition = 0; const Sci::Position start = LineStart(line); const Sci::Position lineNext = LineStart(line + 1); for (Sci::Position position = start; position < lineNext;) { const int edition = EditionAt(position); if (edition) { marksEdition |= 1 << (edition-1); } position = EditionEndRun(position); } const Sci::Position lineEnd = LineEnd(line); for (Sci::Position position = start; position <= lineEnd;) { marksEdition |= EditionDeletesAt(position); position = EditionNextDelete(position); } /* Bits: RevertedToOrigin, Saved, Modified, RevertedToModified */ constexpr unsigned int editionShift = static_cast(MarkerOutline::HistoryRevertedToOrigin); marksHistory = marksEdition << editionShift; } return marksHistory | Markers()->MarkValue(line); } Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { return Markers()->MarkerNext(lineStart, mask); } int Document::AddMark(Sci::Line line, int markerNum) { if (line >= 0 && line < LinesTotal()) { const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); return prev; } else { return -1; } } void Document::AddMarkSet(Sci::Line line, int valueSet) { if (line < 0 || line >= LinesTotal()) { return; } unsigned int m = valueSet; for (int i = 0; m; i++, m >>= 1) { if (m & 1) Markers()->AddMark(line, i, LinesTotal()); } const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMark(Sci::Line line, int markerNum) { 
Markers()->DeleteMark(line, markerNum, false); const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMarkFromHandle(int markerHandle) { Markers()->DeleteMarkFromHandle(markerHandle); DocModification mh(ModificationFlags::ChangeMarker); mh.line = -1; NotifyModified(mh); } void Document::DeleteAllMarks(int markerNum) { bool someChanges = false; for (Sci::Line line = 0; line < LinesTotal(); line++) { if (Markers()->DeleteMark(line, markerNum, true)) someChanges = true; } if (someChanges) { DocModification mh(ModificationFlags::ChangeMarker); mh.line = -1; NotifyModified(mh); } } Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { return Markers()->LineFromHandle(markerHandle); } int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { return Markers()->NumberFromLine(line, which); } int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { return Markers()->HandleFromLine(line, which); } Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const { return cb.LineStart(line); } Range Document::LineRange(Sci::Line line) const noexcept { return {cb.LineStart(line), cb.LineStart(line + 1)}; } bool Document::IsLineStartPosition(Sci::Position position) const { return LineStart(LineFromPosition(position)) == position; } Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { if (line >= LinesTotal() - 1) { return LineStart(line + 1); } else { Sci::Position position = LineStart(line + 1); if (LineEndType::Unicode == cb.GetLineEndTypes()) { const unsigned char bytes[] = { cb.UCharAt(position-3), cb.UCharAt(position-2), cb.UCharAt(position-1), }; if (UTF8IsSeparator(bytes)) { return position - UTF8SeparatorLength; } if (UTF8IsNEL(bytes+1)) { return position - UTF8NELLength; } } position--; // Back over CR or LF // When line terminator is CR+LF, may need to go back one more if ((position > LineStart(line)) && 
(cb.CharAt(position - 1) == '\r')) { position--; } return position; } } void SCI_METHOD Document::SetErrorStatus(int status) { // Tell the watchers an error has occurred. for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast(status)); } } Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const { return cb.LineFromPosition(pos); } Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept { // Avoids casting in callers for this very common function return cb.LineFromPosition(pos); } Sci::Position Document::LineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)); } bool Document::IsLineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)) == position; } bool Document::IsPositionInLineEnd(Sci::Position position) const { return position >= LineEnd(LineFromPosition(position)); } Sci::Position Document::VCHomePosition(Sci::Position position) const { const Sci::Line line = SciLineFromPosition(position); const Sci::Position startPosition = LineStart(line); const Sci::Position endLine = LineEnd(line); Sci::Position startText = startPosition; while (startText < endLine && IsSpaceOrTab(cb.CharAt(startText))) startText++; if (position == startText) return startPosition; else return startText; } Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept { return cb.IndexLineStart(line, lineCharacterIndex); } Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept { return cb.LineFromPositionIndex(pos, lineCharacterIndex); } Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept { const Sci::Position posAfter = cb.LineStart(line) + length; if (posAfter >= LengthNoExcept()) { return LinesTotal(); } const Sci::Line lineAfter = 
SciLineFromPosition(posAfter); if (lineAfter > line) { return lineAfter; } else { // Want to make some progress so return next line return lineAfter + 1; } } int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { const int prev = Levels()->SetLevel(line, level, LinesTotal()); if (prev != level) { DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line); mh.foldLevelNow = static_cast(level); mh.foldLevelPrev = static_cast(prev); NotifyModified(mh); } return prev; } int SCI_METHOD Document::GetLevel(Sci_Position line) const { return Levels()->GetLevel(line); } FoldLevel Document::GetFoldLevel(Sci_Position line) const noexcept { return Levels()->GetFoldLevel(line); } void Document::ClearLevels() { Levels()->ClearLevels(); } static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept { if (LevelIsWhitespace(levelTry)) return true; else return LevelNumber(levelStart) < LevelNumber(levelTry); } Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional level, Sci::Line lastLine) { const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent)); const Sci::Line maxLine = LinesTotal(); const Sci::Line lookLastLine = (lastLine != -1) ? 
std::min(LinesTotal() - 1, lastLine) : -1; Sci::Line lineMaxSubord = lineParent; while (lineMaxSubord < maxLine - 1) { EnsureStyledTo(LineStart(lineMaxSubord + 2)); if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1))) break; if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) break; lineMaxSubord++; } if (lineMaxSubord > lineParent) { if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) { // Have chewed up some whitespace that belongs to a parent so seek back if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) { lineMaxSubord--; } } } return lineMaxSubord; } Sci::Line Document::GetFoldParent(Sci::Line line) const noexcept { return Levels()->GetFoldParent(line); } void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) { const FoldLevel level = GetFoldLevel(line); const Sci::Line lookLastLine = std::max(line, lastLine) + 1; Sci::Line lookLine = line; FoldLevel lookLineLevel = level; FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel); while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) || (LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) { lookLineLevel = GetFoldLevel(--lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); } Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? 
lookLine : GetFoldParent(lookLine); if (beginFoldBlock == -1) { highlightDelimiter.Clear(); return; } Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine); Sci::Line firstChangeableLineBefore = -1; if (endFoldBlock < line) { lookLine = beginFoldBlock - 1; lookLineLevel = GetFoldLevel(lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) { if (LevelIsHeader(lookLineLevel)) { if (GetLastChild(lookLine, {}, lookLastLine) == line) { beginFoldBlock = lookLine; endFoldBlock = line; firstChangeableLineBefore = line - 1; } } if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum)) break; lookLineLevel = GetFoldLevel(--lookLine); lookLineLevelNum = LevelNumberPart(lookLineLevel); } } if (firstChangeableLineBefore == -1) { for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel); lookLine >= beginFoldBlock; lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) { if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) { firstChangeableLineBefore = lookLine; break; } } } if (firstChangeableLineBefore == -1) firstChangeableLineBefore = beginFoldBlock - 1; Sci::Line firstChangeableLineAfter = -1; for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel); lookLine <= endFoldBlock; lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) { if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) { firstChangeableLineAfter = lookLine; break; } } if (firstChangeableLineAfter == -1) firstChangeableLineAfter = endFoldBlock + 1; highlightDelimiter.beginFoldBlock = beginFoldBlock; highlightDelimiter.endFoldBlock = endFoldBlock; 
highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore; highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter; } Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { return std::clamp(pos, 0, LengthNoExcept()); } bool Document::IsCrLf(Sci::Position pos) const noexcept { if (pos < 0) return false; if (pos >= (LengthNoExcept() - 1)) return false; return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); } int Document::LenChar(Sci::Position pos) const noexcept { if (pos < 0 || pos >= LengthNoExcept()) { // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. return 1; } else if (IsCrLf(pos)) { return 2; } const unsigned char leadByte = cb.UCharAt(pos); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return 1; } if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b < widthCharBytes; b++) { charBytes[b] = cb.UCharAt(pos + b); } const int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { // Treat as invalid and use up just one byte return 1; } else { return utf8status & UTF8MaskWidth; } } else { if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) { return 2; } else { return 1; } } } bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept { Sci::Position trail = pos; while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1))) trail--; start = (trail > 0) ? 
trail-1 : trail; const unsigned char leadByte = cb.UCharAt(start); const int widthCharBytes = UTF8BytesOfLead[leadByte]; if (widthCharBytes == 1) { return false; } else { const int trailBytes = widthCharBytes - 1; const Sci::Position len = pos - start; if (len > trailBytes) // pos too far from lead return false; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (Sci::Position b=1; b= LengthNoExcept()) return LengthNoExcept(); // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept()); if (checkLineEnd && IsCrLf(pos - 1)) { if (moveDir > 0) return pos + 1; else return pos - 1; } if (dbcsCodePage) { if (CpUtf8 == dbcsCodePage) { const unsigned char ch = cb.UCharAt(pos); // If ch is not a trail byte then pos is valid intercharacter position if (UTF8IsTrailByte(ch)) { Sci::Position startUTF = pos; Sci::Position endUTF = pos; if (InGoodUTF8(pos, startUTF, endUTF)) { // ch is a trail byte within a UTF-8 character if (moveDir > 0) pos = endUTF; else pos = startUTF; } // Else invalid UTF-8 so return position of isolated trail byte } } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); if (pos == posStartLine) return pos; // Step back until a non-lead-byte is found. Sci::Position posCheck = pos; while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1))) posCheck--; // Check from known start of character. while (posCheck < pos) { const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1; if (posCheck + mbsize == pos) { return pos; } else if (posCheck + mbsize > pos) { if (moveDir > 0) { return posCheck + mbsize; } else { return posCheck; } } posCheck += mbsize; } } } return pos; } // NextPosition moves between valid positions - it can not handle a position in the middle of a // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar. 
// A \r\n pair is treated as two characters. Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept { // If out of range, just return minimum/maximum value. const int increment = (moveDir > 0) ? 1 : -1; if (pos + increment <= 0) return 0; if (pos + increment >= cb.Length()) return cb.Length(); if (dbcsCodePage) { if (CpUtf8 == dbcsCodePage) { if (increment == 1) { // Simple forward movement case so can avoid some checks const unsigned char leadByte = cb.UCharAt(pos); if (UTF8IsAscii(leadByte)) { // Single byte character or invalid pos++; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b 0) { const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1; pos += mbsize; if (pos > cb.Length()) pos = cb.Length(); } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx // http://msdn.microsoft.com/en-us/library/cc194790.aspx if ((pos - 1) <= posStartLine) { return pos - 1; } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) { // Should actually be trail byte if (IsDBCSDualByteAt(pos - 2)) { return pos - 2; } else { // Invalid byte pair so treat as one byte wide return pos - 1; } } else { // Otherwise, step back until a non-lead-byte is found. Sci::Position posTemp = pos - 1; while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp))) ; // Now posTemp+1 must point to the beginning of a character, // so figure out whether we went back an even or an odd // number of bytes and go back 1 or 2 bytes, respectively. 
const Sci::Position widthLast = ((pos - posTemp) & 1) + 1; if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) { return pos - widthLast; } // Byte before pos may be valid character or may be an invalid second byte return pos - 1; } } } } else { pos += increment; } return pos; } bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { // Returns true if pos changed Sci::Position posNext = NextPosition(pos, moveDir); if (posNext == pos) { return false; } else { pos = posNext; return true; } } CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { if (position >= LengthNoExcept()) { return CharacterExtracted(unicodeReplacementChar, 0); } const unsigned char leadByte = cb.UCharAt(position); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return Sci::invalidPosition; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > Length())) return Sci::invalidPosition; } return pos; } Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { Sci::Position pos = positionStart; if (dbcsCodePage) { const int increment = (characterOffset > 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return Sci::invalidPosition; if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. 
characterOffset -= increment; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > LengthNoExcept())) return Sci::invalidPosition; } return pos; } int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { int bytesInCharacter = 1; const unsigned char leadByte = cb.UCharAt(position); int character = leadByte; if (dbcsCodePage && !UTF8IsAscii(leadByte)) { if (CpUtf8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b= 0x81) && (uch <= 0x9F)) || ((uch >= 0xE0) && (uch <= 0xFC)); // Lead bytes F0 to FC may be a Microsoft addition. case 936: // GBK return (uch >= 0x81) && (uch <= 0xFE); case 949: // Korean Wansung KS C-5601-1987 return (uch >= 0x81) && (uch <= 0xFE); case 950: // Big5 return (uch >= 0x81) && (uch <= 0xFE); case 1361: // Korean Johab KS C-5601-1992 return ((uch >= 0x84) && (uch <= 0xD3)) || ((uch >= 0xD8) && (uch <= 0xDE)) || ((uch >= 0xE0) && (uch <= 0xF9)); } return false; } bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept { const unsigned char trail = ch; switch (dbcsCodePage) { case 932: // Shift_jis return (trail != 0x7F) && ((trail >= 0x40) && (trail <= 0xFC)); case 936: // GBK return (trail != 0x7F) && ((trail >= 0x40) && (trail <= 0xFE)); case 949: // Korean Wansung KS C-5601-1987 return ((trail >= 0x41) && (trail <= 0x5A)) || ((trail >= 0x61) && (trail <= 0x7A)) || ((trail >= 0x81) && (trail <= 0xFE)); case 950: // Big5 return ((trail >= 0x40) && (trail <= 0x7E)) || ((trail >= 0xA1) && (trail <= 0xFE)); case 1361: // Korean Johab KS C-5601-1992 return ((trail >= 0x31) && (trail <= 0x7E)) || ((trail >= 0x81) && (trail <= 0xFE)); } return false; } int Document::DBCSDrawBytes(std::string_view text) const noexcept { if (text.length() <= 1) { return static_cast(text.length()); } if (IsDBCSLeadByteNoExcept(text[0])) { return 
IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1; } else { return 1; } }

// Return true when the byte at pos is a lead byte for the current DBCS code page
// and the byte at pos + 1 is a valid trail byte, so the two bytes form one
// double-byte character.
bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept {
	return IsDBCSLeadByteNoExcept(cb.CharAt(pos))
		&& IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1));
}

// Need to break text into segments near end but taking into account the
// encoding to not break inside a UTF-8 or DBCS character and also trying
// to avoid breaking inside a pair of combining characters, or inside
// ligatures.
// TODO: implement grapheme cluster boundaries,
// see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
//
// The segment length must always be long enough (more than 4 bytes)
// so that there will be at least one whole character to make a segment.
// For UTF-8, text must consist only of valid whole characters.
// In preference order from best to worst:
// 1) Break before or after spaces or controls
// 2) Break at word and punctuation boundary for better kerning and ligature support
// 3) Break after whole character, this may break combining characters
//
// Returns the byte offset into text at which the segment should end.
// The function does not check for an empty text; callers must honour the
// minimum length stated above.
size_t Document::SafeSegment(std::string_view text) const noexcept {
	// check space first as most written language use spaces.
	// The scan starts at the last byte and stops before text[0], so any
	// break found here is at an offset greater than zero.
	for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) {
		if (IsBreakSpace(*it)) {
			return it - text.begin();
		}
	}

	if (!dbcsCodePage || dbcsCodePage == CpUtf8) {
		// backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary.
		std::string_view::iterator it = text.end() - 1;
		const bool punctuation = IsPunctuation(*it);
		do {
			--it;
			if (punctuation != IsPunctuation(*it)) {
				// Break immediately after the byte whose punctuation state
				// differs from that of the last byte.
				return it - text.begin() + 1;
			}
		} while (it != text.begin());

		// No boundary was found: break before the last character instead.
		it = text.end() - 1;
		if (dbcsCodePage) {
			// for UTF-8 go back to the start of last character.
			for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) {
				--it;
			}
		}
		return it - text.begin();
	}

	{
		// forward iterate for DBCS to find word and punctuation boundary.
		size_t lastPunctuationBreak = 0;
		size_t lastEncodingAllowedBreak = 0;
		CharacterClass ccPrev = CharacterClass::space;
		for (size_t j = 0; j < text.length();) {
			const unsigned char ch = text[j];
			// The start of every character is a position where the encoding permits a break.
			lastEncodingAllowedBreak = j++;

			CharacterClass cc = CharacterClass::word;
			if (UTF8IsAscii(ch)) {
				if (IsPunctuation(ch)) {
					cc = CharacterClass::punctuation;
				}
			} else {
				// Step over one extra byte when ch is a DBCS lead byte.
				j += IsDBCSLeadByteNoExcept(ch);
			}
			if (cc != ccPrev) {
				ccPrev = cc;
				lastPunctuationBreak = lastEncodingAllowedBreak;
			}
		}
		// A zero lastPunctuationBreak means the class never changed after the
		// first character, so fall back to the start of the last character.
		return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak;
	}
}

// Classify the document encoding from dbcsCodePage: UTF-8 is unicode,
// any other non-zero code page is dbcs, and zero is eightBit.
EncodingFamily Document::CodePageFamily() const noexcept {
	if (CpUtf8 == dbcsCodePage)
		return EncodingFamily::unicode;
	else if (dbcsCodePage)
		return EncodingFamily::dbcs;
	else
		return EncodingFamily::eightBit;
}

// Record a change at pos by pulling endStyled back to pos when it lies beyond it.
// endStyled is never moved forward here.
void Document::ModifiedAt(Sci::Position pos) noexcept {
	if (endStyled > pos)
		endStyled = pos;
}

// When the buffer is read-only, call NotifyModifyAttempt().
// enteredReadOnlyCount prevents a nested call from notifying again while
// a notification is already in progress.
void Document::CheckReadOnly() {
	if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
		enteredReadOnlyCount++;
		NotifyModifyAttempt();
		enteredReadOnlyCount--;
	}
}

// Shrink a replacement so that only the differing middle part remains.
// Leading bytes of text equal to the document bytes at range.start are dropped
// and range.start advanced; then trailing bytes equal to the document bytes
// before range.end are dropped and range.end moved back. Both arguments are
// modified in place. Trimming stops as soon as either text or range is empty.
void Document::TrimReplacement(std::string_view &text, Range &range) const noexcept {
	while (!text.empty() && !range.Empty() && (text.front() == CharAt(range.start))) {
		text.remove_prefix(1);
		range.start++;
	}
	while (!text.empty() && !range.Empty() && (text.back() == CharAt(range.end-1))) {
		text.remove_suffix(1);
		range.end--;
	}
}

// Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
// SetStyleAt does not change the persistent state of a document bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { if (pos < 0) return false; if (len <= 0) return false; if ((pos + len) > LengthNoExcept()) return false; CheckReadOnly(); if (enteredModification != 0) { return false; } else { enteredModification++; if (!cb.IsReadOnly()) { NotifyModified( DocModification( ModificationFlags::BeforeDelete | ModificationFlags::User, pos, len, 0, nullptr)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.DeleteChars(pos, len, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); if ((pos < LengthNoExcept()) || (pos == 0)) ModifiedAt(pos); else ModifiedAt(pos-1); NotifyModified( DocModification( ModificationFlags::DeleteText | ModificationFlags::User | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), pos, len, LinesTotal() - prevLinesTotal, text)); } enteredModification--; } return !cb.IsReadOnly(); } /** * Insert a string with a length. 
*/ Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) { if (insertLength <= 0) { return 0; } CheckReadOnly(); // Application may change read only state here if (cb.IsReadOnly()) { return 0; } if (enteredModification != 0) { return 0; } enteredModification++; insertionSet = false; insertion.clear(); NotifyModified( DocModification( ModificationFlags::InsertCheck, position, insertLength, 0, s)); if (insertionSet) { s = insertion.c_str(); insertLength = insertion.length(); } NotifyModified( DocModification( ModificationFlags::BeforeInsert | ModificationFlags::User, position, insertLength, 0, s)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.InsertString(position, s, insertLength, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); ModifiedAt(position); NotifyModified( DocModification( ModificationFlags::InsertText | ModificationFlags::User | (startSequence?ModificationFlags::StartAction:ModificationFlags::None), position, insertLength, LinesTotal() - prevLinesTotal, text)); if (insertionSet) { // Free memory as could be large std::string().swap(insertion); } enteredModification--; return insertLength; } Sci::Position Document::InsertString(Sci::Position position, std::string_view sv) { return InsertString(position, sv.data(), sv.length()); } void Document::ChangeInsertion(const char *s, Sci::Position length) { insertionSet = true; insertion.assign(s, length); } int SCI_METHOD Document::AddData(const char *data, Sci_Position length) { try { const Sci::Position position = Length(); InsertString(position, data, length); } catch (std::bad_alloc &) { return static_cast(Status::BadAlloc); } catch (...) 
{ return static_cast(Status::Failure); } return static_cast(Status::Ok); } void * SCI_METHOD Document::ConvertToDocument() { return this; } Sci::Position Document::Undo() { Sci::Position newPos = -1; CheckReadOnly(); if ((enteredModification == 0) && (cb.IsCollectingUndo())) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.StartUndo(); //Platform::DebugPrintf("Steps=%d\n", steps); Sci::Position coalescedRemovePos = -1; Sci::Position coalescedRemoveLen = 0; Sci::Position prevRemoveActionPos = -1; Sci::Position prevRemoveActionLen = 0; for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( ModificationFlags::BeforeInsert | ModificationFlags::Undo, action)); } else if (action.at == ActionType::container) { DocModification dm(ModificationFlags::Container | ModificationFlags::Undo); dm.token = action.position; NotifyModified(dm); if (!action.mayCoalesce) { coalescedRemovePos = -1; coalescedRemoveLen = 0; prevRemoveActionPos = -1; prevRemoveActionLen = 0; } } else { NotifyModified(DocModification( ModificationFlags::BeforeDelete | ModificationFlags::Undo, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); newPos = action.position; } ModificationFlags modFlags = ModificationFlags::Undo; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { newPos += action.lenData; modFlags |= ModificationFlags::InsertText; if ((coalescedRemoveLen > 0) && (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) { coalescedRemoveLen += action.lenData; newPos = coalescedRemovePos + coalescedRemoveLen; } else { coalescedRemovePos = action.position; coalescedRemoveLen = action.lenData; } 
prevRemoveActionPos = action.position; prevRemoveActionLen = action.lenData; } else if (action.at == ActionType::insert) { modFlags |= ModificationFlags::DeleteText; coalescedRemovePos = -1; coalescedRemoveLen = 0; prevRemoveActionPos = -1; prevRemoveActionLen = 0; } if (steps > 1) modFlags |= ModificationFlags::MultiStepUndoRedo; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= ModificationFlags::LastStepInUndoRedo; if (multiLine) modFlags |= ModificationFlags::MultilineUndoRedo; } NotifyModified(DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); } enteredModification--; } return newPos; } Sci::Position Document::Redo() { Sci::Position newPos = -1; CheckReadOnly(); if ((enteredModification == 0) && (cb.IsCollectingUndo())) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.StartRedo(); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetRedoStep(); if (action.at == ActionType::insert) { NotifyModified(DocModification( ModificationFlags::BeforeInsert | ModificationFlags::Redo, action)); } else if (action.at == ActionType::container) { DocModification dm(ModificationFlags::Container | ModificationFlags::Redo); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( ModificationFlags::BeforeDelete | ModificationFlags::Redo, action)); } cb.PerformRedoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); newPos = action.position; } ModificationFlags modFlags = ModificationFlags::Redo; if (action.at == ActionType::insert) { newPos += action.lenData; modFlags |= ModificationFlags::InsertText; } else if (action.at == 
ActionType::remove) { modFlags |= ModificationFlags::DeleteText; } if (steps > 1) modFlags |= ModificationFlags::MultiStepUndoRedo; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= ModificationFlags::LastStepInUndoRedo; if (multiLine) modFlags |= ModificationFlags::MultilineUndoRedo; } NotifyModified( DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); } enteredModification--; } return newPos; } void Document::DelChar(Sci::Position pos) { DeleteChars(pos, LenChar(pos)); } void Document::DelCharBack(Sci::Position pos) { if (pos <= 0) { return; } else if (IsCrLf(pos - 2)) { DeleteChars(pos - 2, 2); } else if (dbcsCodePage) { const Sci::Position startChar = NextPosition(pos, -1); DeleteChars(startChar, pos - startChar); } else { DeleteChars(pos - 1, 1); } } static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept { return ((pos / tabSize) + 1) * tabSize; } static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) { std::string indentation; if (!insertSpaces) { while (indent >= tabSize) { indentation += '\t'; indent -= tabSize; } } while (indent > 0) { indentation += ' '; indent--; } return indentation; } int SCI_METHOD Document::GetLineIndentation(Sci_Position line) { int indent = 0; if ((line >= 0) && (line < LinesTotal())) { const Sci::Position lineStart = LineStart(line); const Sci::Position length = Length(); for (Sci::Position i = lineStart; i < length; i++) { const char ch = cb.CharAt(i); if (ch == ' ') indent++; else if (ch == '\t') indent = static_cast(NextTab(indent, tabInChars)); else return indent; } } return indent; } Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) { const int indentOfLine = GetLineIndentation(line); if 
(indent < 0) indent = 0; if (indent != indentOfLine) { const std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs); const Sci::Position thisLineStart = LineStart(line); const Sci::Position indentPos = GetLineIndentPosition(line); UndoGroup ug(this); DeleteChars(thisLineStart, indentPos - thisLineStart); return thisLineStart + InsertString(thisLineStart, linebuf); } else { return GetLineIndentPosition(line); } } Sci::Position Document::GetLineIndentPosition(Sci::Line line) const { if (line < 0) return 0; Sci::Position pos = LineStart(line); const Sci::Position length = Length(); while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) { pos++; } return pos; } Sci::Position Document::GetColumn(Sci::Position pos) const { Sci::Position column = 0; const Sci::Line line = SciLineFromPosition(pos); if ((line >= 0) && (line < LinesTotal())) { for (Sci::Position i = LineStart(line); i < pos;) { const char ch = cb.CharAt(i); if (ch == '\t') { column = NextTab(column, tabInChars); i++; } else if (ch == '\r') { return column; } else if (ch == '\n') { return column; } else if (i >= Length()) { return column; } else if (UTF8IsAscii(ch)) { column++; i++; } else { column++; i = NextPosition(i, 1); } } } return column; } Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept { startPos = MovePositionOutsideChar(startPos, 1, false); endPos = MovePositionOutsideChar(endPos, -1, false); Sci::Position count = 0; Sci::Position i = startPos; while (i < endPos) { count++; i = NextPosition(i, 1); } return count; } Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept { startPos = MovePositionOutsideChar(startPos, 1, false); endPos = MovePositionOutsideChar(endPos, -1, false); Sci::Position count = 0; Sci::Position i = startPos; while (i < endPos) { count++; const Sci::Position next = NextPosition(i, 1); if ((next - i) > 3) count++; i = next; } return count; } Sci::Position 
Document::FindColumn(Sci::Line line, Sci::Position column) { Sci::Position position = LineStart(line); if ((line >= 0) && (line < LinesTotal())) { Sci::Position columnCurrent = 0; while ((columnCurrent < column) && (position < Length())) { const char ch = cb.CharAt(position); if (ch == '\t') { columnCurrent = NextTab(columnCurrent, tabInChars); if (columnCurrent > column) return position; position++; } else if (ch == '\r') { return position; } else if (ch == '\n') { return position; } else { columnCurrent++; position = NextPosition(position, 1); } } } return position; } void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) { // Dedent - suck white space off the front of the line to dedent by equivalent of a tab for (Sci::Line line = lineBottom; line >= lineTop; line--) { const Sci::Position indentOfLine = GetLineIndentation(line); if (forwards) { if (LineStart(line) < LineEnd(line)) { SetLineIndentation(line, indentOfLine + IndentSize()); } } else { SetLineIndentation(line, indentOfLine - IndentSize()); } } } // Convert line endings for a piece of text to a particular mode. // Stop at len or when a NUL is found. 
std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) { std::string dest; for (size_t i = 0; (i < len) && (s[i]); i++) { if (s[i] == '\n' || s[i] == '\r') { if (eolModeWanted == EndOfLine::Cr) { dest.push_back('\r'); } else if (eolModeWanted == EndOfLine::Lf) { dest.push_back('\n'); } else { // eolModeWanted == EndOfLine::CrLf dest.push_back('\r'); dest.push_back('\n'); } if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) { i++; } } else { dest.push_back(s[i]); } } return dest; } void Document::ConvertLineEnds(EndOfLine eolModeSet) { UndoGroup ug(this); for (Sci::Position pos = 0; pos < Length(); pos++) { const char ch = cb.CharAt(pos); if (ch == '\r') { if (cb.CharAt(pos + 1) == '\n') { // CRLF if (eolModeSet == EndOfLine::Cr) { DeleteChars(pos + 1, 1); // Delete the LF } else if (eolModeSet == EndOfLine::Lf) { DeleteChars(pos, 1); // Delete the CR } else { pos++; } } else { // CR if (eolModeSet == EndOfLine::CrLf) { pos += InsertString(pos + 1, "\n", 1); // Insert LF } else if (eolModeSet == EndOfLine::Lf) { pos += InsertString(pos, "\n", 1); // Insert LF DeleteChars(pos, 1); // Delete CR pos--; } } } else if (ch == '\n') { // LF if (eolModeSet == EndOfLine::CrLf) { pos += InsertString(pos, "\r", 1); // Insert CR } else if (eolModeSet == EndOfLine::Cr) { pos += InsertString(pos, "\r", 1); // Insert CR DeleteChars(pos, 1); // Delete LF pos--; } } } } std::string_view Document::EOLString() const noexcept { if (eolMode == EndOfLine::CrLf) { return "\r\n"; } else if (eolMode == EndOfLine::Cr) { return "\r"; } else { return "\n"; } } DocumentOption Document::Options() const noexcept { return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) | (cb.HasStyles() ? 
DocumentOption::Default : DocumentOption::StylesNone); } bool Document::IsWhiteLine(Sci::Line line) const { Sci::Position currentChar = LineStart(line); const Sci::Position endLine = LineEnd(line); while (currentChar < endLine) { if (!IsSpaceOrTab(cb.CharAt(currentChar))) { return false; } ++currentChar; } return true; } Sci::Position Document::ParaUp(Sci::Position pos) const { Sci::Line line = SciLineFromPosition(pos); const Sci::Position start = LineStart(line); if (pos == start) { line--; } while (line >= 0 && IsWhiteLine(line)) { // skip empty lines line--; } while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines line--; } line++; return LineStart(line); } Sci::Position Document::ParaDown(Sci::Position pos) const { Sci::Line line = SciLineFromPosition(pos); while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines line++; } while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines line++; } if (line < LinesTotal()) return LineStart(line); else // end of a document return LineEnd(line-1); } CharacterClass Document::WordCharacterClass(unsigned int ch) const { if (dbcsCodePage && (ch >= 0x80)) { if (CpUtf8 == dbcsCodePage) { // Use hard coded Unicode class const CharacterCategory cc = charMap.CategoryFor(ch); switch (cc) { // Separator, Line/Paragraph case ccZl: case ccZp: return CharacterClass::newLine; // Separator, Space case ccZs: // Other case ccCc: case ccCf: case ccCs: case ccCo: case ccCn: return CharacterClass::space; // Letter case ccLu: case ccLl: case ccLt: case ccLm: case ccLo: // Number case ccNd: case ccNl: case ccNo: // Mark - includes combining diacritics case ccMn: case ccMc: case ccMe: return CharacterClass::word; // Punctuation case ccPc: case ccPd: case ccPs: case ccPe: case ccPi: case ccPf: case ccPo: // Symbol case ccSm: case ccSc: case ccSk: case ccSo: return CharacterClass::punctuation; } } else { // Asian DBCS return CharacterClass::word; } } return charClass.GetClass(static_cast(ch)); } /** * 
Used by commands that want to select whole words.
 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
 * When onlyWordCharacters is false the class of the character next to pos, in the
 * direction of travel, decides which run of characters is spanned.
 */
Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
	const bool backwards = delta < 0;
	CharacterClass wanted = CharacterClass::word;
	if (backwards) {
		if (!onlyWordCharacters) {
			wanted = WordCharacterClass(CharacterBefore(pos).character);
		}
		// Step left over characters of the wanted class.
		while (pos > 0) {
			const CharacterExtracted before = CharacterBefore(pos);
			if (WordCharacterClass(before.character) == wanted) {
				pos -= before.widthBytes;
			} else {
				break;
			}
		}
	} else {
		if (!onlyWordCharacters && (pos < LengthNoExcept())) {
			wanted = WordCharacterClass(CharacterAfter(pos).character);
		}
		// Step right over characters of the wanted class.
		while (pos < LengthNoExcept()) {
			const CharacterExtracted after = CharacterAfter(pos);
			if (WordCharacterClass(after.character) == wanted) {
				pos += after.widthBytes;
			} else {
				break;
			}
		}
	}
	return MovePositionOutsideChar(pos, delta, true);
}

/**
 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
 * (delta < 0).
 * This is looking for a transition between character classes although there is also some
 * additional movement to transit white space.
 * Used by cursor movement by word commands.
*/
Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
	// Move pos left while the character before it belongs to cls.
	const auto retreatOver = [this, &pos](CharacterClass cls) {
		while (pos > 0) {
			const CharacterExtracted before = CharacterBefore(pos);
			if (WordCharacterClass(before.character) != cls)
				return;
			pos -= before.widthBytes;
		}
	};
	// Move pos right while the character after it belongs to cls.
	const auto advanceOver = [this, &pos](CharacterClass cls) {
		while (pos < LengthNoExcept()) {
			const CharacterExtracted after = CharacterAfter(pos);
			if (WordCharacterClass(after.character) != cls)
				return;
			pos += after.widthBytes;
		}
	};

	if (delta < 0) {
		// Backwards: cross any space first, then the run of one class before it.
		retreatOver(CharacterClass::space);
		if (pos > 0) {
			const CharacterClass runClass = WordCharacterClass(CharacterBefore(pos).character);
			retreatOver(runClass);
		}
	} else {
		// Forwards: cross the run of the class at pos, then any space after it.
		const CharacterClass runClass = WordCharacterClass(CharacterAfter(pos).character);
		advanceOver(runClass);
		advanceOver(CharacterClass::space);
	}
	return pos;
}

/**
 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
 * (delta < 0).
 * This is looking for a transition between character classes although there is also some
 * additional movement to transit white space.
 * Used by cursor movement by word commands.
*/ Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const { if (delta < 0) { if (pos > 0) { CharacterExtracted ce = CharacterBefore(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); if (ccStart != CharacterClass::space) { while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos -= ce.widthBytes; } } while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos -= ce.widthBytes; } } } else { while (pos < LengthNoExcept()) { const CharacterExtracted ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos += ce.widthBytes; } if (pos < LengthNoExcept()) { CharacterExtracted ce = CharacterAfter(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); while (pos < LengthNoExcept()) { ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos += ce.widthBytes; } } } return pos; } namespace { constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept { return (cc != ccNext) && (cc == CharacterClass::word || cc == CharacterClass::punctuation); } } /** * Check that the character at the given position is a word or punctuation character and that * the previous character is of a different character class. */ bool Document::IsWordStartAt(Sci::Position pos) const { if (pos >= LengthNoExcept()) return false; if (pos >= 0) { const CharacterExtracted cePos = CharacterAfter(pos); // At start of document, treat as if space before so can be word start const CharacterExtracted cePrev = (pos > 0) ? CharacterBefore(pos) : CharacterExtracted(' ', 1); return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character)); } return true; } /** * Check that the character before the given position is a word or punctuation character and that * the next character is of a different character class. 
*/ bool Document::IsWordEndAt(Sci::Position pos) const { if (pos <= 0) return false; if (pos <= LengthNoExcept()) { // At end of document, treat as if space after so can be word end const CharacterExtracted cePos = (pos < LengthNoExcept()) ? CharacterAfter(pos) : CharacterExtracted(' ', 1); const CharacterExtracted cePrev = CharacterBefore(pos); return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character)); } return true; } /** * Check that the given range is has transitions between character classes at both * ends and where the characters on the inside are word or punctuation characters. */ bool Document::IsWordAt(Sci::Position start, Sci::Position end) const { return (start < end) && IsWordStartAt(start) && IsWordEndAt(end); } bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const { return (!word && !wordStart) || (word && IsWordAt(pos, pos + length)) || (wordStart && IsWordStartAt(pos)); } bool Document::HasCaseFolder() const noexcept { return pcf != nullptr; } void Document::SetCaseFolder(std::unique_ptr pcf_) noexcept { pcf = std::move(pcf_); } CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept { const unsigned char leadByte = cb.UCharAt(position); if (UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b=1; b(memchr(view.segment1 + start, ch, range1Length)); if (match) { return match - view.segment1; } start += range1Length; } const char *match2 = static_cast(memchr(view.segment2 + start, ch, length - range1Length)); if (match2) { return match2 - view.segment2; } return -1; } // Equivalent of memcmp over the split view // This does not call memcmp as search texts are commonly too short to overcome the // call overhead. 
bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept { for (size_t i = 0; i < text.length(); i++) { if (view.CharAt(i + start) != text[i]) { return false; } } return true; } } /** * Find text in document, supporting both forward and backwar