// Scintilla source code edit control /** @file Document.cxx ** Text document that handles notifications, DBCS, styling, words and end of line. **/ // Copyright 1998-2011 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NO_CXX11_REGEX #include #endif #include "Debugging.h" #include "ILoader.h" #include "ILexer.h" #include "Scintilla.h" #include "CharacterType.h" #include "CharacterCategoryMap.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" #include "RunStyles.h" #include "CellBuffer.h" #include "PerLine.h" #include "CharClassify.h" #include "Decoration.h" #include "CaseFolder.h" #include "Document.h" #include "RESearch.h" #include "UniConversion.h" #include "ElapsedPeriod.h" using namespace Scintilla; void LexInterface::Colourise(Sci::Position start, Sci::Position end) { if (pdoc && instance && !performingStyle) { // Protect against reentrance, which may occur, for example, when // fold points are discovered while performing styling and the folding // code looks for child lines which may trigger styling. performingStyle = true; const Sci::Position lengthDoc = pdoc->Length(); if (end == -1) end = lengthDoc; const Sci::Position len = end - start; PLATFORM_ASSERT(len >= 0); PLATFORM_ASSERT(start + len <= lengthDoc); int styleStart = 0; if (start > 0) styleStart = pdoc->StyleAt(start - 1); if (len > 0) { instance->Lex(start, len, styleStart, pdoc); instance->Fold(start, len, styleStart, pdoc); } performingStyle = false; } } int LexInterface::LineEndTypesSupported() { if (instance) { return instance->LineEndTypesSupported(); } return 0; } ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { } void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { // Only adjust for multiple actions to avoid instability if (numberActions < 8) return; // Alpha value for exponential smoothing. // Most recent value contributes 25% to smoothed value. constexpr double alpha = 0.25; const double durationOne = durationOfActions / numberActions; duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, minDuration, maxDuration); } double ActionDuration::Duration() const noexcept { return duration; } size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept { return std::lround(secondsAllowed / Duration()); } Document::Document(int options) : cb((options & SC_DOCUMENTOPTION_STYLES_NONE) == 0, (options & SC_DOCUMENTOPTION_TEXT_LARGE) != 0), durationStyleOneByte(0.000001, 0.0000001, 0.00001) { refCount = 0; #ifdef _WIN32 eolMode = SC_EOL_CRLF; #else eolMode = SC_EOL_LF; #endif dbcsCodePage = SC_CP_UTF8; lineEndBitSet = SC_LINE_END_TYPE_DEFAULT; endStyled = 0; styleClock = 0; enteredModification = 0; enteredStyling = 0; enteredReadOnlyCount = 0; insertionSet = false; tabInChars = 8; indentInChars = 0; actualIndentInChars = 8; useTabs = true; tabIndents = true; backspaceUnindents = false; matchesValid = false; perLineData[ldMarkers] = std::make_unique(); perLineData[ldLevels] = std::make_unique(); perLineData[ldState] = std::make_unique(); perLineData[ldMargin] = std::make_unique(); perLineData[ldAnnotation] = std::make_unique(); perLineData[ldEOLAnnotation] = std::make_unique(); decorations = DecorationListCreate(IsLarge()); cb.SetPerLine(this); cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); } Document::~Document() { for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyDeleted(this, watcher.userData); } } // Increase reference count and return its previous value. int Document::AddRef() { return refCount++; } // Decrease reference count and return its previous value. // Delete the document if reference count reaches zero. int SCI_METHOD Document::Release() { const int curRefCount = --refCount; if (curRefCount == 0) delete this; return curRefCount; } void Document::Init() { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->Init(); } } void Document::InsertLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->InsertLine(line); } } void Document::InsertLines(Sci::Line line, Sci::Line lines) { for (const auto &pl : perLineData) { if (pl) pl->InsertLines(line, lines); } } void Document::RemoveLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->RemoveLine(line); } } LineMarkers *Document::Markers() const noexcept { return dynamic_cast(perLineData[ldMarkers].get()); } LineLevels *Document::Levels() const noexcept { return dynamic_cast(perLineData[ldLevels].get()); } LineState *Document::States() const noexcept { return dynamic_cast(perLineData[ldState].get()); } LineAnnotation *Document::Margins() const noexcept { return dynamic_cast(perLineData[ldMargin].get()); } LineAnnotation *Document::Annotations() const noexcept { return dynamic_cast(perLineData[ldAnnotation].get()); } LineAnnotation *Document::EOLAnnotations() const noexcept { return dynamic_cast(perLineData[ldEOLAnnotation].get()); } int Document::LineEndTypesSupported() const { if ((SC_CP_UTF8 == dbcsCodePage) && pli) return pli->LineEndTypesSupported(); else return 0; } bool Document::SetDBCSCodePage(int dbcsCodePage_) { if (dbcsCodePage != dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; SetCaseFolder(nullptr); cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); ModifiedAt(0); // Need to restyle whole document return true; } else { return false; } } bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) { if (lineEndBitSet != lineEndBitSet_) { lineEndBitSet = lineEndBitSet_; const int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); if (lineEndBitSetActive != cb.GetLineEndTypes()) { ModifiedAt(0); cb.SetLineEndTypes(lineEndBitSetActive); return true; } else { return false; } } else { return false; } } void Document::SetSavePoint() { cb.SetSavePoint(); NotifySavePoint(true); } void Document::TentativeUndo() { if (!TentativeActive()) return; CheckReadOnly(); if (enteredModification == 0) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.TentativeSteps(); //Platform::DebugPrintf("Steps=%d\n", steps); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action)); } else if (action.at == ActionType::container) { DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); } int modFlags = SC_PERFORMED_UNDO; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { modFlags |= SC_MOD_INSERTTEXT; } else if (action.at == ActionType::insert) { modFlags |= SC_MOD_DELETETEXT; } if (steps > 1) modFlags |= SC_MULTISTEPUNDOREDO; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= SC_LASTSTEPINUNDOREDO; if (multiLine) modFlags |= SC_MULTILINEUNDOREDO; } NotifyModified(DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); cb.TentativeCommit(); } enteredModification--; } } int Document::GetMark(Sci::Line line) const noexcept { return Markers()->MarkValue(line); } Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { return Markers()->MarkerNext(lineStart, mask); } int Document::AddMark(Sci::Line line, int markerNum) { if (line >= 0 && line <= LinesTotal()) { const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); return prev; } else { return -1; } } void Document::AddMarkSet(Sci::Line line, int valueSet) { if (line < 0 || line > LinesTotal()) { return; } unsigned int m = valueSet; for (int i = 0; m; i++, m >>= 1) { if (m & 1) Markers()->AddMark(line, i, LinesTotal()); } const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMark(Sci::Line line, int markerNum) { Markers()->DeleteMark(line, markerNum, false); const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMarkFromHandle(int markerHandle) { Markers()->DeleteMarkFromHandle(markerHandle); DocModification mh(SC_MOD_CHANGEMARKER); mh.line = -1; NotifyModified(mh); } void Document::DeleteAllMarks(int markerNum) { bool someChanges = false; for (Sci::Line line = 0; line < LinesTotal(); line++) { if (Markers()->DeleteMark(line, markerNum, true)) someChanges = true; } if (someChanges) { DocModification mh(SC_MOD_CHANGEMARKER); mh.line = -1; NotifyModified(mh); } } Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { return Markers()->LineFromHandle(markerHandle); } int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { return Markers()->NumberFromLine(line, which); } int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { return Markers()->HandleFromLine(line, which); } Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const { return cb.LineStart(line); } bool Document::IsLineStartPosition(Sci::Position position) const { return LineStart(LineFromPosition(position)) == position; } Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { if (line >= LinesTotal() - 1) { return LineStart(line + 1); } else { Sci::Position position = LineStart(line + 1); if (SC_LINE_END_TYPE_UNICODE == cb.GetLineEndTypes()) { const unsigned char bytes[] = { cb.UCharAt(position-3), cb.UCharAt(position-2), cb.UCharAt(position-1), }; if (UTF8IsSeparator(bytes)) { return position - UTF8SeparatorLength; } if (UTF8IsNEL(bytes+1)) { return position - UTF8NELLength; } } position--; // Back over CR or LF // When line terminator is CR+LF, may need to go back one more if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) { position--; } return position; } } void SCI_METHOD Document::SetErrorStatus(int status) { // Tell the watchers an error has occurred. for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyErrorOccurred(this, watcher.userData, status); } } Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const { return cb.LineFromPosition(pos); } Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept { // Avoids casting in callers for this very common function return cb.LineFromPosition(pos); } Sci::Position Document::LineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)); } bool Document::IsLineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)) == position; } bool Document::IsPositionInLineEnd(Sci::Position position) const { return position >= LineEnd(LineFromPosition(position)); } Sci::Position Document::VCHomePosition(Sci::Position position) const { const Sci::Line line = SciLineFromPosition(position); const Sci::Position startPosition = LineStart(line); const Sci::Position endLine = LineEnd(line); Sci::Position startText = startPosition; while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t')) startText++; if (position == startText) return startPosition; else return startText; } Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept { return cb.IndexLineStart(line, lineCharacterIndex); } Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept { return cb.LineFromPositionIndex(pos, lineCharacterIndex); } Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept { const Sci::Position posAfter = cb.LineStart(line) + length; if (posAfter >= LengthNoExcept()) { return LinesTotal(); } const Sci::Line lineAfter = SciLineFromPosition(posAfter); if (lineAfter > line) { return lineAfter; } else { // Want to make some progress so return next line return lineAfter + 1; } } int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { const int prev = Levels()->SetLevel(line, level, LinesTotal()); if (prev != level) { DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); mh.foldLevelNow = level; mh.foldLevelPrev = prev; NotifyModified(mh); } return prev; } int SCI_METHOD Document::GetLevel(Sci_Position line) const { return Levels()->GetLevel(line); } void Document::ClearLevels() { Levels()->ClearLevels(); } static bool IsSubordinate(int levelStart, int levelTry) noexcept { if (levelTry & SC_FOLDLEVELWHITEFLAG) return true; else return LevelNumber(levelStart) < LevelNumber(levelTry); } Sci::Line Document::GetLastChild(Sci::Line lineParent, int level, Sci::Line lastLine) { if (level == -1) level = LevelNumber(GetLevel(lineParent)); const Sci::Line maxLine = LinesTotal(); const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1; Sci::Line lineMaxSubord = lineParent; while (lineMaxSubord < maxLine - 1) { EnsureStyledTo(LineStart(lineMaxSubord + 2)); if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1))) break; if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG)) break; lineMaxSubord++; } if (lineMaxSubord > lineParent) { if (level > LevelNumber(GetLevel(lineMaxSubord + 1))) { // Have chewed up some whitespace that belongs to a parent so seek back if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) { lineMaxSubord--; } } } return lineMaxSubord; } Sci::Line Document::GetFoldParent(Sci::Line line) const { const int level = LevelNumber(GetLevel(line)); Sci::Line lineLook = line - 1; while ((lineLook > 0) && ( (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) || (LevelNumber(GetLevel(lineLook)) >= level)) ) { lineLook--; } if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) && (LevelNumber(GetLevel(lineLook)) < level)) { return lineLook; } else { return -1; } } void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) { const int level = GetLevel(line); const Sci::Line lookLastLine = std::max(line, lastLine) + 1; Sci::Line lookLine = line; int lookLineLevel = level; int lookLineLevelNum = LevelNumber(lookLineLevel); while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= LevelNumber(GetLevel(lookLine + 1)))))) { lookLineLevel = GetLevel(--lookLine); lookLineLevelNum = LevelNumber(lookLineLevel); } Sci::Line beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine); if (beginFoldBlock == -1) { highlightDelimiter.Clear(); return; } Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine); Sci::Line firstChangeableLineBefore = -1; if (endFoldBlock < line) { lookLine = beginFoldBlock - 1; lookLineLevel = GetLevel(lookLine); lookLineLevelNum = LevelNumber(lookLineLevel); while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) { if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) { if (GetLastChild(lookLine, -1, lookLastLine) == line) { beginFoldBlock = lookLine; endFoldBlock = line; firstChangeableLineBefore = line - 1; } } if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && (LevelNumber(GetLevel(lookLine - 1)) > lookLineLevelNum)) break; lookLineLevel = GetLevel(--lookLine); lookLineLevelNum = LevelNumber(lookLineLevel); } } if (firstChangeableLineBefore == -1) { for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel); lookLine >= beginFoldBlock; lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) { if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > LevelNumber(level))) { firstChangeableLineBefore = lookLine; break; } } } if (firstChangeableLineBefore == -1) firstChangeableLineBefore = beginFoldBlock - 1; Sci::Line firstChangeableLineAfter = -1; for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel); lookLine <= endFoldBlock; lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) { if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < LevelNumber(GetLevel(lookLine + 1)))) { firstChangeableLineAfter = lookLine; break; } } if (firstChangeableLineAfter == -1) firstChangeableLineAfter = endFoldBlock + 1; highlightDelimiter.beginFoldBlock = beginFoldBlock; highlightDelimiter.endFoldBlock = endFoldBlock; highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore; highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter; } Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { return std::clamp(pos, 0, LengthNoExcept()); } bool Document::IsCrLf(Sci::Position pos) const noexcept { if (pos < 0) return false; if (pos >= (LengthNoExcept() - 1)) return false; return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); } int Document::LenChar(Sci::Position pos) const noexcept { if (pos < 0 || pos >= LengthNoExcept()) { // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. return 1; } else if (IsCrLf(pos)) { return 2; } const unsigned char leadByte = cb.UCharAt(pos); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return 1; } if (SC_CP_UTF8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b < widthCharBytes; b++) { charBytes[b] = cb.UCharAt(pos + b); } const int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { // Treat as invalid and use up just one byte return 1; } else { return utf8status & UTF8MaskWidth; } } else { if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) { return 2; } else { return 1; } } } bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept { Sci::Position trail = pos; while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1))) trail--; start = (trail > 0) ? trail-1 : trail; const unsigned char leadByte = cb.UCharAt(start); const int widthCharBytes = UTF8BytesOfLead[leadByte]; if (widthCharBytes == 1) { return false; } else { const int trailBytes = widthCharBytes - 1; const Sci::Position len = pos - start; if (len > trailBytes) // pos too far from lead return false; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (Sci::Position b=1; b= LengthNoExcept()) return LengthNoExcept(); // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept()); if (checkLineEnd && IsCrLf(pos - 1)) { if (moveDir > 0) return pos + 1; else return pos - 1; } if (dbcsCodePage) { if (SC_CP_UTF8 == dbcsCodePage) { const unsigned char ch = cb.UCharAt(pos); // If ch is not a trail byte then pos is valid intercharacter position if (UTF8IsTrailByte(ch)) { Sci::Position startUTF = pos; Sci::Position endUTF = pos; if (InGoodUTF8(pos, startUTF, endUTF)) { // ch is a trail byte within a UTF-8 character if (moveDir > 0) pos = endUTF; else pos = startUTF; } // Else invalid UTF-8 so return position of isolated trail byte } } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); if (pos == posStartLine) return pos; // Step back until a non-lead-byte is found. Sci::Position posCheck = pos; while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1))) posCheck--; // Check from known start of character. while (posCheck < pos) { const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1; if (posCheck + mbsize == pos) { return pos; } else if (posCheck + mbsize > pos) { if (moveDir > 0) { return posCheck + mbsize; } else { return posCheck; } } posCheck += mbsize; } } } return pos; } // NextPosition moves between valid positions - it can not handle a position in the middle of a // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar. // A \r\n pair is treated as two characters. Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept { // If out of range, just return minimum/maximum value. const int increment = (moveDir > 0) ? 1 : -1; if (pos + increment <= 0) return 0; if (pos + increment >= cb.Length()) return cb.Length(); if (dbcsCodePage) { if (SC_CP_UTF8 == dbcsCodePage) { if (increment == 1) { // Simple forward movement case so can avoid some checks const unsigned char leadByte = cb.UCharAt(pos); if (UTF8IsAscii(leadByte)) { // Single byte character or invalid pos++; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b 0) { const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1; pos += mbsize; if (pos > cb.Length()) pos = cb.Length(); } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx // http://msdn.microsoft.com/en-us/library/cc194790.aspx if ((pos - 1) <= posStartLine) { return pos - 1; } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) { // Must actually be trail byte return pos - 2; } else { // Otherwise, step back until a non-lead-byte is found. Sci::Position posTemp = pos - 1; while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp))) ; // Now posTemp+1 must point to the beginning of a character, // so figure out whether we went back an even or an odd // number of bytes and go back 1 or 2 bytes, respectively. return (pos - 1 - ((pos - posTemp) & 1)); } } } } else { pos += increment; } return pos; } bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { // Returns true if pos changed Sci::Position posNext = NextPosition(pos, moveDir); if (posNext == pos) { return false; } else { pos = posNext; return true; } } Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { if (position >= LengthNoExcept()) { return CharacterExtracted(unicodeReplacementChar, 0); } const unsigned char leadByte = cb.UCharAt(position); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } if (SC_CP_UTF8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return INVALID_POSITION; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > Length())) return INVALID_POSITION; } return pos; } Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { Sci::Position pos = positionStart; if (dbcsCodePage) { const int increment = (characterOffset > 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return INVALID_POSITION; if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. characterOffset -= increment; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > LengthNoExcept())) return INVALID_POSITION; } return pos; } int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { int character; int bytesInCharacter = 1; const unsigned char leadByte = cb.UCharAt(position); if (dbcsCodePage) { if (SC_CP_UTF8 == dbcsCodePage) { if (UTF8IsAscii(leadByte)) { // Single byte character or invalid character = leadByte; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b= 0x81) && (uch <= 0x9F)) || ((uch >= 0xE0) && (uch <= 0xFC)); // Lead bytes F0 to FC may be a Microsoft addition. case 936: // GBK return (uch >= 0x81) && (uch <= 0xFE); case 949: // Korean Wansung KS C-5601-1987 return (uch >= 0x81) && (uch <= 0xFE); case 950: // Big5 return (uch >= 0x81) && (uch <= 0xFE); case 1361: // Korean Johab KS C-5601-1992 return ((uch >= 0x84) && (uch <= 0xD3)) || ((uch >= 0xD8) && (uch <= 0xDE)) || ((uch >= 0xE0) && (uch <= 0xF9)); } return false; } bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept { const unsigned char lead = ch; switch (dbcsCodePage) { case 932: // Shift_jis return (lead == 0x85) || (lead == 0x86) || (lead == 0xEB) || (lead == 0xEC) || (lead == 0xEF) || (lead == 0xFA) || (lead == 0xFB) || (lead == 0xFC); case 936: // GBK return (lead == 0x80) || (lead == 0xFF); case 949: // Korean Wansung KS C-5601-1987 return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE); case 950: // Big5 return ((lead >= 0x80) && (lead <= 0xA0)) || (lead == 0xC8) || (lead >= 0xFA); case 1361: // Korean Johab KS C-5601-1992 return ((lead >= 0x80) && (lead <= 0x83)) || ((lead >= 0xD4) && (lead <= 0xD8)) || (lead == 0xDF) || (lead >= 0xFA); } return false; } bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept { const unsigned char trail = ch; switch (dbcsCodePage) { case 932: // Shift_jis return (trail <= 0x3F) || (trail == 0x7F) || (trail >= 0xFD); case 936: // GBK return (trail <= 0x3F) || (trail == 0x7F) || (trail == 0xFF); case 949: // Korean Wansung KS C-5601-1987 return (trail <= 0x40) || ((trail >= 0x5B) && (trail <= 0x60)) || ((trail >= 0x7B) && (trail <= 0x80)) || (trail == 0xFF); case 950: // Big5 return (trail <= 0x3F) || ((trail >= 0x7F) && (trail <= 0xA0)) || (trail == 0xFF); case 1361: // Korean Johab KS C-5601-1992 return (trail <= 0x30) || (trail == 0x7F) || (trail == 0x80) || (trail == 0xFF); } return false; } int Document::DBCSDrawBytes(std::string_view text) const noexcept { if (text.length() <= 1) { return static_cast(text.length()); } if (IsDBCSLeadByteNoExcept(text[0])) { return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2; } else { return 1; } } static constexpr bool IsSpaceOrTab(int ch) noexcept { return ch == ' ' || ch == '\t'; } // Need to break text into segments near lengthSegment but taking into // account the encoding to not break inside a UTF-8 or DBCS character // and also trying to avoid breaking inside a pair of combining characters. // The segment length must always be long enough (more than 4 bytes) // so that there will be at least one whole character to make a segment. // For UTF-8, text must consist only of valid whole characters. // In preference order from best to worst: // 1) Break after space // 2) Break before punctuation // 3) Break after whole character int Document::SafeSegment(const char *text, int length, int lengthSegment) const noexcept { if (length <= lengthSegment) return length; int lastSpaceBreak = -1; int lastPunctuationBreak = -1; int lastEncodingAllowedBreak = 0; for (int j=0; j < lengthSegment;) { const unsigned char ch = text[j]; if (j > 0) { if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) { lastSpaceBreak = j; } if (ch < 'A') { lastPunctuationBreak = j; } } lastEncodingAllowedBreak = j; if (dbcsCodePage == SC_CP_UTF8) { j += UTF8BytesOfLead[ch]; } else if (dbcsCodePage) { j += IsDBCSLeadByteNoExcept(ch) ? 2 : 1; } else { j++; } } if (lastSpaceBreak >= 0) { return lastSpaceBreak; } else if (lastPunctuationBreak >= 0) { return lastPunctuationBreak; } return lastEncodingAllowedBreak; } EncodingFamily Document::CodePageFamily() const noexcept { if (SC_CP_UTF8 == dbcsCodePage) return EncodingFamily::unicode; else if (dbcsCodePage) return EncodingFamily::dbcs; else return EncodingFamily::eightBit; } void Document::ModifiedAt(Sci::Position pos) noexcept { if (endStyled > pos) endStyled = pos; } void Document::CheckReadOnly() { if (cb.IsReadOnly() && enteredReadOnlyCount == 0) { enteredReadOnlyCount++; NotifyModifyAttempt(); enteredReadOnlyCount--; } } // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt. // SetStyleAt does not change the persistent state of a document bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { if (pos < 0) return false; if (len <= 0) return false; if ((pos + len) > LengthNoExcept()) return false; CheckReadOnly(); if (enteredModification != 0) { return false; } else { enteredModification++; if (!cb.IsReadOnly()) { NotifyModified( DocModification( SC_MOD_BEFOREDELETE | SC_PERFORMED_USER, pos, len, 0, 0)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.DeleteChars(pos, len, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); if ((pos < LengthNoExcept()) || (pos == 0)) ModifiedAt(pos); else ModifiedAt(pos-1); NotifyModified( DocModification( SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0), pos, len, LinesTotal() - prevLinesTotal, text)); } enteredModification--; } return !cb.IsReadOnly(); } /** * Insert a string with a length. */ Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) { if (insertLength <= 0) { return 0; } CheckReadOnly(); // Application may change read only state here if (cb.IsReadOnly()) { return 0; } if (enteredModification != 0) { return 0; } enteredModification++; insertionSet = false; insertion.clear(); NotifyModified( DocModification( SC_MOD_INSERTCHECK, position, insertLength, 0, s)); if (insertionSet) { s = insertion.c_str(); insertLength = insertion.length(); } NotifyModified( DocModification( SC_MOD_BEFOREINSERT | SC_PERFORMED_USER, position, insertLength, 0, s)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.InsertString(position, s, insertLength, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); ModifiedAt(position); NotifyModified( DocModification( SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0), position, insertLength, LinesTotal() - prevLinesTotal, text)); if (insertionSet) { // Free memory as could be large std::string().swap(insertion); } enteredModification--; return insertLength; } void Document::ChangeInsertion(const char *s, Sci::Position length) { insertionSet = true; insertion.assign(s, length); } int SCI_METHOD Document::AddData(const char *data, Sci_Position length) { try { const Sci::Position position = Length(); InsertString(position, data, length); } catch (std::bad_alloc &) { return SC_STATUS_BADALLOC; } catch (...) { return SC_STATUS_FAILURE; } return 0; } void * SCI_METHOD Document::ConvertToDocument() { return this; } Sci::Position Document::Undo() { Sci::Position newPos = -1; CheckReadOnly(); if ((enteredModification == 0) && (cb.IsCollectingUndo())) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.StartUndo(); //Platform::DebugPrintf("Steps=%d\n", steps); Sci::Position coalescedRemovePos = -1; Sci::Position coalescedRemoveLen = 0; Sci::Position prevRemoveActionPos = -1; Sci::Position prevRemoveActionLen = 0; for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action)); } else if (action.at == ActionType::container) { DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO); dm.token = action.position; NotifyModified(dm); if (!action.mayCoalesce) { coalescedRemovePos = -1; coalescedRemoveLen = 0; prevRemoveActionPos = -1; prevRemoveActionLen = 0; } } else { NotifyModified(DocModification( SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); newPos = action.position; } int modFlags = SC_PERHTTP/1.1 200 OK Connection: keep-alive Connection: keep-alive Content-Disposition: inline; filename="Document.cxx" Content-Disposition: inline; filename="Document.cxx" Content-Length: 101368 Content-Length: 101368 Content-Security-Policy: default-src 'none' Content-Security-Policy: default-src 'none' Content-Type: text/plain; charset=UTF-8 Content-Type: text/plain; charset=UTF-8 Date: Sun, 19 Oct 2025 11:05:39 UTC ETag: "acb9f14bdc6c43f15bb085b0dfcf67601e083ac6" ETag: "acb9f14bdc6c43f15bb085b0dfcf67601e083ac6" Expires: Wed, 17 Oct 2035 11:05:38 GMT Expires: Wed, 17 Oct 2035 11:05:39 GMT Last-Modified: Sun, 19 Oct 2025 11:05:38 GMT Last-Modified: Sun, 19 Oct 2025 11:05:39 GMT Server: OpenBSD httpd Server: OpenBSD httpd X-Content-Type-Options: nosniff X-Content-Type-Options: nosniff // Scintilla source code edit control /** @file Document.cxx ** Text document that handles notifications, DBCS, styling, words and end of line. **/ // Copyright 1998-2011 by Neil Hodgson // The License.txt file describes the conditions under which this software may be distributed. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NO_CXX11_REGEX #include #endif #include "Debugging.h" #include "ILoader.h" #include "ILexer.h" #include "Scintilla.h" #include "CharacterType.h" #include "CharacterCategoryMap.h" #include "Position.h" #include "SplitVector.h" #include "Partitioning.h" #include "RunStyles.h" #include "CellBuffer.h" #include "PerLine.h" #include "CharClassify.h" #include "Decoration.h" #include "CaseFolder.h" #include "Document.h" #include "RESearch.h" #include "UniConversion.h" #include "ElapsedPeriod.h" using namespace Scintilla; void LexInterface::Colourise(Sci::Position start, Sci::Position end) { if (pdoc && instance && !performingStyle) { // Protect against reentrance, which may occur, for example, when // fold points are discovered while performing styling and the folding // code looks for child lines which may trigger styling. performingStyle = true; const Sci::Position lengthDoc = pdoc->Length(); if (end == -1) end = lengthDoc; const Sci::Position len = end - start; PLATFORM_ASSERT(len >= 0); PLATFORM_ASSERT(start + len <= lengthDoc); int styleStart = 0; if (start > 0) styleStart = pdoc->StyleAt(start - 1); if (len > 0) { instance->Lex(start, len, styleStart, pdoc); instance->Fold(start, len, styleStart, pdoc); } performingStyle = false; } } int LexInterface::LineEndTypesSupported() { if (instance) { return instance->LineEndTypesSupported(); } return 0; } ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept : duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) { } void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept { // Only adjust for multiple actions to avoid instability if (numberActions < 8) return; // Alpha value for exponential smoothing. // Most recent value contributes 25% to smoothed value. constexpr double alpha = 0.25; const double durationOne = durationOfActions / numberActions; duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration, minDuration, maxDuration); } double ActionDuration::Duration() const noexcept { return duration; } size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept { return std::lround(secondsAllowed / Duration()); } Document::Document(int options) : cb((options & SC_DOCUMENTOPTION_STYLES_NONE) == 0, (options & SC_DOCUMENTOPTION_TEXT_LARGE) != 0), durationStyleOneByte(0.000001, 0.0000001, 0.00001) { refCount = 0; #ifdef _WIN32 eolMode = SC_EOL_CRLF; #else eolMode = SC_EOL_LF; #endif dbcsCodePage = SC_CP_UTF8; lineEndBitSet = SC_LINE_END_TYPE_DEFAULT; endStyled = 0; styleClock = 0; enteredModification = 0; enteredStyling = 0; enteredReadOnlyCount = 0; insertionSet = false; tabInChars = 8; indentInChars = 0; actualIndentInChars = 8; useTabs = true; tabIndents = true; backspaceUnindents = false; matchesValid = false; perLineData[ldMarkers] = std::make_unique(); perLineData[ldLevels] = std::make_unique(); perLineData[ldState] = std::make_unique(); perLineData[ldMargin] = std::make_unique(); perLineData[ldAnnotation] = std::make_unique(); perLineData[ldEOLAnnotation] = std::make_unique(); decorations = DecorationListCreate(IsLarge()); cb.SetPerLine(this); cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); } Document::~Document() { for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyDeleted(this, watcher.userData); } } // Increase reference count and return its previous value. int Document::AddRef() { return refCount++; } // Decrease reference count and return its previous value. // Delete the document if reference count reaches zero. int SCI_METHOD Document::Release() { const int curRefCount = --refCount; if (curRefCount == 0) delete this; return curRefCount; } void Document::Init() { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->Init(); } } void Document::InsertLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->InsertLine(line); } } void Document::InsertLines(Sci::Line line, Sci::Line lines) { for (const auto &pl : perLineData) { if (pl) pl->InsertLines(line, lines); } } void Document::RemoveLine(Sci::Line line) { for (const std::unique_ptr &pl : perLineData) { if (pl) pl->RemoveLine(line); } } LineMarkers *Document::Markers() const noexcept { return dynamic_cast(perLineData[ldMarkers].get()); } LineLevels *Document::Levels() const noexcept { return dynamic_cast(perLineData[ldLevels].get()); } LineState *Document::States() const noexcept { return dynamic_cast(perLineData[ldState].get()); } LineAnnotation *Document::Margins() const noexcept { return dynamic_cast(perLineData[ldMargin].get()); } LineAnnotation *Document::Annotations() const noexcept { return dynamic_cast(perLineData[ldAnnotation].get()); } LineAnnotation *Document::EOLAnnotations() const noexcept { return dynamic_cast(perLineData[ldEOLAnnotation].get()); } int Document::LineEndTypesSupported() const { if ((SC_CP_UTF8 == dbcsCodePage) && pli) return pli->LineEndTypesSupported(); else return 0; } bool Document::SetDBCSCodePage(int dbcsCodePage_) { if (dbcsCodePage != dbcsCodePage_) { dbcsCodePage = dbcsCodePage_; SetCaseFolder(nullptr); cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported()); cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage); ModifiedAt(0); // Need to restyle whole document return true; } else { return false; } } bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) { if (lineEndBitSet != lineEndBitSet_) { lineEndBitSet = lineEndBitSet_; const int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported(); if (lineEndBitSetActive != cb.GetLineEndTypes()) { ModifiedAt(0); cb.SetLineEndTypes(lineEndBitSetActive); return true; } else { return false; } } else { return false; } } void Document::SetSavePoint() { cb.SetSavePoint(); NotifySavePoint(true); } void Document::TentativeUndo() { if (!TentativeActive()) return; CheckReadOnly(); if (enteredModification == 0) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.TentativeSteps(); //Platform::DebugPrintf("Steps=%d\n", steps); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action)); } else if (action.at == ActionType::container) { DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); } int modFlags = SC_PERFORMED_UNDO; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { modFlags |= SC_MOD_INSERTTEXT; } else if (action.at == ActionType::insert) { modFlags |= SC_MOD_DELETETEXT; } if (steps > 1) modFlags |= SC_MULTISTEPUNDOREDO; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= SC_LASTSTEPINUNDOREDO; if (multiLine) modFlags |= SC_MULTILINEUNDOREDO; } NotifyModified(DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); cb.TentativeCommit(); } enteredModification--; } } int Document::GetMark(Sci::Line line) const noexcept { return Markers()->MarkValue(line); } Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept { return Markers()->MarkerNext(lineStart, mask); } int Document::AddMark(Sci::Line line, int markerNum) { if (line >= 0 && line <= LinesTotal()) { const int prev = Markers()->AddMark(line, markerNum, LinesTotal()); const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); return prev; } else { return -1; } } void Document::AddMarkSet(Sci::Line line, int valueSet) { if (line < 0 || line > LinesTotal()) { return; } unsigned int m = valueSet; for (int i = 0; m; i++, m >>= 1) { if (m & 1) Markers()->AddMark(line, i, LinesTotal()); } const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMark(Sci::Line line, int markerNum) { Markers()->DeleteMark(line, markerNum, false); const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); NotifyModified(mh); } void Document::DeleteMarkFromHandle(int markerHandle) { Markers()->DeleteMarkFromHandle(markerHandle); DocModification mh(SC_MOD_CHANGEMARKER); mh.line = -1; NotifyModified(mh); } void Document::DeleteAllMarks(int markerNum) { bool someChanges = false; for (Sci::Line line = 0; line < LinesTotal(); line++) { if (Markers()->DeleteMark(line, markerNum, true)) someChanges = true; } if (someChanges) { DocModification mh(SC_MOD_CHANGEMARKER); mh.line = -1; NotifyModified(mh); } } Sci::Line Document::LineFromHandle(int markerHandle) const noexcept { return Markers()->LineFromHandle(markerHandle); } int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept { return Markers()->NumberFromLine(line, which); } int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept { return Markers()->HandleFromLine(line, which); } Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const { return cb.LineStart(line); } bool Document::IsLineStartPosition(Sci::Position position) const { return LineStart(LineFromPosition(position)) == position; } Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const { if (line >= LinesTotal() - 1) { return LineStart(line + 1); } else { Sci::Position position = LineStart(line + 1); if (SC_LINE_END_TYPE_UNICODE == cb.GetLineEndTypes()) { const unsigned char bytes[] = { cb.UCharAt(position-3), cb.UCharAt(position-2), cb.UCharAt(position-1), }; if (UTF8IsSeparator(bytes)) { return position - UTF8SeparatorLength; } if (UTF8IsNEL(bytes+1)) { return position - UTF8NELLength; } } position--; // Back over CR or LF // When line terminator is CR+LF, may need to go back one more if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) { position--; } return position; } } void SCI_METHOD Document::SetErrorStatus(int status) { // Tell the watchers an error has occurred. for (const WatcherWithUserData &watcher : watchers) { watcher.watcher->NotifyErrorOccurred(this, watcher.userData, status); } } Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const { return cb.LineFromPosition(pos); } Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept { // Avoids casting in callers for this very common function return cb.LineFromPosition(pos); } Sci::Position Document::LineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)); } bool Document::IsLineEndPosition(Sci::Position position) const { return LineEnd(LineFromPosition(position)) == position; } bool Document::IsPositionInLineEnd(Sci::Position position) const { return position >= LineEnd(LineFromPosition(position)); } Sci::Position Document::VCHomePosition(Sci::Position position) const { const Sci::Line line = SciLineFromPosition(position); const Sci::Position startPosition = LineStart(line); const Sci::Position endLine = LineEnd(line); Sci::Position startText = startPosition; while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t')) startText++; if (position == startText) return startPosition; else return startText; } Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const noexcept { return cb.IndexLineStart(line, lineCharacterIndex); } Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const noexcept { return cb.LineFromPositionIndex(pos, lineCharacterIndex); } Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept { const Sci::Position posAfter = cb.LineStart(line) + length; if (posAfter >= LengthNoExcept()) { return LinesTotal(); } const Sci::Line lineAfter = SciLineFromPosition(posAfter); if (lineAfter > line) { return lineAfter; } else { // Want to make some progress so return next line return lineAfter + 1; } } int SCI_METHOD Document::SetLevel(Sci_Position line, int level) { const int prev = Levels()->SetLevel(line, level, LinesTotal()); if (prev != level) { DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line); mh.foldLevelNow = level; mh.foldLevelPrev = prev; NotifyModified(mh); } return prev; } int SCI_METHOD Document::GetLevel(Sci_Position line) const { return Levels()->GetLevel(line); } void Document::ClearLevels() { Levels()->ClearLevels(); } static bool IsSubordinate(int levelStart, int levelTry) noexcept { if (levelTry & SC_FOLDLEVELWHITEFLAG) return true; else return LevelNumber(levelStart) < LevelNumber(levelTry); } Sci::Line Document::GetLastChild(Sci::Line lineParent, int level, Sci::Line lastLine) { if (level == -1) level = LevelNumber(GetLevel(lineParent)); const Sci::Line maxLine = LinesTotal(); const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1; Sci::Line lineMaxSubord = lineParent; while (lineMaxSubord < maxLine - 1) { EnsureStyledTo(LineStart(lineMaxSubord + 2)); if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1))) break; if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG)) break; lineMaxSubord++; } if (lineMaxSubord > lineParent) { if (level > LevelNumber(GetLevel(lineMaxSubord + 1))) { // Have chewed up some whitespace that belongs to a parent so seek back if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) { lineMaxSubord--; } } } return lineMaxSubord; } Sci::Line Document::GetFoldParent(Sci::Line line) const { const int level = LevelNumber(GetLevel(line)); Sci::Line lineLook = line - 1; while ((lineLook > 0) && ( (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) || (LevelNumber(GetLevel(lineLook)) >= level)) ) { lineLook--; } if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) && (LevelNumber(GetLevel(lineLook)) < level)) { return lineLook; } else { return -1; } } void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) { const int level = GetLevel(line); const Sci::Line lookLastLine = std::max(line, lastLine) + 1; Sci::Line lookLine = line; int lookLineLevel = level; int lookLineLevelNum = LevelNumber(lookLineLevel); while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= LevelNumber(GetLevel(lookLine + 1)))))) { lookLineLevel = GetLevel(--lookLine); lookLineLevelNum = LevelNumber(lookLineLevel); } Sci::Line beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine); if (beginFoldBlock == -1) { highlightDelimiter.Clear(); return; } Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine); Sci::Line firstChangeableLineBefore = -1; if (endFoldBlock < line) { lookLine = beginFoldBlock - 1; lookLineLevel = GetLevel(lookLine); lookLineLevelNum = LevelNumber(lookLineLevel); while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) { if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) { if (GetLastChild(lookLine, -1, lookLastLine) == line) { beginFoldBlock = lookLine; endFoldBlock = line; firstChangeableLineBefore = line - 1; } } if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && (LevelNumber(GetLevel(lookLine - 1)) > lookLineLevelNum)) break; lookLineLevel = GetLevel(--lookLine); lookLineLevelNum = LevelNumber(lookLineLevel); } } if (firstChangeableLineBefore == -1) { for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel); lookLine >= beginFoldBlock; lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) { if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > LevelNumber(level))) { firstChangeableLineBefore = lookLine; break; } } } if (firstChangeableLineBefore == -1) firstChangeableLineBefore = beginFoldBlock - 1; Sci::Line firstChangeableLineAfter = -1; for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel); lookLine <= endFoldBlock; lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) { if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < LevelNumber(GetLevel(lookLine + 1)))) { firstChangeableLineAfter = lookLine; break; } } if (firstChangeableLineAfter == -1) firstChangeableLineAfter = endFoldBlock + 1; highlightDelimiter.beginFoldBlock = beginFoldBlock; highlightDelimiter.endFoldBlock = endFoldBlock; highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore; highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter; } Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept { return std::clamp(pos, 0, LengthNoExcept()); } bool Document::IsCrLf(Sci::Position pos) const noexcept { if (pos < 0) return false; if (pos >= (LengthNoExcept() - 1)) return false; return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); } int Document::LenChar(Sci::Position pos) const noexcept { if (pos < 0 || pos >= LengthNoExcept()) { // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds. return 1; } else if (IsCrLf(pos)) { return 2; } const unsigned char leadByte = cb.UCharAt(pos); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return 1; } if (SC_CP_UTF8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b < widthCharBytes; b++) { charBytes[b] = cb.UCharAt(pos + b); } const int utf8status = UTF8Classify(charBytes, widthCharBytes); if (utf8status & UTF8MaskInvalid) { // Treat as invalid and use up just one byte return 1; } else { return utf8status & UTF8MaskWidth; } } else { if (IsDBCSLeadByteNoExcept(leadByte) && ((pos + 1) < LengthNoExcept())) { return 2; } else { return 1; } } } bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept { Sci::Position trail = pos; while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1))) trail--; start = (trail > 0) ? trail-1 : trail; const unsigned char leadByte = cb.UCharAt(start); const int widthCharBytes = UTF8BytesOfLead[leadByte]; if (widthCharBytes == 1) { return false; } else { const int trailBytes = widthCharBytes - 1; const Sci::Position len = pos - start; if (len > trailBytes) // pos too far from lead return false; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (Sci::Position b=1; b= LengthNoExcept()) return LengthNoExcept(); // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept()); if (checkLineEnd && IsCrLf(pos - 1)) { if (moveDir > 0) return pos + 1; else return pos - 1; } if (dbcsCodePage) { if (SC_CP_UTF8 == dbcsCodePage) { const unsigned char ch = cb.UCharAt(pos); // If ch is not a trail byte then pos is valid intercharacter position if (UTF8IsTrailByte(ch)) { Sci::Position startUTF = pos; Sci::Position endUTF = pos; if (InGoodUTF8(pos, startUTF, endUTF)) { // ch is a trail byte within a UTF-8 character if (moveDir > 0) pos = endUTF; else pos = startUTF; } // Else invalid UTF-8 so return position of isolated trail byte } } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); if (pos == posStartLine) return pos; // Step back until a non-lead-byte is found. Sci::Position posCheck = pos; while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1))) posCheck--; // Check from known start of character. while (posCheck < pos) { const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1; if (posCheck + mbsize == pos) { return pos; } else if (posCheck + mbsize > pos) { if (moveDir > 0) { return posCheck + mbsize; } else { return posCheck; } } posCheck += mbsize; } } } return pos; } // NextPosition moves between valid positions - it can not handle a position in the middle of a // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar. // A \r\n pair is treated as two characters. Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept { // If out of range, just return minimum/maximum value. const int increment = (moveDir > 0) ? 1 : -1; if (pos + increment <= 0) return 0; if (pos + increment >= cb.Length()) return cb.Length(); if (dbcsCodePage) { if (SC_CP_UTF8 == dbcsCodePage) { if (increment == 1) { // Simple forward movement case so can avoid some checks const unsigned char leadByte = cb.UCharAt(pos); if (UTF8IsAscii(leadByte)) { // Single byte character or invalid pos++; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b 0) { const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1; pos += mbsize; if (pos > cb.Length()) pos = cb.Length(); } else { // Anchor DBCS calculations at start of line because start of line can // not be a DBCS trail byte. const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos)); // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx // http://msdn.microsoft.com/en-us/library/cc194790.aspx if ((pos - 1) <= posStartLine) { return pos - 1; } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) { // Must actually be trail byte return pos - 2; } else { // Otherwise, step back until a non-lead-byte is found. Sci::Position posTemp = pos - 1; while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp))) ; // Now posTemp+1 must point to the beginning of a character, // so figure out whether we went back an even or an odd // number of bytes and go back 1 or 2 bytes, respectively. return (pos - 1 - ((pos - posTemp) & 1)); } } } } else { pos += increment; } return pos; } bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept { // Returns true if pos changed Sci::Position posNext = NextPosition(pos, moveDir); if (posNext == pos) { return false; } else { pos = posNext; return true; } } Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept { if (position >= LengthNoExcept()) { return CharacterExtracted(unicodeReplacementChar, 0); } const unsigned char leadByte = cb.UCharAt(position); if (!dbcsCodePage || UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } if (SC_CP_UTF8 == dbcsCodePage) { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b = 1; b 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return INVALID_POSITION; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > Length())) return INVALID_POSITION; } return pos; } Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept { Sci::Position pos = positionStart; if (dbcsCodePage) { const int increment = (characterOffset > 0) ? 1 : -1; while (characterOffset != 0) { const Sci::Position posNext = NextPosition(pos, increment); if (posNext == pos) return INVALID_POSITION; if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16. characterOffset -= increment; pos = posNext; characterOffset -= increment; } } else { pos = positionStart + characterOffset; if ((pos < 0) || (pos > LengthNoExcept())) return INVALID_POSITION; } return pos; } int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const { int character; int bytesInCharacter = 1; const unsigned char leadByte = cb.UCharAt(position); if (dbcsCodePage) { if (SC_CP_UTF8 == dbcsCodePage) { if (UTF8IsAscii(leadByte)) { // Single byte character or invalid character = leadByte; } else { const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0}; for (int b=1; b= 0x81) && (uch <= 0x9F)) || ((uch >= 0xE0) && (uch <= 0xFC)); // Lead bytes F0 to FC may be a Microsoft addition. case 936: // GBK return (uch >= 0x81) && (uch <= 0xFE); case 949: // Korean Wansung KS C-5601-1987 return (uch >= 0x81) && (uch <= 0xFE); case 950: // Big5 return (uch >= 0x81) && (uch <= 0xFE); case 1361: // Korean Johab KS C-5601-1992 return ((uch >= 0x84) && (uch <= 0xD3)) || ((uch >= 0xD8) && (uch <= 0xDE)) || ((uch >= 0xE0) && (uch <= 0xF9)); } return false; } bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept { const unsigned char lead = ch; switch (dbcsCodePage) { case 932: // Shift_jis return (lead == 0x85) || (lead == 0x86) || (lead == 0xEB) || (lead == 0xEC) || (lead == 0xEF) || (lead == 0xFA) || (lead == 0xFB) || (lead == 0xFC); case 936: // GBK return (lead == 0x80) || (lead == 0xFF); case 949: // Korean Wansung KS C-5601-1987 return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE); case 950: // Big5 return ((lead >= 0x80) && (lead <= 0xA0)) || (lead == 0xC8) || (lead >= 0xFA); case 1361: // Korean Johab KS C-5601-1992 return ((lead >= 0x80) && (lead <= 0x83)) || ((lead >= 0xD4) && (lead <= 0xD8)) || (lead == 0xDF) || (lead >= 0xFA); } return false; } bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept { const unsigned char trail = ch; switch (dbcsCodePage) { case 932: // Shift_jis return (trail <= 0x3F) || (trail == 0x7F) || (trail >= 0xFD); case 936: // GBK return (trail <= 0x3F) || (trail == 0x7F) || (trail == 0xFF); case 949: // Korean Wansung KS C-5601-1987 return (trail <= 0x40) || ((trail >= 0x5B) && (trail <= 0x60)) || ((trail >= 0x7B) && (trail <= 0x80)) || (trail == 0xFF); case 950: // Big5 return (trail <= 0x3F) || ((trail >= 0x7F) && (trail <= 0xA0)) || (trail == 0xFF); case 1361: // Korean Johab KS C-5601-1992 return (trail <= 0x30) || (trail == 0x7F) || (trail == 0x80) || (trail == 0xFF); } return false; } int Document::DBCSDrawBytes(std::string_view text) const noexcept { if (text.length() <= 1) { return static_cast(text.length()); } if (IsDBCSLeadByteNoExcept(text[0])) { return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2; } else { return 1; } } static constexpr bool IsSpaceOrTab(int ch) noexcept { return ch == ' ' || ch == '\t'; } // Need to break text into segments near lengthSegment but taking into // account the encoding to not break inside a UTF-8 or DBCS character // and also trying to avoid breaking inside a pair of combining characters. // The segment length must always be long enough (more than 4 bytes) // so that there will be at least one whole character to make a segment. // For UTF-8, text must consist only of valid whole characters. // In preference order from best to worst: // 1) Break after space // 2) Break before punctuation // 3) Break after whole character int Document::SafeSegment(const char *text, int length, int lengthSegment) const noexcept { if (length <= lengthSegment) return length; int lastSpaceBreak = -1; int lastPunctuationBreak = -1; int lastEncodingAllowedBreak = 0; for (int j=0; j < lengthSegment;) { const unsigned char ch = text[j]; if (j > 0) { if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) { lastSpaceBreak = j; } if (ch < 'A') { lastPunctuationBreak = j; } } lastEncodingAllowedBreak = j; if (dbcsCodePage == SC_CP_UTF8) { j += UTF8BytesOfLead[ch]; } else if (dbcsCodePage) { j += IsDBCSLeadByteNoExcept(ch) ? 2 : 1; } else { j++; } } if (lastSpaceBreak >= 0) { return lastSpaceBreak; } else if (lastPunctuationBreak >= 0) { return lastPunctuationBreak; } return lastEncodingAllowedBreak; } EncodingFamily Document::CodePageFamily() const noexcept { if (SC_CP_UTF8 == dbcsCodePage) return EncodingFamily::unicode; else if (dbcsCodePage) return EncodingFamily::dbcs; else return EncodingFamily::eightBit; } void Document::ModifiedAt(Sci::Position pos) noexcept { if (endStyled > pos) endStyled = pos; } void Document::CheckReadOnly() { if (cb.IsReadOnly() && enteredReadOnlyCount == 0) { enteredReadOnlyCount++; NotifyModifyAttempt(); enteredReadOnlyCount--; } } // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt. // SetStyleAt does not change the persistent state of a document bool Document::DeleteChars(Sci::Position pos, Sci::Position len) { if (pos < 0) return false; if (len <= 0) return false; if ((pos + len) > LengthNoExcept()) return false; CheckReadOnly(); if (enteredModification != 0) { return false; } else { enteredModification++; if (!cb.IsReadOnly()) { NotifyModified( DocModification( SC_MOD_BEFOREDELETE | SC_PERFORMED_USER, pos, len, 0, 0)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.DeleteChars(pos, len, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); if ((pos < LengthNoExcept()) || (pos == 0)) ModifiedAt(pos); else ModifiedAt(pos-1); NotifyModified( DocModification( SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0), pos, len, LinesTotal() - prevLinesTotal, text)); } enteredModification--; } return !cb.IsReadOnly(); } /** * Insert a string with a length. */ Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) { if (insertLength <= 0) { return 0; } CheckReadOnly(); // Application may change read only state here if (cb.IsReadOnly()) { return 0; } if (enteredModification != 0) { return 0; } enteredModification++; insertionSet = false; insertion.clear(); NotifyModified( DocModification( SC_MOD_INSERTCHECK, position, insertLength, 0, s)); if (insertionSet) { s = insertion.c_str(); insertLength = insertion.length(); } NotifyModified( DocModification( SC_MOD_BEFOREINSERT | SC_PERFORMED_USER, position, insertLength, 0, s)); const Sci::Line prevLinesTotal = LinesTotal(); const bool startSavePoint = cb.IsSavePoint(); bool startSequence = false; const char *text = cb.InsertString(position, s, insertLength, startSequence); if (startSavePoint && cb.IsCollectingUndo()) NotifySavePoint(false); ModifiedAt(position); NotifyModified( DocModification( SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0), position, insertLength, LinesTotal() - prevLinesTotal, text)); if (insertionSet) { // Free memory as could be large std::string().swap(insertion); } enteredModification--; return insertLength; } void Document::ChangeInsertion(const char *s, Sci::Position length) { insertionSet = true; insertion.assign(s, length); } int SCI_METHOD Document::AddData(const char *data, Sci_Position length) { try { const Sci::Position position = Length(); InsertString(position, data, length); } catch (std::bad_alloc &) { return SC_STATUS_BADALLOC; } catch (...) { return SC_STATUS_FAILURE; } return 0; } void * SCI_METHOD Document::ConvertToDocument() { return this; } Sci::Position Document::Undo() { Sci::Position newPos = -1; CheckReadOnly(); if ((enteredModification == 0) && (cb.IsCollectingUndo())) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.StartUndo(); //Platform::DebugPrintf("Steps=%d\n", steps); Sci::Position coalescedRemovePos = -1; Sci::Position coalescedRemoveLen = 0; Sci::Position prevRemoveActionPos = -1; Sci::Position prevRemoveActionLen = 0; for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetUndoStep(); if (action.at == ActionType::remove) { NotifyModified(DocModification( SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action)); } else if (action.at == ActionType::container) { DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO); dm.token = action.position; NotifyModified(dm); if (!action.mayCoalesce) { coalescedRemovePos = -1; coalescedRemoveLen = 0; prevRemoveActionPos = -1; prevRemoveActionLen = 0; } } else { NotifyModified(DocModification( SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action)); } cb.PerformUndoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); newPos = action.position; } int modFlags = SC_PERFORMED_UNDO; // With undo, an insertion action becomes a deletion notification if (action.at == ActionType::remove) { newPos += action.lenData; modFlags |= SC_MOD_INSERTTEXT; if ((coalescedRemoveLen > 0) && (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) { coalescedRemoveLen += action.lenData; newPos = coalescedRemovePos + coalescedRemoveLen; } else { coalescedRemovePos = action.position; coalescedRemoveLen = action.lenData; } prevRemoveActionPos = action.position; prevRemoveActionLen = action.lenData; } else if (action.at == ActionType::insert) { modFlags |= SC_MOD_DELETETEXT; coalescedRemovePos = -1; coalescedRemoveLen = 0; prevRemoveActionPos = -1; prevRemoveActionLen = 0; } if (steps > 1) modFlags |= SC_MULTISTEPUNDOREDO; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= SC_LASTSTEPINUNDOREDO; if (multiLine) modFlags |= SC_MULTILINEUNDOREDO; } NotifyModified(DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); } enteredModification--; } return newPos; } Sci::Position Document::Redo() { Sci::Position newPos = -1; CheckReadOnly(); if ((enteredModification == 0) && (cb.IsCollectingUndo())) { enteredModification++; if (!cb.IsReadOnly()) { const bool startSavePoint = cb.IsSavePoint(); bool multiLine = false; const int steps = cb.StartRedo(); for (int step = 0; step < steps; step++) { const Sci::Line prevLinesTotal = LinesTotal(); const Action &action = cb.GetRedoStep(); if (action.at == ActionType::insert) { NotifyModified(DocModification( SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action)); } else if (action.at == ActionType::container) { DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO); dm.token = action.position; NotifyModified(dm); } else { NotifyModified(DocModification( SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action)); } cb.PerformRedoStep(); if (action.at != ActionType::container) { ModifiedAt(action.position); newPos = action.position; } int modFlags = SC_PERFORMED_REDO; if (action.at == ActionType::insert) { newPos += action.lenData; modFlags |= SC_MOD_INSERTTEXT; } else if (action.at == ActionType::remove) { modFlags |= SC_MOD_DELETETEXT; } if (steps > 1) modFlags |= SC_MULTISTEPUNDOREDO; const Sci::Line linesAdded = LinesTotal() - prevLinesTotal; if (linesAdded != 0) multiLine = true; if (step == steps - 1) { modFlags |= SC_LASTSTEPINUNDOREDO; if (multiLine) modFlags |= SC_MULTILINEUNDOREDO; } NotifyModified( DocModification(modFlags, action.position, action.lenData, linesAdded, action.data.get())); } const bool endSavePoint = cb.IsSavePoint(); if (startSavePoint != endSavePoint) NotifySavePoint(endSavePoint); } enteredModification--; } return newPos; } void Document::DelChar(Sci::Position pos) { DeleteChars(pos, LenChar(pos)); } void Document::DelCharBack(Sci::Position pos) { if (pos <= 0) { return; } else if (IsCrLf(pos - 2)) { DeleteChars(pos - 2, 2); } else if (dbcsCodePage) { const Sci::Position startChar = NextPosition(pos, -1); DeleteChars(startChar, pos - startChar); } else { DeleteChars(pos - 1, 1); } } static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept { return ((pos / tabSize) + 1) * tabSize; } static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) { std::string indentation; if (!insertSpaces) { while (indent >= tabSize) { indentation += '\t'; indent -= tabSize; } } while (indent > 0) { indentation += ' '; indent--; } return indentation; } int SCI_METHOD Document::GetLineIndentation(Sci_Position line) { int indent = 0; if ((line >= 0) && (line < LinesTotal())) { const Sci::Position lineStart = LineStart(line); const Sci::Position length = Length(); for (Sci::Position i = lineStart; i < length; i++) { const char ch = cb.CharAt(i); if (ch == ' ') indent++; else if (ch == '\t') indent = static_cast(NextTab(indent, tabInChars)); else return indent; } } return indent; } Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) { const int indentOfLine = GetLineIndentation(line); if (indent < 0) indent = 0; if (indent != indentOfLine) { std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs); const Sci::Position thisLineStart = LineStart(line); const Sci::Position indentPos = GetLineIndentPosition(line); UndoGroup ug(this); DeleteChars(thisLineStart, indentPos - thisLineStart); return thisLineStart + InsertString(thisLineStart, linebuf.c_str(), linebuf.length()); } else { return GetLineIndentPosition(line); } } Sci::Position Document::GetLineIndentPosition(Sci::Line line) const { if (line < 0) return 0; Sci::Position pos = LineStart(line); const Sci::Position length = Length(); while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) { pos++; } return pos; } Sci::Position Document::GetColumn(Sci::Position pos) { Sci::Position column = 0; const Sci::Line line = SciLineFromPosition(pos); if ((line >= 0) && (line < LinesTotal())) { for (Sci::Position i = LineStart(line); i < pos;) { const char ch = cb.CharAt(i); if (ch == '\t') { column = NextTab(column, tabInChars); i++; } else if (ch == '\r') { return column; } else if (ch == '\n') { return column; } else if (i >= Length()) { return column; } else { column++; i = NextPosition(i, 1); } } } return column; } Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept { startPos = MovePositionOutsideChar(startPos, 1, false); endPos = MovePositionOutsideChar(endPos, -1, false); Sci::Position count = 0; Sci::Position i = startPos; while (i < endPos) { count++; i = NextPosition(i, 1); } return count; } Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept { startPos = MovePositionOutsideChar(startPos, 1, false); endPos = MovePositionOutsideChar(endPos, -1, false); Sci::Position count = 0; Sci::Position i = startPos; while (i < endPos) { count++; const Sci::Position next = NextPosition(i, 1); if ((next - i) > 3) count++; i = next; } return count; } Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) { Sci::Position position = LineStart(line); if ((line >= 0) && (line < LinesTotal())) { Sci::Position columnCurrent = 0; while ((columnCurrent < column) && (position < Length())) { const char ch = cb.CharAt(position); if (ch == '\t') { columnCurrent = NextTab(columnCurrent, tabInChars); if (columnCurrent > column) return position; position++; } else if (ch == '\r') { return position; } else if (ch == '\n') { return position; } else { columnCurrent++; position = NextPosition(position, 1); } } } return position; } void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) { // Dedent - suck white space off the front of the line to dedent by equivalent of a tab for (Sci::Line line = lineBottom; line >= lineTop; line--) { const Sci::Position indentOfLine = GetLineIndentation(line); if (forwards) { if (LineStart(line) < LineEnd(line)) { SetLineIndentation(line, indentOfLine + IndentSize()); } } else { SetLineIndentation(line, indentOfLine - IndentSize()); } } } // Convert line endings for a piece of text to a particular mode. // Stop at len or when a NUL is found. std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) { std::string dest; for (size_t i = 0; (i < len) && (s[i]); i++) { if (s[i] == '\n' || s[i] == '\r') { if (eolModeWanted == SC_EOL_CR) { dest.push_back('\r'); } else if (eolModeWanted == SC_EOL_LF) { dest.push_back('\n'); } else { // eolModeWanted == SC_EOL_CRLF dest.push_back('\r'); dest.push_back('\n'); } if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) { i++; } } else { dest.push_back(s[i]); } } return dest; } void Document::ConvertLineEnds(int eolModeSet) { UndoGroup ug(this); for (Sci::Position pos = 0; pos < Length(); pos++) { if (cb.CharAt(pos) == '\r') { if (cb.CharAt(pos + 1) == '\n') { // CRLF if (eolModeSet == SC_EOL_CR) { DeleteChars(pos + 1, 1); // Delete the LF } else if (eolModeSet == SC_EOL_LF) { DeleteChars(pos, 1); // Delete the CR } else { pos++; } } else { // CR if (eolModeSet == SC_EOL_CRLF) { pos += InsertString(pos + 1, "\n", 1); // Insert LF } else if (eolModeSet == SC_EOL_LF) { pos += InsertString(pos, "\n", 1); // Insert LF DeleteChars(pos, 1); // Delete CR pos--; } } } else if (cb.CharAt(pos) == '\n') { // LF if (eolModeSet == SC_EOL_CRLF) { pos += InsertString(pos, "\r", 1); // Insert CR } else if (eolModeSet == SC_EOL_CR) { pos += InsertString(pos, "\r", 1); // Insert CR DeleteChars(pos, 1); // Delete LF pos--; } } } } int Document::Options() const noexcept { return (IsLarge() ? SC_DOCUMENTOPTION_TEXT_LARGE : 0) | (cb.HasStyles() ? 0 : SC_DOCUMENTOPTION_STYLES_NONE); } bool Document::IsWhiteLine(Sci::Line line) const { Sci::Position currentChar = LineStart(line); const Sci::Position endLine = LineEnd(line); while (currentChar < endLine) { if (!IsSpaceOrTab(cb.CharAt(currentChar))) { return false; } ++currentChar; } return true; } Sci::Position Document::ParaUp(Sci::Position pos) const { Sci::Line line = SciLineFromPosition(pos); line--; while (line >= 0 && IsWhiteLine(line)) { // skip empty lines line--; } while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines line--; } line++; return LineStart(line); } Sci::Position Document::ParaDown(Sci::Position pos) const { Sci::Line line = SciLineFromPosition(pos); while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines line++; } while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines line++; } if (line < LinesTotal()) return LineStart(line); else // end of a document return LineEnd(line-1); } CharacterClass Document::WordCharacterClass(unsigned int ch) const { if (dbcsCodePage && (!UTF8IsAscii(ch))) { if (SC_CP_UTF8 == dbcsCodePage) { // Use hard coded Unicode class const CharacterCategory cc = charMap.CategoryFor(ch); switch (cc) { // Separator, Line/Paragraph case ccZl: case ccZp: return CharacterClass::newLine; // Separator, Space case ccZs: // Other case ccCc: case ccCf: case ccCs: case ccCo: case ccCn: return CharacterClass::space; // Letter case ccLu: case ccLl: case ccLt: case ccLm: case ccLo: // Number case ccNd: case ccNl: case ccNo: // Mark - includes combining diacritics case ccMn: case ccMc: case ccMe: return CharacterClass::word; // Punctuation case ccPc: case ccPd: case ccPs: case ccPe: case ccPi: case ccPf: case ccPo: // Symbol case ccSm: case ccSc: case ccSk: case ccSo: return CharacterClass::punctuation; } } else { // Asian DBCS return CharacterClass::word; } } return charClass.GetClass(static_cast(ch)); } /** * Used by commands that want to select whole words. * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0. */ Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const { CharacterClass ccStart = CharacterClass::word; if (delta < 0) { if (!onlyWordCharacters) { const CharacterExtracted ce = CharacterBefore(pos); ccStart = WordCharacterClass(ce.character); } while (pos > 0) { const CharacterExtracted ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos -= ce.widthBytes; } } else { if (!onlyWordCharacters && pos < LengthNoExcept()) { const CharacterExtracted ce = CharacterAfter(pos); ccStart = WordCharacterClass(ce.character); } while (pos < LengthNoExcept()) { const CharacterExtracted ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos += ce.widthBytes; } } return MovePositionOutsideChar(pos, delta, true); } /** * Find the start of the next word in either a forward (delta >= 0) or backwards direction * (delta < 0). * This is looking for a transition between character classes although there is also some * additional movement to transit white space. * Used by cursor movement by word commands. */ Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const { if (delta < 0) { while (pos > 0) { const CharacterExtracted ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos -= ce.widthBytes; } if (pos > 0) { CharacterExtracted ce = CharacterBefore(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos -= ce.widthBytes; } } } else { CharacterExtracted ce = CharacterAfter(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); while (pos < LengthNoExcept()) { ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos += ce.widthBytes; } while (pos < LengthNoExcept()) { ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos += ce.widthBytes; } } return pos; } /** * Find the end of the next word in either a forward (delta >= 0) or backwards direction * (delta < 0). * This is looking for a transition between character classes although there is also some * additional movement to transit white space. * Used by cursor movement by word commands. */ Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const { if (delta < 0) { if (pos > 0) { CharacterExtracted ce = CharacterBefore(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); if (ccStart != CharacterClass::space) { while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos -= ce.widthBytes; } } while (pos > 0) { ce = CharacterBefore(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos -= ce.widthBytes; } } } else { while (pos < LengthNoExcept()) { const CharacterExtracted ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != CharacterClass::space) break; pos += ce.widthBytes; } if (pos < LengthNoExcept()) { CharacterExtracted ce = CharacterAfter(pos); const CharacterClass ccStart = WordCharacterClass(ce.character); while (pos < LengthNoExcept()) { ce = CharacterAfter(pos); if (WordCharacterClass(ce.character) != ccStart) break; pos += ce.widthBytes; } } } return pos; } /** * Check that the character at the given position is a word or punctuation character and that * the previous character is of a different character class. */ bool Document::IsWordStartAt(Sci::Position pos) const { if (pos >= LengthNoExcept()) return false; if (pos > 0) { const CharacterExtracted cePos = CharacterAfter(pos); const CharacterClass ccPos = WordCharacterClass(cePos.character); const CharacterExtracted cePrev = CharacterBefore(pos); const CharacterClass ccPrev = WordCharacterClass(cePrev.character); return (ccPos == CharacterClass::word || ccPos == CharacterClass::punctuation) && (ccPos != ccPrev); } return true; } /** * Check that the character at the given position is a word or punctuation character and that * the next character is of a different character class. */ bool Document::IsWordEndAt(Sci::Position pos) const { if (pos <= 0) return false; if (pos < LengthNoExcept()) { const CharacterExtracted cePos = CharacterAfter(pos); const CharacterClass ccPos = WordCharacterClass(cePos.character); const CharacterExtracted cePrev = CharacterBefore(pos); const CharacterClass ccPrev = WordCharacterClass(cePrev.character); return (ccPrev == CharacterClass::word || ccPrev == CharacterClass::punctuation) && (ccPrev != ccPos); } return true; } /** * Check that the given range is has transitions between character classes at both * ends and where the characters on the inside are word or punctuation characters. */ bool Document::IsWordAt(Sci::Position start, Sci::Position end) const { return (start < end) && IsWordStartAt(start) && IsWordEndAt(end); } bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const { return (!word && !wordStart) || (word && IsWordAt(pos, pos + length)) || (wordStart && IsWordStartAt(pos)); } bool Document::HasCaseFolder() const noexcept { return pcf != nullptr; } void Document::SetCaseFolder(std::unique_ptr pcf_) noexcept { pcf = std::move(pcf_); } Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept { const unsigned char leadByte = cb.UCharAt(position); if (UTF8IsAscii(leadByte)) { // Common case: ASCII character return CharacterExtracted(leadByte, 1); } const int widthCharBytes = UTF8BytesOfLead[leadByte]; unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 }; for (int b=1; b maxPos to do a backward search) * Has not been tested with back