From ef61afe500f7ba58b1fcefaa1ab0205ccb66b5c2 Mon Sep 17 00:00:00 2001 From: nyamatongwe Date: Fri, 13 May 2011 10:27:30 +1000 Subject: Break measurement of text into reasonable sized segments similar to drawing. Drawing will now always be broken up at a character boundary even when there is a large number of alphabetic characters. Fixes bug #3165743. --- src/Document.cxx | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'src/Document.cxx') diff --git a/src/Document.cxx b/src/Document.cxx index 08bc24ecf..7b718f272 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -713,6 +713,55 @@ bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const { return false; } +inline bool IsSpaceOrTab(int ch) { + return ch == ' ' || ch == '\t'; +} + +// Need to break text into segments near lengthSegment but taking into +// account the encoding to not break inside a UTF-8 or DBCS character +// and also trying to avoid breaking inside a pair of combining characters. +// The segment length must always be long enough (more than 4 bytes) +// so that there will be at least one whole character to make a segment. +// For UTF-8, text must consist only of valid whole characters. +// In preference order from best to worst: +// 1) Break after space +// 2) Break before punctuation +// 3) Break after whole character + +int Document::SafeSegment(const char *text, int length, int lengthSegment) { + if (length <= lengthSegment) + return length; + int lastSpaceBreak = -1; + int lastPunctuationBreak = -1; + int lastEncodingAllowedBreak = -1; + for (int j=0; j < lengthSegment;) { + unsigned char ch = static_cast(text[j]); + if (j > 0) { + if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) { + lastSpaceBreak = j; + } + if (ch < 'A') { + lastPunctuationBreak = j; + } + } + lastEncodingAllowedBreak = j; + + if (dbcsCodePage == SC_CP_UTF8) { + j += (ch < 0x80) ? 1 : BytesFromLead(ch); + } else if (dbcsCodePage) { + j += IsDBCSLeadByte(ch) ? 2 : 1; + } else { + j++; + } + } + if (lastSpaceBreak >= 0) { + return lastSpaceBreak; + } else if (lastPunctuationBreak >= 0) { + return lastPunctuationBreak; + } + return lastEncodingAllowedBreak; +} + void Document::ModifiedAt(int pos) { if (endStyled > pos) endStyled = pos; -- cgit v1.2.3