From fa832504c217fa68b9376c1969a7db95f6e8d24e Mon Sep 17 00:00:00 2001
From: nyamatongwe
Date: Wed, 21 Mar 2012 23:39:03 +1100
Subject: Ensure segment discovery always makes progress even for invalid UTF-8.

---
 src/Document.cxx | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/Document.cxx b/src/Document.cxx
index fd71a1363..fae97856a 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -721,7 +721,12 @@ int Document::SafeSegment(const char *text, int length, int lengthSegment) {
 		lastEncodingAllowedBreak = j;
 
 		if (dbcsCodePage == SC_CP_UTF8) {
-			j += (ch < 0x80) ? 1 : BytesFromLead(ch);
+			if (ch < 0x80) {
+				j++;
+			} else {
+				int bytes = BytesFromLead(ch);
+				j += bytes ? bytes : 1;
+			}
 		} else if (dbcsCodePage) {
 			j += IsDBCSLeadByte(ch) ? 2 : 1;
 		} else {
-- 
cgit v1.2.3