From 16f7ad9f9235e236747f9679337b15dda4dddb46 Mon Sep 17 00:00:00 2001 From: nyamatongwe Date: Wed, 4 Aug 2010 23:10:49 +1000 Subject: Simplified DBCS to only handle 1 and 2 byte characters in CP 932, 936, 949, or 950. Moved DBCS processing from Platform to Document. --- src/Document.cxx | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'src') diff --git a/src/Document.cxx b/src/Document.cxx index 577f70910..07031462b 100644 --- a/src/Document.cxx +++ b/src/Document.cxx @@ -372,8 +372,6 @@ bool Document::IsCrLf(int pos) { return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n'); } -static const int maxBytesInDBCSCharacter=5; - int Document::LenChar(int pos) { if (pos < 0) { return 1; @@ -394,13 +392,7 @@ int Document::LenChar(int pos) { else return len; } else if (dbcsCodePage) { - char mbstr[maxBytesInDBCSCharacter+1]; - int i; - for (i=0; i pos) { @@ -525,7 +510,25 @@ int SCI_METHOD Document::CodePage() const { } bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const { - return Platform::IsDBCSLeadByte(dbcsCodePage, ch); + // Byte ranges found in Wikipedia articles with relevant search strings in each case + unsigned char uch = static_cast<unsigned char>(ch); + switch (dbcsCodePage) { + case 932: + // Shift_jis + return ((uch >= 0x81) && (uch <= 0x9F)) || + ((uch >= 0xE0) && (uch <= 0xEF)); + case 936: + // GBK + return (uch >= 0x81) && (uch <= 0xFE); + case 949: + // Korean EUC-KR + // There is also a code page 1361 for Korean Johab which appears to not be widely supported + return (uch >= 0x81) && (uch <= 0xFE); + case 950: + // Big5 + return (uch >= 0x81) && (uch <= 0xFE); + } + return false; } void Document::ModifiedAt(int pos) { -- cgit v1.2.3