blob: 062c30a5121c6af46c78c1e76e74d7effaa20967 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
|
// Scintilla source code edit control
/** @file DBCS.cxx
** Functions to handle DBCS double byte encodings like Shift-JIS.
**/
// Copyright 2017 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <cstdint>
#include <vector>
#include <array>
#include <map>
#include <algorithm>
#include "DBCS.h"
using namespace Scintilla::Internal;
namespace Scintilla::Internal {
// Silence 'magic' number warnings since the sets of DBCS lead and trail bytes differ
// between encodings and would require many constant declarations that would just
// obscure the behaviour.
// NOLINTBEGIN(*-magic-numbers)
// Report whether ch can be the first byte of a two-byte character in codePage.
// Any code page not listed in the switch has no lead bytes so false is returned.
// Byte ranges found in Wikipedia articles with relevant search strings in each case.
bool DBCSIsLeadByte(int codePage, char ch) noexcept {
	// Compare as an unsigned value so that bytes with the top bit set are seen as 0x80..0xFF
	// even when plain char is signed.
	const unsigned char lead = ch;
	const auto inRange = [lead](int low, int high) noexcept -> bool {
		return (lead >= low) && (lead <= high);
	};
	switch (codePage) {
	case cp932:
		// Shift_jis
		// Lead bytes F0 to FC may be a Microsoft addition.
		return inRange(0x81, 0x9F) || inRange(0xE0, 0xFC);
	case cp936:
		// GBK
	case cp949:
		// Korean Wansung KS C-5601-1987
	case cp950:
		// Big5
		// These three encodings share the same lead byte range.
		return inRange(0x81, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return inRange(0x84, 0xD3) || inRange(0xD8, 0xDE) || inRange(0xE0, 0xF9);
	default:
		return false;
	}
}
// Report whether ch can be the second byte of a two-byte character in codePage.
// Any code page not listed in the switch has no trail bytes so false is returned.
bool DBCSIsTrailByte(int codePage, char ch) noexcept {
	// Compare as an unsigned value so that bytes with the top bit set are seen as 0x80..0xFF
	// even when plain char is signed.
	const unsigned char uch = ch;
	const auto between = [uch](int low, int high) noexcept -> bool {
		return (uch >= low) && (uch <= high);
	};
	// 0x7F is excluded from the otherwise contiguous Shift_jis and GBK ranges.
	const bool notDelete = uch != 0x7F;
	switch (codePage) {
	case cp932:
		// Shift_jis
		return notDelete && between(0x40, 0xFC);
	case cp936:
		// GBK
		return notDelete && between(0x40, 0xFE);
	case cp949:
		// Korean Wansung KS C-5601-1987
		return between(0x41, 0x5A) || between(0x61, 0x7A) || between(0x81, 0xFE);
	case cp950:
		// Big5
		return between(0x40, 0x7E) || between(0xA1, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return between(0x31, 0x7E) || between(0x81, 0xFE);
	default:
		return false;
	}
}
// Report whether ch is accepted as a complete single byte character in codePage.
// Only Shift_jis and GBK list any such values here; false is returned for every other code page.
bool IsDBCSValidSingleByte(int codePage, int ch) noexcept {
	if (codePage == cp932) {
		// Shift_jis: 0x80, 0xA0 to 0xDF, and everything from 0xFD upwards
		if ((ch == 0x80) || (ch >= 0xFD)) {
			return true;
		}
		return (ch >= 0xA0) && (ch <= 0xDF);
	}
	if (codePage == cp936) {
		// GBK: just 0x80
		return ch == 0x80;
	}
	return false;
}
// NOLINTEND(*-magic-numbers)
namespace {
struct CodePageFoldMap {
int codePage = 0;
FoldMap foldMap;
explicit CodePageFoldMap(int codePage_) noexcept : codePage {codePage_} {}
};
using CodePageToFoldMap = std::vector<CodePageFoldMap>;
CodePageToFoldMap cpToFoldMap;
}
FoldMap *DBCSCreateFoldMap(int codePage) {
cpToFoldMap.emplace_back(codePage);
return &(cpToFoldMap.back().foldMap);
}
const FoldMap *DBCSGetFoldMap(int codePage) {
const CodePageToFoldMap::iterator it = std::find_if(cpToFoldMap.begin(), cpToFoldMap.end(),
[codePage](const CodePageFoldMap &cpfm) -> bool {return cpfm.codePage == codePage; });
if (it != cpToFoldMap.end()) {
return &(it->foldMap);
}
return nullptr;
}
}
|