blob: f53b8e9ed8d3fc918a04353a4df207283e59830e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
// Scintilla source code edit control
/** @file DBCS.cxx
** Functions to handle DBCS double byte encodings like Shift-JIS.
**/
// Copyright 2017 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <cstdint>
#include <array>
#include <map>
#include "DBCS.h"
using namespace Scintilla::Internal;
namespace Scintilla::Internal {
// Silence 'magic' number use since the set of DBCS lead and trail bytes differ
// between encodings and would require many constant declarations that would just
// obscure the behaviour.
// NOLINTBEGIN(*-magic-numbers)
// Report whether ch can be the first byte of a two-byte character in codePage.
// Byte ranges found in Wikipedia articles with relevant search strings in each case.
// Returns false for any code page not listed here.
bool DBCSIsLeadByte(int codePage, char ch) noexcept {
	// Work with the unsigned value so bytes of 0x80 and above compare as positive.
	const unsigned char lead = ch;
	// True when lead lies in the closed interval [low, high].
	const auto within = [lead](int low, int high) noexcept {
		return (lead >= low) && (lead <= high);
	};
	switch (codePage) {
	case cp932:
		// Shift_jis
		// Lead bytes F0 to FC may be a Microsoft addition.
		return within(0x81, 0x9F) || within(0xE0, 0xFC);
	case cp936:	// GBK
	case cp949:	// Korean Wansung KS C-5601-1987
	case cp950:	// Big5
		// These three encodings share one contiguous lead byte range.
		return within(0x81, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return within(0x84, 0xD3) ||
			within(0xD8, 0xDE) ||
			within(0xE0, 0xF9);
	default:
		return false;
	}
}
// Report whether ch can be the second byte of a two-byte character in codePage.
// Returns false for any code page not listed here.
bool DBCSIsTrailByte(int codePage, char ch) noexcept {
	// Work with the unsigned value so bytes of 0x80 and above compare as positive.
	const unsigned char uch = ch;
	// True when uch lies in the closed interval [low, high].
	const auto between = [uch](int low, int high) noexcept {
		return (uch >= low) && (uch <= high);
	};
	switch (codePage) {
	case cp932:
		// Shift_jis: 0x40..0xFC with 0x7F excluded
		return between(0x40, 0xFC) && (uch != 0x7F);
	case cp936:
		// GBK: 0x40..0xFE with 0x7F excluded
		return between(0x40, 0xFE) && (uch != 0x7F);
	case cp949:
		// Korean Wansung KS C-5601-1987
		return between(0x41, 0x5A) ||
			between(0x61, 0x7A) ||
			between(0x81, 0xFE);
	case cp950:
		// Big5
		return between(0x40, 0x7E) || between(0xA1, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return between(0x31, 0x7E) || between(0x81, 0xFE);
	default:
		return false;
	}
}
// Report whether ch is accepted as a single-byte value in codePage.
// Only cp932 has any accepted values: 0x80, 0xA0 to 0xDF inclusive, and
// anything from 0xFD upwards. Every other code page yields false.
bool IsDBCSValidSingleByte(int codePage, int ch) noexcept {
	if (codePage != cp932) {
		return false;
	}
	if (ch == 0x80) {
		return true;
	}
	if (ch >= 0xFD) {
		return true;
	}
	return (ch >= 0xA0) && (ch <= 0xDF);
}
// NOLINTEND(*-magic-numbers)
// Associates a code page number with the FoldMap stored for it.
using CodePageToFoldMap = std::map<int, FoldMap>;
// Storage behind the DBCS*FoldMap functions below; entries are added by
// DBCSSetFoldMap and, through operator[], by the two getter functions.
CodePageToFoldMap cpToFoldMap;
// Report whether an entry for codePage is currently present in cpToFoldMap.
// Does not create an entry.
bool DBCSHasFoldMap(int codePage) {
	// Map keys are unique so count is either 0 or 1.
	return cpToFoldMap.count(codePage) != 0;
}
void DBCSSetFoldMap(int codePage, const FoldMap &foldMap) {
cpToFoldMap[codePage] = foldMap;
}
// Return a pointer to the modifiable FoldMap entry for codePage.
// An entry is default-constructed first when none exists, so the result is never null.
FoldMap *DBCSGetMutableFoldMap(int codePage) {
	FoldMap &entry = cpToFoldMap[codePage];
	return &entry;
}
// Return a read-only pointer to the FoldMap entry for codePage.
// The lookup uses operator[], so an entry is default-constructed when none exists:
// the result is never null, and DBCSHasFoldMap(codePage) is true afterwards.
const FoldMap *DBCSGetFoldMap(int codePage) {
	const FoldMap &entry = cpToFoldMap[codePage];
	return &entry;
}
}
|