aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/DBCS.cxx
blob: 062c30a5121c6af46c78c1e76e74d7effaa20967 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// Scintilla source code edit control
/** @file DBCS.cxx
 ** Functions to handle DBCS double byte encodings like Shift-JIS.
 **/
// Copyright 2017 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#include <cstdint>

#include <vector>
#include <array>
#include <map>
#include <algorithm>

#include "DBCS.h"

using namespace Scintilla::Internal;

namespace Scintilla::Internal {

// Silence 'magic' number warnings since the sets of DBCS lead and trail bytes differ
// between encodings and would require many constant declarations that would just
// obscure the behaviour.

// NOLINTBEGIN(*-magic-numbers)

// Report whether ch can be the first byte of a two byte character in codePage.
// Code pages other than cp932, cp936, cp949, cp950 and cp1361 always yield false.
// Byte ranges found in Wikipedia articles with relevant search strings in each case.
bool DBCSIsLeadByte(int codePage, char ch) noexcept {
	// Work with the unsigned value so bytes above 0x7F compare as 0x80..0xFF
	// whether or not plain char is signed.
	const unsigned int lead = static_cast<unsigned char>(ch);
	const auto within = [lead](unsigned int first, unsigned int last) noexcept {
		return (first <= lead) && (lead <= last);
	};
	switch (codePage) {
	case cp932:
		// Shift_jis
		// Lead bytes F0 to FC may be a Microsoft addition.
		return within(0x81, 0x9F) || within(0xE0, 0xFC);
	case cp936:	// GBK
	case cp949:	// Korean Wansung KS C-5601-1987
	case cp950:	// Big5
		// These three encodings share one lead byte range.
		return within(0x81, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return within(0x84, 0xD3) || within(0xD8, 0xDE) || within(0xE0, 0xF9);
	default:
		return false;
	}
}

// Report whether ch can be the second byte of a two byte character in codePage.
// Code pages other than cp932, cp936, cp949, cp950 and cp1361 always yield false.
bool DBCSIsTrailByte(int codePage, char ch) noexcept {
	// Work with the unsigned value so bytes above 0x7F compare as 0x80..0xFF
	// whether or not plain char is signed.
	const unsigned int second = static_cast<unsigned char>(ch);
	const auto within = [second](unsigned int first, unsigned int last) noexcept {
		return (first <= second) && (second <= last);
	};
	switch (codePage) {
	case cp932:
		// Shift_jis: 0x40 to 0xFC with 0x7F excluded
		return within(0x40, 0x7E) || within(0x80, 0xFC);
	case cp936:
		// GBK: 0x40 to 0xFE with 0x7F excluded
		return within(0x40, 0x7E) || within(0x80, 0xFE);
	case cp949:
		// Korean Wansung KS C-5601-1987
		return within(0x41, 0x5A) || within(0x61, 0x7A) || within(0x81, 0xFE);
	case cp950:
		// Big5
		return within(0x40, 0x7E) || within(0xA1, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return within(0x31, 0x7E) || within(0x81, 0xFE);
	default:
		return false;
	}
}

// Report whether ch is accepted as a single byte value for codePage.
// Only cp932 and cp936 have accepted values; every other code page yields false.
bool IsDBCSValidSingleByte(int codePage, int ch) noexcept {
	if (codePage == cp932) {
		// Shift_jis: 0x80, 0xA0 to 0xDF, and anything from 0xFD upwards
		if (ch == 0x80) {
			return true;
		}
		if (ch >= 0xFD) {
			return true;
		}
		return (ch >= 0xA0) && (ch <= 0xDF);
	}
	if (codePage == cp936) {
		// GBK: only 0x80
		return ch == 0x80;
	}
	return false;
}

// NOLINTEND(*-magic-numbers)

namespace {

struct CodePageFoldMap {
	int codePage = 0;
	FoldMap foldMap;
	explicit CodePageFoldMap(int codePage_) noexcept : codePage {codePage_} {}
};

using CodePageToFoldMap = std::vector<CodePageFoldMap>;
CodePageToFoldMap cpToFoldMap;

}

FoldMap *DBCSCreateFoldMap(int codePage) {
	cpToFoldMap.emplace_back(codePage);
	return &(cpToFoldMap.back().foldMap);
}

const FoldMap *DBCSGetFoldMap(int codePage) {
	const CodePageToFoldMap::iterator it = std::find_if(cpToFoldMap.begin(), cpToFoldMap.end(),
		[codePage](const CodePageFoldMap &cpfm) -> bool {return cpfm.codePage == codePage; });
	if (it != cpToFoldMap.end()) {
		return &(it->foldMap);
	}
	return nullptr;
}

}