aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/DBCS.cxx
blob: 0f22a705a9ec386dee5dec1d86977ab453370a0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// Scintilla source code edit control
/** @file DBCS.cxx
 ** Functions to handle DBCS double byte encodings like Shift-JIS.
 **/
// Copyright 2017 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

#include <cstdint>

#include <array>
#include <map>

#include "DBCS.h"

using namespace Scintilla::Internal;

namespace Scintilla::Internal {

// Report whether ch may be the first byte of a two byte character in the
// double byte code page codePage.
// Code pages not listed here have no lead bytes, so false is returned for them.
// Byte ranges found in Wikipedia articles with relevant search strings in each case.
bool DBCSIsLeadByte(int codePage, char ch) noexcept {
	// Compare as an unsigned value so that bytes with the high bit set
	// are seen as 0x80..0xFF whether or not plain char is signed.
	const unsigned char lead = ch;
	const auto within = [lead](int first, int last) noexcept {
		return (first <= lead) && (lead <= last);
	};
	switch (codePage) {
	case cp932:
		// Shift_jis
		// Lead bytes F0 to FC may be a Microsoft addition.
		return within(0x81, 0x9F) || within(0xE0, 0xFC);
	case cp936:	// GBK
	case cp949:	// Korean Wansung KS C-5601-1987
	case cp950:	// Big5
		// These three code pages share the same lead byte range.
		return within(0x81, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return within(0x84, 0xD3) ||
			within(0xD8, 0xDE) ||
			within(0xE0, 0xF9);
	default:
		return false;
	}
}

// Report whether ch may be the second byte of a two byte character in the
// double byte code page codePage.
// Code pages not listed here have no trail bytes, so false is returned for them.
bool DBCSIsTrailByte(int codePage, char ch) noexcept {
	// Compare as an unsigned value so that bytes with the high bit set
	// are seen as 0x80..0xFF whether or not plain char is signed.
	const unsigned char second = ch;
	const auto within = [second](int first, int last) noexcept {
		return (first <= second) && (second <= last);
	};
	switch (codePage) {
	case cp932:
		// Shift_jis: 0x40..0xFC with 0x7F excluded
		return within(0x40, 0x7E) || within(0x80, 0xFC);
	case cp936:
		// GBK: 0x40..0xFE with 0x7F excluded
		return within(0x40, 0x7E) || within(0x80, 0xFE);
	case cp949:
		// Korean Wansung KS C-5601-1987
		return within(0x41, 0x5A) ||
			within(0x61, 0x7A) ||
			within(0x81, 0xFE);
	case cp950:
		// Big5
		return within(0x40, 0x7E) || within(0xA1, 0xFE);
	case cp1361:
		// Korean Johab KS C-5601-1992
		return within(0x31, 0x7E) || within(0x81, 0xFE);
	default:
		return false;
	}
}

// Report whether the value ch is accepted as a stand-alone single byte
// in the double byte code page codePage.
// Only cp932 has any such values defined here; every other code page yields false.
bool IsDBCSValidSingleByte(int codePage, int ch) noexcept {
	if (codePage != cp932) {
		return false;
	}
	// For cp932 the accepted values are 0x80, 0xA0..0xDF and anything from
	// 0xFD upwards, which are the values at or above 0x80 that
	// DBCSIsLeadByte does not treat as cp932 lead bytes.
	if (ch == 0x80) {
		return true;
	}
	if (ch >= 0xFD) {
		// No upper limit is applied to ch.
		return true;
	}
	return (0xA0 <= ch) && (ch <= 0xDF);
}

// Association from a code page number to the FoldMap stored for it.
// FoldMap is not declared in this file; presumably it comes from DBCS.h.
using CodePageToFoldMap = std::map<int, FoldMap>;
// Storage of fold maps keyed by code page, read and modified by the
// DBCS*FoldMap functions in this file.
// NOTE(review): no synchronization is visible in this file - confirm that
// all access happens on one thread.
CodePageToFoldMap cpToFoldMap;

// Report whether an entry for codePage is present in cpToFoldMap.
// This is a pure lookup: no entry is created when none is present.
bool DBCSHasFoldMap(int codePage) {
	// Keys in a std::map are unique so count is either 0 or 1.
	return cpToFoldMap.count(codePage) != 0;
}

void DBCSSetFoldMap(int codePage, const FoldMap &foldMap) {
	cpToFoldMap[codePage] = foldMap;
}

// Return a pointer to the modifiable FoldMap stored for codePage.
// The result is never null: an entry is constructed if needed.
FoldMap *DBCSGetMutableFoldMap(int codePage) {
	// Indexing a std::map inserts a value-initialized element for a missing key.
	FoldMap &entry = cpToFoldMap[codePage];
	return &entry;
}

// Return a pointer to the read-only FoldMap stored for codePage.
// The result is never null. Although the result is const, the lookup indexes
// cpToFoldMap, so a value-initialized entry is inserted when codePage has no
// entry yet and DBCSHasFoldMap(codePage) will report true afterwards.
const FoldMap *DBCSGetFoldMap(int codePage) {
	const FoldMap &entry = cpToFoldMap[codePage];
	return &entry;
}

}