From 9975609bf3b39f0e1cd121995ac49aea30a6c48f Mon Sep 17 00:00:00 2001
From: Zufu Liu
Date: Thu, 21 Oct 2021 22:15:57 +1100
Subject: Feature [feature-requests:#1417] Use backward iteration to find space / control character and text / punctuation boundaries in SafeSegment as this will be simpler and faster in almost all cases. Simplify BreakFinder::Next calling SafeSegment.

---
 src/CharacterType.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'src/CharacterType.h')

diff --git a/src/CharacterType.h b/src/CharacterType.h
index b014f1050..437fb8c5c 100644
--- a/src/CharacterType.h
+++ b/src/CharacterType.h
@@ -32,6 +32,13 @@ constexpr bool IsEOLCharacter(int ch) noexcept {
 	return ch == '\r' || ch == '\n';
 }
 
+constexpr bool IsBreakSpace(int ch) noexcept {
+	// used for text breaking, treat C0 control character as space.
+	// by default C0 control character is handled as special representation,
+	// so not appears in normal text. 0x7F DEL is omitted to simplify the code.
+	return ch >= 0 && ch <= ' ';
+}
+
 constexpr bool IsADigit(int ch) noexcept {
 	return (ch >= '0') && (ch <= '9');
 }
-- 
cgit v1.2.3