aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CharClassify.cxx43
-rw-r--r--src/CharClassify.h25
-rw-r--r--src/Document.cxx62
-rw-r--r--src/Document.h6
-rw-r--r--src/DocumentAccessor.cxx1
-rw-r--r--src/Editor.cxx5
-rw-r--r--src/RESearch.cxx74
-rw-r--r--src/RESearch.h9
-rw-r--r--src/ScintillaBase.cxx1
9 files changed, 121 insertions, 105 deletions
diff --git a/src/CharClassify.cxx b/src/CharClassify.cxx
new file mode 100644
index 000000000..acab4b229
--- /dev/null
+++ b/src/CharClassify.cxx
@@ -0,0 +1,43 @@
+// Scintilla source code edit control
+/** @file CharClassify.cxx
+ ** Character classifications used by Document and RESearch.
+ **/
+// Copyright 2006 by Neil Hodgson <neilh@scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#include <ctype.h>
+
+#include "CharClassify.h"
+
+// Shut up annoying Visual C++ warnings:
+#ifdef _MSC_VER
+#pragma warning(disable: 4514)
+#endif
+
+CharClassify::CharClassify() {
+ SetDefaultCharClasses(true);
+}
+
+void CharClassify::SetDefaultCharClasses(bool includeWordClass) {
+ // Initialize all char classes to default values
+ for (int ch = 0; ch < 256; ch++) {
+ if (ch == '\r' || ch == '\n')
+ charClass[ch] = ccNewLine;
+ else if (ch < 0x20 || ch == ' ')
+ charClass[ch] = ccSpace;
+ else if (includeWordClass && (ch >= 0x80 || isalnum(ch) || ch == '_'))
+ charClass[ch] = ccWord;
+ else
+ charClass[ch] = ccPunctuation;
+ }
+}
+
+void CharClassify::SetCharClasses(const unsigned char *chars, cc newCharClass) {
+ // Apply the newCharClass to the specified chars
+ if (chars) {
+ while (*chars) {
+ charClass[*chars] = static_cast<unsigned char>(newCharClass);
+ chars++;
+ }
+ }
+}
diff --git a/src/CharClassify.h b/src/CharClassify.h
new file mode 100644
index 000000000..881d3a114
--- /dev/null
+++ b/src/CharClassify.h
@@ -0,0 +1,25 @@
+// Scintilla source code edit control
+/** @file CharClassify.h
+ ** Character classifications used by Document and RESearch.
+ **/
+// Copyright 2006 by Neil Hodgson <neilh@scintilla.org>
+// The License.txt file describes the conditions under which this software may be distributed.
+
+#ifndef CHARCLASSIFY_H
+#define CHARCLASSIFY_H
+
+class CharClassify {
+public:
+ CharClassify();
+
+ enum cc { ccSpace, ccNewLine, ccWord, ccPunctuation };
+ void SetDefaultCharClasses(bool includeWordClass);
+ void SetCharClasses(const unsigned char *chars, cc newCharClass);
+ cc GetClass(unsigned char ch) const { return static_cast<cc>(charClass[ch]);}
+ bool IsWord(unsigned char ch) const { return static_cast<cc>(charClass[ch]) == ccWord;}
+
+private:
+ enum { maxChar=256 };
+ unsigned char charClass[maxChar]; // stored as unsigned char rather than cc to save space
+};
+#endif
diff --git a/src/Document.cxx b/src/Document.cxx
index a88468cb1..92be92691 100644
--- a/src/Document.cxx
+++ b/src/Document.cxx
@@ -15,6 +15,7 @@
#include "Scintilla.h"
#include "SVector.h"
#include "CellBuffer.h"
+#include "CharClassify.h"
#include "Document.h"
#include "RESearch.h"
@@ -50,7 +51,6 @@ Document::Document() {
stylingBits = 5;
stylingBitsMask = 0x1F;
stylingMask = 0;
- SetDefaultCharClasses(true);
endStyled = 0;
styleClock = 0;
enteredCount = 0;
@@ -836,10 +836,10 @@ int Document::ParaDown(int pos) {
return LineEnd(line-1);
}
-Document::charClassification Document::WordCharClass(unsigned char ch) {
+CharClassify::cc Document::WordCharClass(unsigned char ch) {
if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80))
- return ccWord;
- return charClass[ch];
+ return CharClassify::ccWord;
+ return charClass.GetClass(ch);
}
/**
@@ -847,7 +847,7 @@ Document::charClassification Document::WordCharClass(unsigned char ch) {
* Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
*/
int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
- charClassification ccStart = ccWord;
+ CharClassify::cc ccStart = CharClassify::ccWord;
if (delta < 0) {
if (!onlyWordCharacters)
ccStart = WordCharClass(cb.CharAt(pos-1));
@@ -871,19 +871,19 @@ int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
*/
int Document::NextWordStart(int pos, int delta) {
if (delta < 0) {
- while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccSpace))
+ while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
pos--;
if (pos > 0) {
- charClassification ccStart = WordCharClass(cb.CharAt(pos-1));
+ CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
pos--;
}
}
} else {
- charClassification ccStart = WordCharClass(cb.CharAt(pos));
+ CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
pos++;
- while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccSpace))
+ while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
pos++;
}
return pos;
@@ -899,22 +899,22 @@ int Document::NextWordStart(int pos, int delta) {
int Document::NextWordEnd(int pos, int delta) {
if (delta < 0) {
if (pos > 0) {
- charClassification ccStart = WordCharClass(cb.CharAt(pos-1));
- if (ccStart != ccSpace) {
+ CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
+ if (ccStart != CharClassify::ccSpace) {
while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
pos--;
}
}
- while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccSpace) {
+ while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
pos--;
}
}
} else {
- while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccSpace) {
+ while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
pos++;
}
if (pos < Length()) {
- charClassification ccStart = WordCharClass(cb.CharAt(pos));
+ CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
pos++;
}
@@ -929,8 +929,8 @@ int Document::NextWordEnd(int pos, int delta) {
*/
bool Document::IsWordStartAt(int pos) {
if (pos > 0) {
- charClassification ccPos = WordCharClass(CharAt(pos));
- return (ccPos == ccWord || ccPos == ccPunctuation) &&
+ CharClassify::cc ccPos = WordCharClass(CharAt(pos));
+ return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
(ccPos != WordCharClass(CharAt(pos - 1)));
}
return true;
@@ -942,8 +942,8 @@ bool Document::IsWordStartAt(int pos) {
*/
bool Document::IsWordEndAt(int pos) {
if (pos < Length()) {
- charClassification ccPrev = WordCharClass(CharAt(pos-1));
- return (ccPrev == ccWord || ccPrev == ccPunctuation) &&
+ CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
+ return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
(ccPrev != WordCharClass(CharAt(pos)));
}
return true;
@@ -1004,7 +1004,7 @@ long Document::FindText(int minPos, int maxPos, const char *s,
int *length) {
if (regExp) {
if (!pre)
- pre = new RESearch();
+ pre = new RESearch(&charClass);
if (!pre)
return -1;
@@ -1266,27 +1266,11 @@ void Document::ChangeCase(Range r, bool makeUpperCase) {
}
void Document::SetDefaultCharClasses(bool includeWordClass) {
- // Initialize all char classes to default values
- for (int ch = 0; ch < 256; ch++) {
- if (ch == '\r' || ch == '\n')
- charClass[ch] = ccNewLine;
- else if (ch < 0x20 || ch == ' ')
- charClass[ch] = ccSpace;
- else if (includeWordClass && (ch >= 0x80 || isalnum(ch) || ch == '_'))
- charClass[ch] = ccWord;
- else
- charClass[ch] = ccPunctuation;
- }
+ charClass.SetDefaultCharClasses(includeWordClass);
}
-void Document::SetCharClasses(const unsigned char *chars, charClassification newCharClass) {
- // Apply the newCharClass to the specifed chars
- if (chars) {
- while (*chars) {
- charClass[*chars] = newCharClass;
- chars++;
- }
- }
+void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
+ charClass.SetCharClasses(chars, newCharClass);
}
void Document::SetStylingBits(int bits) {
@@ -1430,7 +1414,7 @@ void Document::NotifyModified(DocModification mh) {
}
bool Document::IsWordPartSeparator(char ch) {
- return (WordCharClass(ch) == ccWord) && IsPunctuation(ch);
+ return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
}
int Document::WordPartLeft(int pos) {
diff --git a/src/Document.h b/src/Document.h
index 6ff858b92..d774d5664 100644
--- a/src/Document.h
+++ b/src/Document.h
@@ -93,7 +93,7 @@ public:
private:
int refCount;
CellBuffer cb;
- charClassification charClass[256];
+ CharClassify charClass;
char stylingMask;
int endStyled;
int styleClock;
@@ -207,7 +207,7 @@ public:
void ChangeCase(Range r, bool makeUpperCase);
void SetDefaultCharClasses(bool includeWordClass);
- void SetCharClasses(const unsigned char *chars, charClassification newCharClass);
+ void SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass);
void SetStylingBits(int bits);
void StartStyling(int position, char mask);
bool SetStyleFor(int length, char style);
@@ -239,7 +239,7 @@ public:
private:
void CheckReadOnly();
- charClassification WordCharClass(unsigned char ch);
+ CharClassify::cc WordCharClass(unsigned char ch);
bool IsWordStartAt(int pos);
bool IsWordEndAt(int pos);
bool IsWordAt(int start, int end);
diff --git a/src/DocumentAccessor.cxx b/src/DocumentAccessor.cxx
index f479ce025..c695c5f51 100644
--- a/src/DocumentAccessor.cxx
+++ b/src/DocumentAccessor.cxx
@@ -18,6 +18,7 @@
#include "DocumentAccessor.h"
#include "CellBuffer.h"
#include "Scintilla.h"
+#include "CharClassify.h"
#include "Document.h"
DocumentAccessor::~DocumentAccessor() {
diff --git a/src/Editor.cxx b/src/Editor.cxx
index 8780a128f..08693815f 100644
--- a/src/Editor.cxx
+++ b/src/Editor.cxx
@@ -26,6 +26,7 @@
#include "LineMarker.h"
#include "Style.h"
#include "ViewStyle.h"
+#include "CharClassify.h"
#include "Document.h"
#include "Editor.h"
@@ -6043,14 +6044,14 @@ sptr_t Editor::WndProc(unsigned int iMessage, uptr_t wParam, sptr_t lParam) {
pdoc->SetDefaultCharClasses(false);
if (lParam == 0)
return 0;
- pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), Document::ccWord);
+ pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccWord);
}
break;
case SCI_SETWHITESPACECHARS: {
if (lParam == 0)
return 0;
- pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), Document::ccSpace);
+ pdoc->SetCharClasses(reinterpret_cast<unsigned char *>(lParam), CharClassify::ccSpace);
}
break;
diff --git a/src/RESearch.cxx b/src/RESearch.cxx
index b7ea71bfb..f1fda7304 100644
--- a/src/RESearch.cxx
+++ b/src/RESearch.cxx
@@ -174,8 +174,14 @@
* matches: foo-foo fo-fo fob-fob foobar-foobar ...
*/
+#include "CharClassify.h"
#include "RESearch.h"
+// Shut up annoying Visual C++ warnings:
+#ifdef _MSC_VER
+#pragma warning(disable: 4514)
+#endif
+
#define OKP 1
#define NOP 0
@@ -206,7 +212,15 @@ const char bitarr[] = {1,2,4,8,16,32,64,'\200'};
#define badpat(x) (*nfa = END, x)
-RESearch::RESearch() {
+/*
+ * character classification table for word boundary operators BOW
+ * and EOW is passed in by the creator of this object (Scintilla
+ * Document). The Document default state is that word chars are:
+ * 0-9, a-z, A-Z, _ and all characters >= 0x80
+ */
+
+RESearch::RESearch(CharClassify *charClassTable) {
+ charClass = charClassTable;
Init();
}
@@ -287,7 +301,7 @@ const char *RESearch::Compile(const char *pat, int length, bool caseSensitive, b
char *mp=nfa; /* nfa pointer */
char *lp; /* saved pointer.. */
char *sp=nfa; /* another one.. */
- char *mpMax = mp + MAXNFA - BITBLK - 10;
+ char *mpMax = mp + MAXNFA - BITBLK - 10;
int tagi = 0; /* tag stack index */
int tagc = 1; /* actual tag count */
@@ -624,33 +638,6 @@ int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {
extern void re_fail(char *,char);
-/*
- * character classification table for word boundary operators BOW
- * and EOW. the reason for not using ctype macros is that we can
- * let the user add into our own table. see RESearch::ModifyWord. This table
- * is not in the bitset form, since we may wish to extend it in the
- * future for other character classifications.
- *
- * TRUE for 0-9 A-Z a-z _
- */
-static char chrtyp[MAXCHR] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
- 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 0, 0, 0, 0, 1, 0, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 0, 0, 0, 0, 0
- };
-
-#define inascii(x) (0177&(x))
-#define iswordc(x) chrtyp[inascii(x)]
#define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])
/*
@@ -757,35 +744,6 @@ int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) {
}
/*
- * RESearch::ModifyWord:
- * add new characters into the word table to change RESearch::Execute's
- * understanding of what a word should look like. Note that we
- * only accept additions into the word definition.
- *
- * If the string parameter is 0 or null string, the table is
- * reset back to the default containing A-Z a-z 0-9 _. [We use
- * the compact bitset representation for the default table]
- */
-
-static char deftab[16] = {
- 0, 0, 0, 0, 0, 0, '\377', 003, '\376', '\377', '\377', '\207',
- '\376', '\377', '\377', 007
-};
-
-void RESearch::ModifyWord(char *s) {
- int i;
-
- if (!s || !*s) {
- for (i = 0; i < MAXCHR; i++)
- if (!isinset(deftab,i))
- iswordc(i) = 0;
- }
- else
- while(*s)
- iswordc(*s++) = 1;
-}
-
-/*
* RESearch::Substitute:
* substitute the matched portions of the src in dst.
*
diff --git a/src/RESearch.h b/src/RESearch.h
index a558b371d..25205951f 100644
--- a/src/RESearch.h
+++ b/src/RESearch.h
@@ -18,7 +18,7 @@
#define BITBLK MAXCHR/CHRBIT
class CharacterIndexer {
-public:
+public:
virtual char CharAt(int index)=0;
virtual ~CharacterIndexer() {
}
@@ -27,7 +27,7 @@ public:
class RESearch {
public:
- RESearch();
+ RESearch(CharClassify *charClassTable);
~RESearch();
void Init();
void Clear();
@@ -36,7 +36,6 @@ public:
void ChSetWithCase(char c, bool caseSensitive);
const char *Compile(const char *pat, int length, bool caseSensitive, bool posix);
int Execute(CharacterIndexer &ci, int lp, int endp);
- void ModifyWord(char *s);
int Substitute(CharacterIndexer &ci, char *src, char *dst);
enum {MAXTAG=10};
@@ -57,6 +56,10 @@ private:
char bittab[BITBLK]; /* bit table for CCL */
/* pre-set bits... */
int failure;
+ CharClassify *charClass;
+ bool iswordc(unsigned char x) {
+ return charClass->IsWord(x);
+ }
};
#endif
diff --git a/src/ScintillaBase.cxx b/src/ScintillaBase.cxx
index 0ca5002a7..2d13efaba 100644
--- a/src/ScintillaBase.cxx
+++ b/src/ScintillaBase.cxx
@@ -31,6 +31,7 @@
#include "Style.h"
#include "ViewStyle.h"
#include "AutoComplete.h"
+#include "CharClassify.h"
#include "Document.h"
#include "Editor.h"
#include "ScintillaBase.h"