about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author nyamatongwe <unknown> 2010-07-13 21:31:10 +1000
committer nyamatongwe <unknown> 2010-07-13 21:31:10 +1000
commit 4a42ef938d5cc3b13f42b4a3f4e7310a6b5cbf4b (patch)
tree 640b660b0e2dc77efb9531e2ab21218f589b4362
parent e57e6904030c8df384b5f4b33b892ffa5dad4fe6 (diff)
download scintilla-mirror-4a42ef938d5cc3b13f42b4a3f4e7310a6b5cbf4b.tar.gz
Changed files for new lexer design.
-rw-r--r-- lexlib/Accessor.h 86
-rw-r--r-- lexlib/CharacterSet.h 100
-rw-r--r-- lexlib/PropSetSimple.h 4
-rw-r--r-- lexlib/StyleContext.cxx 9
-rw-r--r-- lexlib/StyleContext.h 42
5 files changed, 139 insertions, 102 deletions
diff --git a/lexlib/Accessor.h b/lexlib/Accessor.h
index d9db9c7bf..2f28c1acd 100644
--- a/lexlib/Accessor.h
+++ b/lexlib/Accessor.h
@@ -1,79 +1,35 @@
// Scintilla source code edit control
/** @file Accessor.h
- ** Rapid easy access to contents of a Scintilla.
+ ** Interfaces between Scintilla and lexers.
**/
-// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
+// Copyright 1998-2010 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
+#ifndef ACCESSOR_H
+#define ACCESSOR_H
+
+#ifdef SCI_NAMESPACE
+namespace Scintilla {
+#endif
+
enum { wsSpace = 1, wsTab = 2, wsSpaceTab = 4, wsInconsistent=8};
class Accessor;
+class WordList;
+class PropSetSimple;
typedef bool (*PFNIsCommentLeader)(Accessor &styler, int pos, int len);
-/**
- * Interface to data in a Scintilla.
- */
-class Accessor {
-protected:
- enum {extremePosition=0x7FFFFFFF};
- /** @a bufferSize is a trade off between time taken to copy the characters
- * and retrieval overhead.
- * @a slopSize positions the buffer before the desired position
- * in case there is some backtracking. */
- enum {bufferSize=4000, slopSize=bufferSize/8};
- char buf[bufferSize+1];
- int startPos;
- int endPos;
- int codePage;
-
- virtual bool InternalIsLeadByte(char ch)=0;
- virtual void Fill(int position)=0;
-
+class Accessor : public LexAccessor {
public:
- Accessor() : startPos(extremePosition), endPos(0), codePage(0) {}
- virtual ~Accessor() {}
- char operator[](int position) {
- if (position < startPos || position >= endPos) {
- Fill(position);
- }
- return buf[position - startPos];
- }
- /** Safe version of operator[], returning a defined value for invalid position. */
- char SafeGetCharAt(int position, char chDefault=' ') {
- if (position < startPos || position >= endPos) {
- Fill(position);
- if (position < startPos || position >= endPos) {
- // Position is outside range of document
- return chDefault;
- }
- }
- return buf[position - startPos];
- }
- bool IsLeadByte(char ch) {
- return codePage && InternalIsLeadByte(ch);
- }
- void SetCodePage(int codePage_) { codePage = codePage_; }
+ PropSetSimple *pprops;
+ Accessor(IDocument *pAccess_, PropSetSimple *pprops_);
+ int GetPropertyInt(const char *, int defaultValue=0);
+ int IndentAmount(int line, int *flags, PFNIsCommentLeader pfnIsCommentLeader = 0);
+};
- virtual bool Match(int pos, const char *s)=0;
- virtual char StyleAt(int position)=0;
- virtual int GetLine(int position)=0;
- virtual int LineStart(int line)=0;
- virtual int LevelAt(int line)=0;
- virtual int Length()=0;
- virtual void Flush()=0;
- virtual int GetLineState(int line)=0;
- virtual int SetLineState(int line, int state)=0;
- virtual int GetPropertyInt(const char *key, int defaultValue=0)=0;
- virtual char *GetProperties()=0;
+#ifdef SCI_NAMESPACE
+}
+#endif
- // Style setting
- virtual void StartAt(unsigned int start, char chMask=31)=0;
- virtual void SetFlags(char chFlags_, char chWhile_)=0;
- virtual unsigned int GetStartSegment()=0;
- virtual void StartSegment(unsigned int pos)=0;
- virtual void ColourTo(unsigned int pos, int chAttr)=0;
- virtual void SetLevel(int line, int level)=0;
- virtual int IndentAmount(int line, int *flags, PFNIsCommentLeader pfnIsCommentLeader = 0)=0;
- virtual void IndicatorFill(int start, int end, int indicator, int value)=0;
-};
+#endif
diff --git a/lexlib/CharacterSet.h b/lexlib/CharacterSet.h
index 9b8869635..18cb0aa93 100644
--- a/lexlib/CharacterSet.h
+++ b/lexlib/CharacterSet.h
@@ -5,6 +5,13 @@
// Copyright 2007 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
+#ifndef CHARACTERSET_H
+#define CHARACTERSET_H
+
+#ifdef SCI_NAMESPACE
+namespace Scintilla {
+#endif
+
class CharacterSet {
int size;
bool valueAfter;
@@ -39,21 +46,104 @@ public:
size = 0;
}
void Add(int val) {
- PLATFORM_ASSERT(val >= 0);
- PLATFORM_ASSERT(val < size);
+ assert(val >= 0);
+ assert(val < size);
bset[val] = true;
}
void AddString(const char *CharacterSet) {
for (const char *cp=CharacterSet; *cp; cp++) {
int val = static_cast<unsigned char>(*cp);
- PLATFORM_ASSERT(val >= 0);
- PLATFORM_ASSERT(val < size);
+ assert(val >= 0);
+ assert(val < size);
bset[val] = true;
}
}
bool Contains(int val) const {
- PLATFORM_ASSERT(val >= 0);
+ assert(val >= 0);
if (val < 0) return false;
return (val < size) ? bset[val] : valueAfter;
}
};
+
+// Functions for classifying characters
+
+/** Report whether @a ch is an ASCII space or one of the control characters 0x09..0x0d. */
+inline bool IsASpace(int ch) {
+	if (ch == ' ')
+		return true;
+	return (ch >= 0x09) && (ch <= 0x0d);
+}
+
+/** Report whether @a ch is a horizontal blank: either a space or a tab. */
+inline bool IsASpaceOrTab(int ch) {
+	switch (ch) {
+	case ' ':
+	case '\t':
+		return true;
+	default:
+		return false;
+	}
+}
+
+/** Report whether @a ch is one of the ASCII decimal digits '0'..'9'. */
+inline bool IsADigit(int ch) {
+	if (ch < '0')
+		return false;
+	return ch <= '9';
+}
+
+/**
+ * Report whether @a ch is a digit in the given @a base.
+ * Bases up to 10 accept the characters from '0' up to, but not including,
+ * '0' + base. Larger bases accept '0'..'9' plus the first (base - 10)
+ * letters of the alphabet in either case.
+ */
+inline bool IsADigit(int ch, int base) {
+	if (base <= 10)
+		return (ch >= '0') && (ch < '0' + base);
+	if ((ch >= '0') && (ch <= '9'))
+		return true;
+	// Number of letters that count as digits beyond '9'.
+	const int letters = base - 10;
+	if ((ch >= 'A') && (ch < 'A' + letters))
+		return true;
+	return (ch >= 'a') && (ch < 'a' + letters);
+}
+
+/**
+ * Report whether @a ch lies in the 7-bit ASCII range 0x00..0x7f.
+ * Negative values are rejected: a byte >= 0x80 held in a signed char
+ * becomes negative when widened to int and must not be classed as ASCII.
+ */
+inline bool IsASCII(int ch) {
+	return (ch >= 0) && (ch < 0x80);
+}
+
+/** Report whether @a ch is an ASCII decimal digit or an ASCII letter of either case. */
+inline bool IsAlphaNumeric(int ch) {
+	if ((ch >= '0') && (ch <= '9'))
+		return true;
+	if ((ch >= 'a') && (ch <= 'z'))
+		return true;
+	return (ch >= 'A') && (ch <= 'Z');
+}
+
+/**
+ * Report whether @a ch is white space: a space or one of 0x09..0x0d.
+ * Only plain integer comparisons are used, so the test is ASCII specific
+ * but remains safe for values >= 0x80.
+ */
+inline bool isspacechar(int ch) {
+	const bool isControlSpace = (ch >= 0x09) && (ch <= 0x0d);
+	return isControlSpace || (ch == ' ');
+}
+
+/** Report whether @a ch may appear inside a word: an ASCII letter or digit, '.' or '_'. */
+inline bool iswordchar(int ch) {
+	if (!IsASCII(ch))
+		return false;
+	return IsAlphaNumeric(ch) || (ch == '.') || (ch == '_');
+}
+
+/** Report whether @a ch may begin a word: an ASCII letter or digit, or '_'. */
+inline bool iswordstart(int ch) {
+	if (!IsASCII(ch))
+		return false;
+	return IsAlphaNumeric(ch) || (ch == '_');
+}
+
+/**
+ * Report whether @a ch is one of the punctuation characters treated as an operator.
+ * ASCII letters and digits are never operators.
+ * Note that '.' is part of the accepted set.
+ */
+inline bool isoperator(int ch) {
+	if (IsASCII(ch) && IsAlphaNumeric(ch))
+		return false;
+	switch (ch) {
+	case '%': case '^': case '&': case '*':
+	case '(': case ')': case '-': case '+':
+	case '=': case '|': case '{': case '}':
+	case '[': case ']': case ':': case ';':
+	case '<': case '>': case ',': case '/':
+	case '?': case '!': case '.': case '~':
+		return true;
+	default:
+		return false;
+	}
+}
+
+// Simple case functions for ASCII.
+
+/** Convert an ASCII lower case letter to upper case; any other value is returned unchanged. */
+inline char MakeUpperCase(char ch) {
+	const bool isLower = (ch >= 'a') && (ch <= 'z');
+	return isLower ? static_cast<char>(ch - 'a' + 'A') : ch;
+}
+
+int CompareCaseInsensitive(const char *a, const char *b);
+int CompareNCaseInsensitive(const char *a, const char *b, size_t len);
+
+#ifdef SCI_NAMESPACE
+}
+#endif
+
+#endif
diff --git a/lexlib/PropSetSimple.h b/lexlib/PropSetSimple.h
index 1674cfb9e..b79873720 100644
--- a/lexlib/PropSetSimple.h
+++ b/lexlib/PropSetSimple.h
@@ -12,7 +12,7 @@
namespace Scintilla {
#endif
-class PropSetSimple : public PropertyGet {
+class PropSetSimple {
void *impl;
void Set(const char *keyVal);
public:
@@ -22,7 +22,7 @@ public:
void SetMultiple(const char *);
const char *Get(const char *key) const;
char *Expanded(const char *key) const;
- char *ToString() const;
+ int GetExpanded(const char *key, char *result) const;
int GetInt(const char *key, int defaultValue=0) const;
};
diff --git a/lexlib/StyleContext.cxx b/lexlib/StyleContext.cxx
index 4a1f71622..cf59fdd24 100644
--- a/lexlib/StyleContext.cxx
+++ b/lexlib/StyleContext.cxx
@@ -9,10 +9,11 @@
#include <string.h>
#include <ctype.h>
#include <stdio.h>
+#include <assert.h>
-#include "Platform.h"
+#include "ILexer.h"
-#include "PropSet.h"
+#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
@@ -22,7 +23,7 @@ using namespace Scintilla;
static void getRange(unsigned int start,
unsigned int end,
- Accessor &styler,
+ LexAccessor &styler,
char *s,
unsigned int len) {
unsigned int i = 0;
@@ -39,7 +40,7 @@ void StyleContext::GetCurrent(char *s, unsigned int len) {
static void getRangeLowered(unsigned int start,
unsigned int end,
- Accessor &styler,
+ LexAccessor &styler,
char *s,
unsigned int len) {
unsigned int i = 0;
diff --git a/lexlib/StyleContext.h b/lexlib/StyleContext.h
index 4e175bc29..8b1345432 100644
--- a/lexlib/StyleContext.h
+++ b/lexlib/StyleContext.h
@@ -5,16 +5,26 @@
// Copyright 1998-2004 by Neil Hodgson <neilh@scintilla.org>
// This file is in the public domain.
+#ifndef STYLECONTEXT_H
+#define STYLECONTEXT_H
+
#ifdef SCI_NAMESPACE
namespace Scintilla {
#endif
+/** Convert an ASCII upper case letter to lower case; any other value is returned unchanged. */
+static inline int MakeLowerCase(int ch) {
+	const bool isUpper = (ch >= 'A') && (ch <= 'Z');
+	return isUpper ? (ch - 'A' + 'a') : ch;
+}
+
// All languages handled so far can treat all characters >= 0x80 as one class
// which just continues the current token or starts an identifier if in default.
// DBCS treated specially as the second character can be < 0x80 and hence
// syntactically significant. UTF-8 avoids this as all trail bytes are >= 0x80
class StyleContext {
- Accessor &styler;
+ LexAccessor &styler;
unsigned int endPos;
StyleContext &operator=(const StyleContext &);
void GetNextChar(unsigned int pos) {
@@ -41,7 +51,7 @@ public:
int chNext;
StyleContext(unsigned int startPos, unsigned int length,
- int initStyle, Accessor &styler_, char chMask=31) :
+ int initStyle, LexAccessor &styler_, char chMask=31) :
styler(styler_),
endPos(startPos + length),
currentPos(startPos),
@@ -131,15 +141,15 @@ public:
return true;
}
bool MatchIgnoreCase(const char *s) {
- if (tolower(ch) != static_cast<unsigned char>(*s))
+ if (MakeLowerCase(ch) != static_cast<unsigned char>(*s))
return false;
s++;
- if (tolower(chNext) != static_cast<unsigned char>(*s))
+ if (MakeLowerCase(chNext) != static_cast<unsigned char>(*s))
return false;
s++;
for (int n=2; *s; n++) {
if (static_cast<unsigned char>(*s) !=
- tolower(static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+n))))
+ MakeLowerCase(static_cast<unsigned char>(styler.SafeGetCharAt(currentPos+n))))
return false;
s++;
}
@@ -154,24 +164,4 @@ public:
}
#endif
-inline bool IsASpace(unsigned int ch) {
- return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
-}
-
-inline bool IsASpaceOrTab(unsigned int ch) {
- return (ch == ' ') || (ch == '\t');
-}
-
-inline bool IsADigit(unsigned int ch) {
- return (ch >= '0') && (ch <= '9');
-}
-
-inline bool IsADigit(unsigned int ch, unsigned int base) {
- if (base <= 10) {
- return (ch >= '0') && (ch < '0' + base);
- } else {
- return ((ch >= '0') && (ch <= '9')) ||
- ((ch >= 'A') && (ch < 'A' + base - 10)) ||
- ((ch >= 'a') && (ch < 'a' + base - 10));
- }
-}
+#endif