diff options
| author | mitchell <unknown> | 2018-03-11 23:04:41 -0400 | 
|---|---|---|
| committer | mitchell <unknown> | 2018-03-11 23:04:41 -0400 | 
| commit | 519b7328b66c4c84f03893a31e4be5ba6b1395f2 (patch) | |
| tree | 2055cd79006357e94c185f341d0df17b9a8769eb /lua/src/llex.c | |
| parent | c0373e036e965a70045971e2abc582cb4bf12a4e (diff) | |
| download | scintilla-mirror-519b7328b66c4c84f03893a31e4be5ba6b1395f2.tar.gz | |
Added optional Lua lexer support.
This support is disabled by default and must be enabled via compile-time option.
Diffstat (limited to 'lua/src/llex.c')
| -rw-r--r-- | lua/src/llex.c | 565 | 
1 files changed, 565 insertions, 0 deletions
| diff --git a/lua/src/llex.c b/lua/src/llex.c new file mode 100644 index 000000000..70328273f --- /dev/null +++ b/lua/src/llex.c @@ -0,0 +1,565 @@ +/* +** $Id: llex.c,v 2.96 2016/05/02 14:02:12 roberto Exp $ +** Lexical Analyzer +** See Copyright Notice in lua.h +*/ + +#define llex_c +#define LUA_CORE + +#include "lprefix.h" + + +#include <locale.h> +#include <string.h> + +#include "lua.h" + +#include "lctype.h" +#include "ldebug.h" +#include "ldo.h" +#include "lgc.h" +#include "llex.h" +#include "lobject.h" +#include "lparser.h" +#include "lstate.h" +#include "lstring.h" +#include "ltable.h" +#include "lzio.h" + + + +#define next(ls) (ls->current = zgetc(ls->z)) + + + +#define currIsNewline(ls)	(ls->current == '\n' || ls->current == '\r') + + +/* ORDER RESERVED */ +static const char *const luaX_tokens [] = { +    "and", "break", "do", "else", "elseif", +    "end", "false", "for", "function", "goto", "if", +    "in", "local", "nil", "not", "or", "repeat", +    "return", "then", "true", "until", "while", +    "//", "..", "...", "==", ">=", "<=", "~=", +    "<<", ">>", "::", "<eof>", +    "<number>", "<integer>", "<name>", "<string>" +}; + + +#define save_and_next(ls) (save(ls, ls->current), next(ls)) + + +static l_noret lexerror (LexState *ls, const char *msg, int token); + + +static void save (LexState *ls, int c) { +  Mbuffer *b = ls->buff; +  if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { +    size_t newsize; +    if (luaZ_sizebuffer(b) >= MAX_SIZE/2) +      lexerror(ls, "lexical element too long", 0); +    newsize = luaZ_sizebuffer(b) * 2; +    luaZ_resizebuffer(ls->L, b, newsize); +  } +  b->buffer[luaZ_bufflen(b)++] = cast(char, c); +} + + +void luaX_init (lua_State *L) { +  int i; +  TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */ +  luaC_fix(L, obj2gco(e));  /* never collect this name */ +  for (i=0; i<NUM_RESERVED; i++) { +    TString *ts = luaS_new(L, luaX_tokens[i]); +    luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */ +    ts->extra = cast_byte(i+1);  /* reserved word */ +  } +} + + +const char *luaX_token2str (LexState *ls, int token) { +  if (token < FIRST_RESERVED) {  /* single-byte symbols? */ +    lua_assert(token == cast_uchar(token)); +    return luaO_pushfstring(ls->L, "'%c'", token); +  } +  else { +    const char *s = luaX_tokens[token - FIRST_RESERVED]; +    if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */ +      return luaO_pushfstring(ls->L, "'%s'", s); +    else  /* names, strings, and numerals */ +      return s; +  } +} + + +static const char *txtToken (LexState *ls, int token) { +  switch (token) { +    case TK_NAME: case TK_STRING: +    case TK_FLT: case TK_INT: +      save(ls, '\0'); +      return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff)); +    default: +      return luaX_token2str(ls, token); +  } +} + + +static l_noret lexerror (LexState *ls, const char *msg, int token) { +  msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber); +  if (token) +    luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token)); +  luaD_throw(ls->L, LUA_ERRSYNTAX); +} + + +l_noret luaX_syntaxerror (LexState *ls, const char *msg) { +  lexerror(ls, msg, ls->t.token); +} + + +/* +** creates a new string and anchors it in scanner's table so that +** it will not be collected until the end of the compilation +** (by that time it should be anchored somewhere) +*/ +TString *luaX_newstring (LexState *ls, const char *str, size_t l) { +  lua_State *L = ls->L; +  TValue *o;  /* entry for 'str' */ +  TString *ts = luaS_newlstr(L, str, l);  /* create new string */ +  setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */ +  o = luaH_set(L, ls->h, L->top - 1); +  if (ttisnil(o)) {  /* not in use yet? */ +    /* boolean value does not need GC barrier; +       table has no metatable, so it does not need to invalidate cache */ +    setbvalue(o, 1);  /* t[string] = true */ +    luaC_checkGC(L); +  } +  else {  /* string already present */ +    ts = tsvalue(keyfromval(o));  /* re-use value previously stored */ +  } +  L->top--;  /* remove string from stack */ +  return ts; +} + + +/* +** increment line number and skips newline sequence (any of +** \n, \r, \n\r, or \r\n) +*/ +static void inclinenumber (LexState *ls) { +  int old = ls->current; +  lua_assert(currIsNewline(ls)); +  next(ls);  /* skip '\n' or '\r' */ +  if (currIsNewline(ls) && ls->current != old) +    next(ls);  /* skip '\n\r' or '\r\n' */ +  if (++ls->linenumber >= MAX_INT) +    lexerror(ls, "chunk has too many lines", 0); +} + + +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source, +                    int firstchar) { +  ls->t.token = 0; +  ls->L = L; +  ls->current = firstchar; +  ls->lookahead.token = TK_EOS;  /* no look-ahead token */ +  ls->z = z; +  ls->fs = NULL; +  ls->linenumber = 1; +  ls->lastline = 1; +  ls->source = source; +  ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */ +  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */ +} + + + +/* +** ======================================================= +** LEXICAL ANALYZER +** ======================================================= +*/ + + +static int check_next1 (LexState *ls, int c) { +  if (ls->current == c) { +    next(ls); +    return 1; +  } +  else return 0; +} + + +/* +** Check whether current char is in set 'set' (with two chars) and +** saves it +*/ +static int check_next2 (LexState *ls, const char *set) { +  lua_assert(set[2] == '\0'); +  if (ls->current == set[0] || ls->current == set[1]) { +    save_and_next(ls); +    return 1; +  } +  else return 0; +} + + +/* LUA_NUMBER */ +/* +** this function is quite liberal in what it accepts, as 'luaO_str2num' +** will reject ill-formed numerals. +*/ +static int read_numeral (LexState *ls, SemInfo *seminfo) { +  TValue obj; +  const char *expo = "Ee"; +  int first = ls->current; +  lua_assert(lisdigit(ls->current)); +  save_and_next(ls); +  if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */ +    expo = "Pp"; +  for (;;) { +    if (check_next2(ls, expo))  /* exponent part? */ +      check_next2(ls, "-+");  /* optional exponent sign */ +    if (lisxdigit(ls->current)) +      save_and_next(ls); +    else if (ls->current == '.') +      save_and_next(ls); +    else break; +  } +  save(ls, '\0'); +  if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */ +    lexerror(ls, "malformed number", TK_FLT); +  if (ttisinteger(&obj)) { +    seminfo->i = ivalue(&obj); +    return TK_INT; +  } +  else { +    lua_assert(ttisfloat(&obj)); +    seminfo->r = fltvalue(&obj); +    return TK_FLT; +  } +} + + +/* +** skip a sequence '[=*[' or ']=*]'; if sequence is well formed, return +** its number of '='s; otherwise, return a negative number (-1 iff there +** are no '='s after initial bracket) +*/ +static int skip_sep (LexState *ls) { +  int count = 0; +  int s = ls->current; +  lua_assert(s == '[' || s == ']'); +  save_and_next(ls); +  while (ls->current == '=') { +    save_and_next(ls); +    count++; +  } +  return (ls->current == s) ? count : (-count) - 1; +} + + +static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { +  int line = ls->linenumber;  /* initial line (for error message) */ +  save_and_next(ls);  /* skip 2nd '[' */ +  if (currIsNewline(ls))  /* string starts with a newline? */ +    inclinenumber(ls);  /* skip it */ +  for (;;) { +    switch (ls->current) { +      case EOZ: {  /* error */ +        const char *what = (seminfo ? "string" : "comment"); +        const char *msg = luaO_pushfstring(ls->L, +                     "unfinished long %s (starting at line %d)", what, line); +        lexerror(ls, msg, TK_EOS); +        break;  /* to avoid warnings */ +      } +      case ']': { +        if (skip_sep(ls) == sep) { +          save_and_next(ls);  /* skip 2nd ']' */ +          goto endloop; +        } +        break; +      } +      case '\n': case '\r': { +        save(ls, '\n'); +        inclinenumber(ls); +        if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */ +        break; +      } +      default: { +        if (seminfo) save_and_next(ls); +        else next(ls); +      } +    } +  } endloop: +  if (seminfo) +    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), +                                     luaZ_bufflen(ls->buff) - 2*(2 + sep)); +} + + +static void esccheck (LexState *ls, int c, const char *msg) { +  if (!c) { +    if (ls->current != EOZ) +      save_and_next(ls);  /* add current to buffer for error message */ +    lexerror(ls, msg, TK_STRING); +  } +} + + +static int gethexa (LexState *ls) { +  save_and_next(ls); +  esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected"); +  return luaO_hexavalue(ls->current); +} + + +static int readhexaesc (LexState *ls) { +  int r = gethexa(ls); +  r = (r << 4) + gethexa(ls); +  luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */ +  return r; +} + + +static unsigned long readutf8esc (LexState *ls) { +  unsigned long r; +  int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */ +  save_and_next(ls);  /* skip 'u' */ +  esccheck(ls, ls->current == '{', "missing '{'"); +  r = gethexa(ls);  /* must have at least one digit */ +  while ((save_and_next(ls), lisxdigit(ls->current))) { +    i++; +    r = (r << 4) + luaO_hexavalue(ls->current); +    esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large"); +  } +  esccheck(ls, ls->current == '}', "missing '}'"); +  next(ls);  /* skip '}' */ +  luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */ +  return r; +} + + +static void utf8esc (LexState *ls) { +  char buff[UTF8BUFFSZ]; +  int n = luaO_utf8esc(buff, readutf8esc(ls)); +  for (; n > 0; n--)  /* add 'buff' to string */ +    save(ls, buff[UTF8BUFFSZ - n]); +} + + +static int readdecesc (LexState *ls) { +  int i; +  int r = 0;  /* result accumulator */ +  for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */ +    r = 10*r + ls->current - '0'; +    save_and_next(ls); +  } +  esccheck(ls, r <= UCHAR_MAX, "decimal escape too large"); +  luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */ +  return r; +} + + +static void read_string (LexState *ls, int del, SemInfo *seminfo) { +  save_and_next(ls);  /* keep delimiter (for error messages) */ +  while (ls->current != del) { +    switch (ls->current) { +      case EOZ: +        lexerror(ls, "unfinished string", TK_EOS); +        break;  /* to avoid warnings */ +      case '\n': +      case '\r': +        lexerror(ls, "unfinished string", TK_STRING); +        break;  /* to avoid warnings */ +      case '\\': {  /* escape sequences */ +        int c;  /* final character to be saved */ +        save_and_next(ls);  /* keep '\\' for error messages */ +        switch (ls->current) { +          case 'a': c = '\a'; goto read_save; +          case 'b': c = '\b'; goto read_save; +          case 'f': c = '\f'; goto read_save; +          case 'n': c = '\n'; goto read_save; +          case 'r': c = '\r'; goto read_save; +          case 't': c = '\t'; goto read_save; +          case 'v': c = '\v'; goto read_save; +          case 'x': c = readhexaesc(ls); goto read_save; +          case 'u': utf8esc(ls);  goto no_save; +          case '\n': case '\r': +            inclinenumber(ls); c = '\n'; goto only_save; +          case '\\': case '\"': case '\'': +            c = ls->current; goto read_save; +          case EOZ: goto no_save;  /* will raise an error next loop */ +          case 'z': {  /* zap following span of spaces */ +            luaZ_buffremove(ls->buff, 1);  /* remove '\\' */ +            next(ls);  /* skip the 'z' */ +            while (lisspace(ls->current)) { +              if (currIsNewline(ls)) inclinenumber(ls); +              else next(ls); +            } +            goto no_save; +          } +          default: { +            esccheck(ls, lisdigit(ls->current), "invalid escape sequence"); +            c = readdecesc(ls);  /* digital escape '\ddd' */ +            goto only_save; +          } +        } +       read_save: +         next(ls); +         /* go through */ +       only_save: +         luaZ_buffremove(ls->buff, 1);  /* remove '\\' */ +         save(ls, c); +         /* go through */ +       no_save: break; +      } +      default: +        save_and_next(ls); +    } +  } +  save_and_next(ls);  /* skip delimiter */ +  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, +                                   luaZ_bufflen(ls->buff) - 2); +} + + +static int llex (LexState *ls, SemInfo *seminfo) { +  luaZ_resetbuffer(ls->buff); +  for (;;) { +    switch (ls->current) { +      case '\n': case '\r': {  /* line breaks */ +        inclinenumber(ls); +        break; +      } +      case ' ': case '\f': case '\t': case '\v': {  /* spaces */ +        next(ls); +        break; +      } +      case '-': {  /* '-' or '--' (comment) */ +        next(ls); +        if (ls->current != '-') return '-'; +        /* else is a comment */ +        next(ls); +        if (ls->current == '[') {  /* long comment? */ +          int sep = skip_sep(ls); +          luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */ +          if (sep >= 0) { +            read_long_string(ls, NULL, sep);  /* skip long comment */ +            luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */ +            break; +          } +        } +        /* else short comment */ +        while (!currIsNewline(ls) && ls->current != EOZ) +          next(ls);  /* skip until end of line (or end of file) */ +        break; +      } +      case '[': {  /* long string or simply '[' */ +        int sep = skip_sep(ls); +        if (sep >= 0) { +          read_long_string(ls, seminfo, sep); +          return TK_STRING; +        } +        else if (sep != -1)  /* '[=...' missing second bracket */ +          lexerror(ls, "invalid long string delimiter", TK_STRING); +        return '['; +      } +      case '=': { +        next(ls); +        if (check_next1(ls, '=')) return TK_EQ; +        else return '='; +      } +      case '<': { +        next(ls); +        if (check_next1(ls, '=')) return TK_LE; +        else if (check_next1(ls, '<')) return TK_SHL; +        else return '<'; +      } +      case '>': { +        next(ls); +        if (check_next1(ls, '=')) return TK_GE; +        else if (check_next1(ls, '>')) return TK_SHR; +        else return '>'; +      } +      case '/': { +        next(ls); +        if (check_next1(ls, '/')) return TK_IDIV; +        else return '/'; +      } +      case '~': { +        next(ls); +        if (check_next1(ls, '=')) return TK_NE; +        else return '~'; +      } +      case ':': { +        next(ls); +        if (check_next1(ls, ':')) return TK_DBCOLON; +        else return ':'; +      } +      case '"': case '\'': {  /* short literal strings */ +        read_string(ls, ls->current, seminfo); +        return TK_STRING; +      } +      case '.': {  /* '.', '..', '...', or number */ +        save_and_next(ls); +        if (check_next1(ls, '.')) { +          if (check_next1(ls, '.')) +            return TK_DOTS;   /* '...' */ +          else return TK_CONCAT;   /* '..' */ +        } +        else if (!lisdigit(ls->current)) return '.'; +        else return read_numeral(ls, seminfo); +      } +      case '0': case '1': case '2': case '3': case '4': +      case '5': case '6': case '7': case '8': case '9': { +        return read_numeral(ls, seminfo); +      } +      case EOZ: { +        return TK_EOS; +      } +      default: { +        if (lislalpha(ls->current)) {  /* identifier or reserved word? */ +          TString *ts; +          do { +            save_and_next(ls); +          } while (lislalnum(ls->current)); +          ts = luaX_newstring(ls, luaZ_buffer(ls->buff), +                                  luaZ_bufflen(ls->buff)); +          seminfo->ts = ts; +          if (isreserved(ts))  /* reserved word? */ +            return ts->extra - 1 + FIRST_RESERVED; +          else { +            return TK_NAME; +          } +        } +        else {  /* single-char tokens (+ - / ...) */ +          int c = ls->current; +          next(ls); +          return c; +        } +      } +    } +  } +} + + +void luaX_next (LexState *ls) { +  ls->lastline = ls->linenumber; +  if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */ +    ls->t = ls->lookahead;  /* use this one */ +    ls->lookahead.token = TK_EOS;  /* and discharge it */ +  } +  else +    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */ +} + + +int luaX_lookahead (LexState *ls) { +  lua_assert(ls->lookahead.token == TK_EOS); +  ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); +  return ls->lookahead.token; +} + | 
