diff options
Diffstat (limited to 'src/lspipat.lua')
-rw-r--r-- | src/lspipat.lua | 155 |
1 files changed, 155 insertions, 0 deletions
diff --git a/src/lspipat.lua b/src/lspipat.lua new file mode 100644 index 0000000..9db2082 --- /dev/null +++ b/src/lspipat.lua @@ -0,0 +1,155 @@ +-- +-- LSPIPAT - LUA SPIPAT WRAPPER +-- Copyright (C) 2010, Robin Haberkorn +-- License: LGPL +-- +-- ADDITIONAL METHODS IMPLEMENTED IN LUA +-- + +module("spipat", package.seeall) + +-- +-- Module and Pattern methods +-- + +function ssub(str, pattern, repl, n, flags) + assert(type(repl) == "string" or type(repl) == "function", + "Invalid replacement specified!") + assert(type(n) == "nil" or type(n) == "number", + "Invalid repeat value specified!") + + local cMatches = 0 + repeat + -- cares about the remaining checks + local s, e = smatch(str, pattern, flags) + if not s then break end + + local res = type(repl) == "string" and repl or repl(s, e) + assert(type(res) == "nil" or type(res) == "string", + "Replacement function returned invalid value!") + + if res then str = str:sub(1, s - 1)..res..str:sub(e + 1) end + + if type(n) == "number" then n = n - 1 end + cMatches = cMatches + 1 + until n == 0 + + return str, cMatches +end + +function siter(str, pattern, flags) + local endPos = 0 + pattern = Pos(function() return endPos end) * Arb() * + #function(p) startPos = p + 1 end * pattern * #function(p) endPos = p end + + return function() + if not smatch(str, pattern, flags) then return end + return startPos, endPos + end +end + +-- +-- Primitives (shortcuts for deferring global variables) +-- + +local function genericSetGlobal(val, name) _G[name] = val end + +function _Setcur(name) return Setcur(genericSetGlobal, name) end +_G._Setcur = _Setcur +-- unfortunately, we can't register this as __len to strings... + + -- NOTE: if global `name' is of an invalid type, + -- lspipat will raise an error automatically +local function genericGetGlobal(name) return _G[name] end + +for _, prim in ipairs{ + "Pred", -- _Pred will be registered as __unm to strings + "Any", "Break", "BreakX", "NotAny", "NSpan", "Span", -- string primitives + "Len", "Pos", "RPos", "RTab", "Tab" -- number primitives +} do + local _prim = "_"..prim + + spipat[_prim] = function(name) return spipat[prim](genericGetGlobal, name) end + _G[_prim] = spipat[_prim] +end + +-- FIXME: local cookie support for assignments -> shortcuts for assignment of global variables + +-- +-- POSIX Extended Regular Expressions To SPITBOL Pattern Compiler +-- + +function RegExp(str, captures) + assert(type(captures) == "nil" or type(captures) == "table", + "Invalid captures table given!") + + local stack = {} + local function push(v) table.insert(stack, v) end + local function pop() return table.remove(stack) end + local r2p = {["."] = Len(1), ["^"] = Pos(0), ["$"] = RPos(0)} + + local set + local function add(c) table.insert(set, c) return c end + + local classes = { + blank = " \t", + punct = [[-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~]], + lower = "abcdefghijklmnopqrstuvwxyz", + digit = "0123456789" + } + classes.upper = classes.lower:upper() + classes.alpha = classes.upper..classes.lower + classes.alnum = classes.alpha..classes.digit + classes.word = classes.alnum.."_" + classes.xdigit = classes.upper:sub(1, 6)..classes.lower:sub(1, 6)..classes.digit + classes.space = classes.blank.."\r\n\v\f" + -- TODO: some character classes are still missing... + + local function exp() return exp end + local function seq() return seq end + local atom = ( "\\" * (Len(1) % push) + + NotAny(".[]^$()*+?|{}") % push + + Any(".^$") % function(r) push(r2p[r]) end + + "[" * ( "^" * -function() push(NotAny) set = {} end + + -function() push(Any) set = {} end ) + * (topattern("]") % add + "") + * Arbno( "[:" * (Break(":") % push) * ":]" * -function() return add(classes[pop()]) ~= nil end + + Len(1) * "-" * Len(1) + % function(range) for c = range:byte(), range:byte(3) do add(string.char(c)) end end + + Len(1) % add ) + * "]" * -function() push(pop()(table.concat(set))) end + + "(" * -exp * ")" + * -function() if captures then + push(topattern(pop()) / function(cap) table.insert(captures, cap) end) end end ) + * ( "*" * ( "?" * -function() push(Arbno(pop())) end + + -function() local r; r = pop() * -function() return r end + "" + push(r) end ) + + "+" * -function() local r; r = pop() * (-function() return r end + "") + push(r) end + + "?" * -function() push(topattern("") + pop()) end + + "{" * ( Span(classes.digit) % push ) * "," + * ( Span(classes.digit) + % function(max) local min, c = pop() + local r; r = pop() * -function() c = c + 1 + return c >= tonumber(max) or r end + "" + push(-function() c = 0 end * r * -function() return c >= tonumber(min) end) end ) + * "}" + + "" ) + seq = ( atom * -function() local rvalue, lvalue = pop(), pop() + push(type(lvalue) == "string" and type(rvalue) == "string" and + lvalue..rvalue or lvalue * rvalue) end + * (-seq + "") + "" ) + * ( "|" * -exp * -function() local pat = pop() push(pop() + topattern(pat)) end + + "" ) + exp = atom * seq + + assert(smatch(str, exp * RPos(0), match_anchored), + "Invalid regular expression!") + + return stack[1] +end +_G.RegExp = RegExp + + -- load C core, also registers Lua functions into metatables we cannot + -- access from Lua +require "lspipat.core"
\ No newline at end of file |