1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
--
-- LSPIPAT - LUA SPIPAT WRAPPER
-- Copyright (C) 2010, Robin Haberkorn
-- License: LGPL
--
-- ADDITIONAL METHODS IMPLEMENTED IN LUA
--
module("spipat", package.seeall)
--
-- Module and Pattern methods
--
--- Substitute matches of `pattern` in `str`.
-- After every replacement the search starts again at the beginning of the
-- (modified) string, so a replacement that itself matches `pattern` is
-- matched again.
-- @param str      subject string
-- @param pattern  pattern handed to smatch
-- @param repl     replacement string, or a function called as repl(s, e) with
--                 the two positions returned by smatch; it may return a
--                 string, or nil to leave the subject untouched
-- @param n        optional maximum number of matches to process; values
--                 <= 0 process no match at all
-- @param flags    optional flags handed to smatch
-- @return the resulting string, followed by the number of processed matches
-- Raises an error if `repl` or `n` has the wrong type or if the replacement
-- function returns something other than a string or nil.
function ssub(str, pattern, repl, n, flags)
  assert(type(repl) == "string" or type(repl) == "function",
         "Invalid replacement specified!")
  assert(type(n) == "nil" or type(n) == "number",
         "Invalid repeat value specified!")

  local cMatches = 0
  -- The budget is tested *before* each match. Counting down inside a
  -- `repeat ... until n == 0` loop never terminates on the counter when n
  -- starts at 0, is negative or is not an integer.
  while n == nil or n > 0 do
    -- smatch takes care of validating str, pattern and flags
    local s, e = smatch(str, pattern, flags)
    if not s then break end

    local res
    if type(repl) == "string" then
      res = repl
    else
      res = repl(s, e)
    end
    assert(type(res) == "nil" or type(res) == "string",
           "Replacement function returned invalid value!")

    -- s and e are inclusive: keep everything before s and after e
    if res then str = str:sub(1, s - 1)..res..str:sub(e + 1) end
    if n then n = n - 1 end
    cMatches = cMatches + 1
  end

  return str, cMatches
end
--- Create an iterator over the matches of `pattern` in `str`.
-- Every call of the returned function runs smatch once; it returns the start
-- and end position recorded for that match, or nothing once smatch fails.
-- @param str      subject string
-- @param pattern  pattern to look for
-- @param flags    optional flags handed to smatch
-- @return iterator function
function siter(str, pattern, flags)
  -- Both cursors are upvalues owned by this iterator. `startPos` has to be
  -- declared here: as an undeclared name it is written to the enclosing
  -- environment and shared by every iterator ever created.
  local startPos
  local endPos = 0

  -- Wrap the pattern: Pos() re-positions at the end of the previous match,
  -- Arb() skips ahead, and the two `#function` callbacks store the values
  -- they are passed right before and right after `pattern`
  -- (presumably the cursor position at that point).
  pattern = Pos(function() return endPos end) * Arb() *
            #function(p) startPos = p + 1 end * pattern * #function(p) endPos = p end

  return function()
    if not smatch(str, pattern, flags) then return end
    return startPos, endPos
  end
end
--
-- Primitives (shortcuts for deferring global variables)
--
-- Callback storing `val` in the global variable called `name`.
local genericSetGlobal = function(val, name)
  _G[name] = val
end

--- Shortcut for Setcur that assigns to the global variable `name`.
-- Setcur itself is looked up when the shortcut is called, not when it is
-- defined.
-- @param name  name of the global variable to assign to
-- @return whatever Setcur returns
function _Setcur(name)
  return Setcur(genericSetGlobal, name)
end

-- Export the shortcut into the global environment as well.
_G._Setcur = _Setcur
-- unfortunately, we can't register this as __len to strings...
-- NOTE: if global `name' is of an invalid type,
-- lspipat will raise an error automatically
-- Callback returning the current value of the global variable called `name`.
local genericGetGlobal = function(name)
  return _G[name]
end

-- Primitives that get a "_"-prefixed variant taking the name of a global
-- variable instead of a value.
local deferrable = {
  "Pred", -- _Pred will be registered as __unm to strings
  -- string primitives
  "Any", "Break", "BreakX", "NotAny", "NSpan", "Span",
  -- number primitives
  "Len", "Pos", "RPos", "RTab", "Tab"
}

for i = 1, #deferrable do
  local base = deferrable[i]
  local shortcut = "_"..base

  -- spipat[base] is resolved on every call instead of being captured here;
  -- the C core is only required at the end of this file, so the primitive
  -- presumably does not exist yet while this loop runs.
  local function deferred(name)
    return spipat[base](genericGetGlobal, name)
  end

  -- register in the module and in the global environment
  spipat[shortcut] = deferred
  _G[shortcut] = deferred
end
-- FIXME: local cookie support for assignments -> shortcuts for assignment of global variables
--
-- POSIX Extended Regular Expressions To SPITBOL Pattern Compiler
--
--- Compile a regular expression into a pattern.
-- `str` is parsed by matching it against a grammar that is itself written
-- with spipat patterns; the functions embedded in that grammar build the
-- result on a local stack while the grammar is being matched.
--
-- Supported syntax, as far as the grammar below shows: backslash escapes,
-- `.`, `^`, `$`, bracket expressions (negation, ranges, `[:class:]`),
-- groups, alternation with `|`, and the quantifiers `*`, `*?`, `+`, `?` and
-- `{min,max}` (both numbers are mandatory).
--
-- @param str       regular expression to compile
-- @param captures  optional table; when given, every parenthesised group is
--                  wrapped with a `/` callback that appends its argument to
--                  this table
-- @return the value at the bottom of the build stack (stack[1]); literal
--         characters are pushed as strings and merged by concatenation, so a
--         purely literal expression presumably yields a plain string
-- Raises "Invalid captures table given!" if `captures` is neither nil nor a
-- table, and "Invalid regular expression!" if the grammar does not match all
-- of `str`.
function RegExp(str, captures)
  assert(type(captures) == "nil" or type(captures) == "table",
         "Invalid captures table given!")

  -- Build stack shared by all callbacks of the grammar.
  local stack = {}
  local function push(v) table.insert(stack, v) end
  local function pop() return table.remove(stack) end

  -- Metacharacters that translate directly into a primitive.
  local r2p = {["."] = Len(1), ["^"] = Pos(0), ["$"] = RPos(0)}

  -- Characters collected for the bracket expression currently being parsed;
  -- add() returns its argument so that callers can test it for nil.
  local set
  local function add(c) table.insert(set, c) return c end

  -- Character classes usable as [:name:] inside a bracket expression.
  local classes = {
    blank = " \t",
    punct = [[-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~]],
    lower = "abcdefghijklmnopqrstuvwxyz",
    digit = "0123456789"
  }
  classes.upper = classes.lower:upper()
  classes.alpha = classes.upper..classes.lower
  classes.alnum = classes.alpha..classes.digit
  classes.word = classes.alnum.."_"
  classes.xdigit = classes.upper:sub(1, 6)..classes.lower:sub(1, 6)..classes.digit
  classes.space = classes.blank.."\r\n\v\f"
  -- TODO: some character classes are still missing...

  -- Forward declarations for the recursive grammar: each function returns
  -- the local of the same name, which is reassigned to the real pattern
  -- further down. `-exp` / `-seq` therefore pick up the final pattern when
  -- the function is called rather than when the grammar is constructed.
  local function exp() return exp end
  local function seq() return seq end

  -- atom := element [quantifier]
  -- Elements, in order: escaped character, ordinary character, one of . ^ $,
  -- bracket expression, parenthesised group. Each leaves one value on the
  -- stack.
  local atom = ( "\\" * (Len(1) % push)
               + NotAny(".[]^$()*+?|{}") % push
               + Any(".^$") % function(r) push(r2p[r]) end
               -- bracket expression: push the constructor (NotAny after a
               -- leading ^, Any otherwise) and start a fresh character set
               + "[" * ( "^" * -function() push(NotAny) set = {} end
                       + -function() push(Any) set = {} end )
                     -- a ] right after the opening is taken literally
                     * (topattern("]") % add + "")
                     -- [:class:] -- the predicate is false for unknown names
                     * Arbno( "[:" * (Break(":") % push) * ":]" * -function() return add(classes[pop()]) ~= nil end
                            -- range: `%` binds like `*`, so the callback
                            -- receives the whole three-character "a-z"
                            + Len(1) * "-" * Len(1)
                              % function(range) for c = range:byte(), range:byte(3) do add(string.char(c)) end end
                            -- any other single character
                            + Len(1) % add )
                     -- call the pushed constructor with the collected set
                     * "]" * -function() push(pop()(table.concat(set))) end
               -- group: without a captures table the inner expression is
               -- simply left on the stack
               + "(" * -exp * ")"
                     * -function() if captures then
                         push(topattern(pop()) / function(cap) table.insert(captures, cap) end) end end )
             -- optional quantifier, applied to the element on top of the stack
             * ( "*" * ( "?" * -function() push(Arbno(pop())) end
                       -- greedy star: r = x r | ""
                       + -function() local r; r = pop() * -function() return r end + ""
                                     push(r) end )
               -- one or more: r = x (r | "")
               + "+" * -function() local r; r = pop() * (-function() return r end + "")
                                   push(r) end
               -- optional: the element is tried first and the empty match is
               -- the fallback, so `?` is greedy like `*` and `+`; topattern()
               -- keeps the alternation from being a string + string operation
               + "?" * -function() push(topattern(pop()) + "") end
               -- {min,max}: c counts the repetitions; the recursion stops
               -- once max is reached and the trailing predicate enforces min
               + "{" * ( Span(classes.digit) % push ) * ","
                     * ( Span(classes.digit)
                         % function(max) local min, c = pop()
                                         local r; r = pop() * -function() c = c + 1
                                                                          return c >= tonumber(max) or r end + ""
                                         push(-function() c = 0 end * r * -function() return c >= tonumber(min) end) end )
                     * "}"
               + "" )

  -- seq := { atom } [ "|" exp ]
  -- Every further atom is merged into the value below it: two strings are
  -- concatenated as strings, everything else with the pattern operator `*`.
  seq = ( atom * -function() local rvalue, lvalue = pop(), pop()
                             push(type(lvalue) == "string" and type(rvalue) == "string" and
                                  lvalue..rvalue or lvalue * rvalue) end
               * (-seq + "") + "" )
      -- alternation: combine the two topmost values with `+`
      * ( "|" * -exp * -function() local pat = pop() push(pop() + topattern(pat)) end
        + "" )
  exp = atom * seq

  -- The grammar has to consume the whole expression (anchored, up to RPos(0)).
  assert(smatch(str, exp * RPos(0), match_anchored),
         "Invalid regular expression!")
  return stack[1]
end
-- Export the compiler into the global environment as well.
_G.RegExp = RegExp
-- load C core, also registers Lua functions into metatables we cannot
-- access from Lua
require "lspipat.core"
|