aboutsummaryrefslogtreecommitdiff
path: root/src/lspipat.lua
blob: 9db2082ceef925aa7df057e578b2b7f346abb7b1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
--
-- LSPIPAT - LUA SPIPAT WRAPPER
-- Copyright (C) 2010, Robin Haberkorn
-- License: LGPL
--
-- ADDITIONAL METHODS IMPLEMENTED IN LUA
--

module("spipat", package.seeall)

--
-- Module and Pattern methods
--

--
-- Substitute matches of `pattern' in `str'.
--
-- After every substitution the (modified) string is matched again from scratch
-- with smatch(), until smatch() reports no match or the counter `n' hits zero.
--
--   str      subject string
--   pattern  pattern handed to smatch()
--   repl     replacement string, or a function called with the match's start and
--            end position; it may return a string, or nil to leave the subject
--            unchanged for that match
--   n        optional number, decremented once per match; the loop stops when it
--            becomes exactly 0
--   flags    passed through to smatch()
--
-- Returns the resulting string and the number of matches processed.
-- Raises an assertion error for an invalid `repl', `n' or replacement result.
--
function ssub(str, pattern, repl, n, flags)
	local replIsString = type(repl) == "string"

	assert(replIsString or type(repl) == "function",
	       "Invalid replacement specified!")
	assert(n == nil or type(n) == "number",
	       "Invalid repeat value specified!")

	local matchCount = 0

	while true do
		-- str, pattern and flags are not validated here; that is left to smatch
		local first, last = smatch(str, pattern, flags)
		if not first then break end

		local subst
		if replIsString then
			subst = repl
		else
			subst = repl(first, last)
		end
		assert(subst == nil or type(subst) == "string",
		       "Replacement function returned invalid value!")

		if subst then
			local head, tail = str:sub(1, first - 1), str:sub(last + 1)
			str = head..subst..tail
		end

		matchCount = matchCount + 1

		-- without `n' there is no limit; with it, stop once it reaches zero
		if n then
			n = n - 1
			if n == 0 then break end
		end
	end

	return str, matchCount
end

--
-- Build an iterator over successive matches of `pattern' in `str'.
--
-- The given pattern is wrapped so that every smatch() call
--   * starts at the position where the previous match ended
--     (Pos() is fed the current value of endPos),
--   * skips an arbitrary prefix (Arb()),
--   * records the cursor before and after `pattern' through the two
--     cursor-assignment functions.
-- NOTE(review): the exact meaning of Pos/Arb and of `#function' comes from
-- lspipat.core and is read off from usage here -- confirm against the core.
--
--   str      subject string
--   pattern  pattern to search for
--   flags    passed through to smatch()
--
-- Returns a function that yields startPos, endPos for the next match, or
-- nothing once smatch() fails.
--
function siter(str, pattern, flags)
	-- Both cursors must be locals: an undeclared startPos would be written
	-- into the module environment and shared by every iterator.
	local startPos
	local endPos = 0

	pattern = Pos(function() return endPos end) * Arb() *
		  #function(p) startPos = p + 1 end * pattern * #function(p) endPos = p end

	return function()
		if not smatch(str, pattern, flags) then return end
		return startPos, endPos
	end
end

--
-- Primitives (shortcuts for deferring global variables)
--

-- Setter used together with Setcur(): stores `value' in the global variable
-- named `globalName'.
local function genericSetGlobal(value, globalName)
	_G[globalName] = value
end

-- Shortcut for Setcur() that targets the global variable `name'.
-- Setcur() receives the setter above plus `name' (presumably as callback and
-- cookie -- confirm against lspipat.core).
-- The function is exported into the global table as well; unfortunately it
-- cannot be registered as the __len operator of strings.
function _Setcur(name)
	return Setcur(genericSetGlobal, name)
end
_G._Setcur = _Setcur

		-- NOTE: if global `name' is of an invalid type,
		-- lspipat will raise an error automatically
local function genericGetGlobal(name) return _G[name] end

for _, prim in ipairs{
	"Pred",							-- _Pred will be registered as __unm to strings
	"Any", "Break", "BreakX", "NotAny", "NSpan", "Span",	-- string primitives
	"Len", "Pos", "RPos", "RTab", "Tab"			-- number primitives
} do
	local _prim = "_"..prim

	spipat[_prim] = function(name) return spipat[prim](genericGetGlobal, name) end
	_G[_prim] = spipat[_prim]
end

-- FIXME: local cookie support for assignments -> shortcuts for assignment of global variables

--
-- POSIX Extended Regular Expressions To SPITBOL Pattern Compiler
--

--
-- Compile the regular expression `str' into a pattern.
--
-- The expression is parsed by a single anchored smatch() against a grammar
-- (atom / seq / exp below) that is itself written with pattern primitives.
-- While the grammar matches, the functions attached to it build the resulting
-- pattern on a local stack; the first stack entry is returned.
--
--   str       regular expression source
--   captures  optional table; if given, every parenthesised group is wrapped
--             so that a function inserting its argument into this table is
--             attached to the group (presumably called with the text matched
--             by the group -- confirm against lspipat.core)
--
-- Raises an assertion error if `captures' is neither nil nor a table, or if
-- smatch() fails on `str' ("Invalid regular expression!").
--
-- NOTE(review): the operators used on patterns, strings and functions
-- (`*', `+', `%', `/', unary `-') are implemented by lspipat.core. Comments
-- below describe them as they appear to be used here (concatenation,
-- alternation, call-with-match, deferred call) -- confirm against the core.
--
function RegExp(str, captures)
	assert(type(captures) == "nil" or type(captures) == "table",
	       "Invalid captures table given!")

	-- construction stack for the pattern being built
	local stack = {}
	local function push(v) table.insert(stack, v) end
	local function pop() return table.remove(stack) end
	-- regex metacharacters that map directly onto a primitive
	local r2p = {["."] = Len(1), ["^"] = Pos(0), ["$"] = RPos(0)}

	-- characters collected for the bracket expression currently being parsed;
	-- add() appends to it and returns its argument
	local set
	local function add(c) table.insert(set, c) return c end

	-- named character classes, looked up for [:name:] inside brackets
	local classes = {
		blank = " \t",
		punct = [[-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~]],
		lower = "abcdefghijklmnopqrstuvwxyz",
		digit = "0123456789"
	}
	classes.upper = classes.lower:upper()
	classes.alpha = classes.upper..classes.lower
	classes.alnum = classes.alpha..classes.digit
	classes.word = classes.alnum.."_"
	classes.xdigit = classes.upper:sub(1, 6)..classes.lower:sub(1, 6)..classes.digit
	classes.space = classes.blank.."\r\n\v\f"
	-- TODO: some character classes are still missing...

	-- `exp' and `seq' start out as functions returning the current value of
	-- the local of the same name. Both locals are overwritten with the real
	-- patterns further down, so the references `-exp' / `-seq' inside the
	-- grammar resolve to those patterns when they are evaluated.
	local function exp() return exp end
	local function seq() return seq end
	-- atom: one regex element followed by an optional quantifier.
	-- Alternatives, in order:
	--   1. backslash + any single character (pushed literally)
	--   2. a character that is none of .[]^$()*+?|{} (pushed literally)
	--   3. one of . ^ $ (pushes the matching r2p entry)
	--   4. bracket expression: pushes NotAny (after "^") or Any, resets `set',
	--      accepts a leading "]", then [:class:] names, ranges x-y and single
	--      characters; at the closing "]" the pushed constructor is replaced
	--      by constructor(table.concat(set)).
	--      A [:class:] name not present in `classes' makes add() return nil,
	--      so the attached function returns false.
	--   5. parenthesised sub-expression; wrapped for capturing only if
	--      `captures' was given
	-- Quantifiers, in order: "*?" (via Arbno), "*", "+", "?", "{min,max}",
	-- or none. The "{min,max}" form counts repetitions in the local `c'.
	local atom = ( "\\" * (Len(1) % push)
		     + NotAny(".[]^$()*+?|{}") % push
	             + Any(".^$") % function(r) push(r2p[r]) end
	             + "[" * ( "^" * -function() push(NotAny) set = {} end
	       	             +       -function() push(Any) set = {} end )
		           * (topattern("]") % add + "")
		           * Arbno( "[:" * (Break(":") % push) * ":]" * -function() return add(classes[pop()]) ~= nil end
			   	  + Len(1) * "-" * Len(1)
			          % function(range) for c = range:byte(), range:byte(3) do add(string.char(c)) end end
			          + Len(1) % add )
		           * "]" * -function() push(pop()(table.concat(set))) end
	             + "(" * -exp * ")"
		     	   * -function() if captures then
			   		 push(topattern(pop()) / function(cap) table.insert(captures, cap) end) end end )
	             * ( "*" * ( "?" * -function() push(Arbno(pop())) end
	     	               +       -function() local r; r = pop() * -function() return r end + ""
	     				           push(r) end )
		       + "+" * -function() local r; r = pop() * (-function() return r end + "")
	     			           push(r) end
		       + "?" * -function() push(topattern("") + pop()) end
		       + "{" * ( Span(classes.digit) % push ) * ","
		       	     * ( Span(classes.digit)
		               % function(max) local min, c = pop()
			       		       local r; r = pop() * -function() c = c + 1
					       					return c >= tonumber(max) or r end + ""
			     		       push(-function() c = 0 end * r * -function() return c >= tonumber(min) end) end )
			     * "}"
		       + "" )
	-- seq: the remainder of a sequence after a leading atom. Each further atom
	-- is combined with the entry below it on the stack: two strings are joined
	-- with `..', anything else with `*'. An optional "|" branch then combines
	-- the sequence with a following expression using `+'.
	seq = ( atom * -function() local rvalue, lvalue = pop(), pop()
				   push(type(lvalue) == "string" and type(rvalue) == "string" and
				        lvalue..rvalue or lvalue * rvalue) end
	      * (-seq + "") + "" )
	    * ( "|" * -exp * -function() local pat = pop() push(pop() + topattern(pat)) end
	      + "" )
	-- exp: a complete expression is one atom followed by seq
	exp = atom * seq

	-- parse `str' with the grammar; `exp * RPos(0)' together with
	-- match_anchored presumably forces the grammar to cover the whole string
	assert(smatch(str, exp * RPos(0), match_anchored),
	       "Invalid regular expression!")

	-- NOTE(review): for a purely literal expression this entry looks like it
	-- can be a plain string (see the `..' branch in seq) rather than a pattern
	-- object -- confirm that callers accept both.
	return stack[1]
end
-- export into the global table as well
_G.RegExp = RegExp

		-- load C core, also registers Lua functions into metatables we cannot
		-- access from Lua
require "lspipat.core"