diff options
-rw-r--r-- | AUTHORS | 0 | ||||
-rw-r--r-- | COPYING | 165 | ||||
-rw-r--r-- | ChangeLog | 0 | ||||
-rw-r--r-- | Makefile.am | 7 | ||||
-rw-r--r-- | NEWS | 0 | ||||
-rw-r--r-- | README | 1 | ||||
-rw-r--r-- | configure.ac | 118 | ||||
-rw-r--r-- | doc/Makefile.am | 15 | ||||
-rw-r--r-- | doc/html_custom.xsl | 6 | ||||
-rw-r--r-- | doc/html_titlepage.spec.xml | 688 | ||||
-rw-r--r-- | doc/lspipat.png | bin | 0 -> 4266 bytes | |||
-rw-r--r-- | doc/pattern.txt | 1017 | ||||
-rw-r--r-- | doc/reference.xml | 2005 | ||||
-rwxr-xr-x | samples/exp2bf.lua | 48 | ||||
-rw-r--r-- | samples/regexp.lua | 26 | ||||
-rwxr-xr-x | samples/wave.lua | 81 | ||||
-rw-r--r-- | spipat-patches/0.9.3+_image.patch | 94 | ||||
-rw-r--r-- | src/Makefile.am | 28 | ||||
-rw-r--r-- | src/call.c | 86 | ||||
-rw-r--r-- | src/compose.c | 106 | ||||
-rw-r--r-- | src/lspipat.c | 336 | ||||
-rw-r--r-- | src/lspipat.h | 149 | ||||
-rw-r--r-- | src/lspipat.lua | 155 | ||||
-rw-r--r-- | src/misc.c | 89 | ||||
-rw-r--r-- | src/render.c | 138 | ||||
-rw-r--r-- | src/simple.c | 57 | ||||
-rw-r--r-- | src/string.c | 131 | ||||
-rw-r--r-- | src/uint.c | 128 | ||||
-rw-r--r-- | src/unary.c | 182 |
29 files changed, 5856 insertions, 0 deletions
@@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. 
+ + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/ChangeLog diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..aed7d2a --- /dev/null +++ b/Makefile.am @@ -0,0 +1,7 @@ + +SUBDIRS = src doc + +EXTRA_DIST = samples/exp2bf.lua \ + samples/wave.lua \ + samples/regexp.lua \ + spipat-patches/0.9.3+_image.patch @@ -0,0 +1 @@ +read doc/reference.html for Installation Notes & Module Reference diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..6e5cb13 --- /dev/null +++ b/configure.ac @@ -0,0 +1,118 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([2.64]) +AC_INIT([SNOBOL/SPITBOL Patterns for Lua], [0.1], [robin.haberkorn@googlemail.com], [lspipat]) +AM_INIT_AUTOMAKE +AC_CONFIG_SRCDIR([src/lspipat.c]) +AC_CONFIG_HEADERS([config.h]) + +LT_INIT([disable-static]) + +# Checks for programs. +AC_PROG_CC +AC_PROG_INSTALL + +# Lua compiler (optional) +AC_CHECK_PROG(LUAC, luac5.1, luac5.1) +AC_CHECK_PROG(LUAC, luac, luac) + +LUAC_FLAGS= +AC_SUBST(LUAC_FLAGS) + +# XSLTProc (optional) +AC_CHECK_PROG(XSLTPROC, xsltproc, xsltproc) + +XSLT_FLAGS="--xinclude" +AC_SUBST(XSLT_FLAGS) + +# Checks for libraries. + +# libspipat +AC_CHECK_LIB([spipat], [spipat_match2], , [ + AC_MSG_ERROR([libspipat (Spipat library) not found!]) +]) + +# liblua (care about different distributions) +AC_CHECK_LIB([lua5.1], [lua_call], , [ + AC_CHECK_LIB([lua], [lua_call], , [ + AC_MSG_ERROR([liblua (Lua 5.1 library) not found!]) + ]) +]) + +# Checks for header files. 
+AC_CHECK_HEADERS([stdint.h stdlib.h string.h stdbool.h]) + +# spipat headers +AC_CHECK_HEADERS([spipat.h], , [ + AC_MSG_ERROR([Spipat header not found!]) +], [ + #include <stdint.h> + #include <stdbool.h> +]) + +# spipat_impl.h/spipat_image.h are not installed by default and are thus optional +AC_CHECK_HEADERS([spipat_impl.h spipat_image.h], , [ + AC_MSG_WARN([Optional spipat header not found! You are strongly encouraged to specify spipat's source dir in CPPFLAGS.]) +], [ + #include <stdint.h> + #include <stdbool.h> + #include <spipat.h> +]) + +# Lua headers (care about different distributions) +AC_CHECK_HEADERS([lua5.1/lua.h lua5.1/lauxlib.h lua5.1/lualib.h], , [ + AC_CHECK_HEADERS([lua.h lauxlib.h lualib.h], , [ + AC_MSG_ERROR([Lua 5.1 headers not found!]) + ]) + break +]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_C_INLINE +AC_TYPE_SIZE_T +AC_HEADER_STDBOOL +AC_TYPE_UINT32_T + +# Checks for library functions. +AC_CHECK_FUNCS([memset]) + +# Package Configuration + +AC_ARG_ENABLE(lua-libdir, + AS_HELP_STRING([--enable-lua-libdir=DIR], + [Install lspipat into this directory (default is LIBDIR/lua/5.1)]), + [lualibdir=$enable_lua_libdir], [lualibdir=${libdir}/lua/5.1]) +AC_SUBST(lualibdir) +lualib_lspipatdir=${lualibdir}/lspipat +AC_SUBST(lualib_lspipatdir) + +AC_ARG_ENABLE(lua-precompile, + AS_HELP_STRING([--enable-lua-precompile], + [Enable precompilation of Lua source files (default is yes)]), + [lua_precompile=$enableval], [lua_precompile=yes]) +AM_CONDITIONAL([LUA_PRECOMPILE], [test x$lua_precompile = xyes]) + +if test x$lua_precompile = xyes -a x$LUAC = x; then + AC_MSG_ERROR([Lua chunk precompilation enabled, but Lua 5.1 compiler not found! 
Try --disable-lua-precompile.]) +fi + +AC_ARG_ENABLE(lua-strip, + AS_HELP_STRING([--enable-lua-strip], + [Strip compiled Lua source files (default is yes)]), + [lua_strip=$enableval], [lua_strip=yes]) +if test x$lua_strip = xyes; then + LUAC_FLAGS+=" -s" +fi + +AC_ARG_ENABLE(html-doc, + AS_HELP_STRING([--enable-html-doc], + [Generate HTML documentation (default is yes)]), + [html_doc=$enableval], [html_doc=yes]) + +if test x$html_doc = xyes -a x$XSLTPROC = x; then + AC_MSG_ERROR([Enabled generating documentation, but XSLTProc not found! Try --disable-html-doc.]) +fi + +AC_CONFIG_FILES([Makefile src/Makefile doc/Makefile]) +AC_OUTPUT diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..f8ff134 --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,15 @@ +## Docbook processing - very simplistic at the moment + +DB_URI = http://docbook.sourceforge.net/release/xsl/current + +dist_doc_DATA = pattern.txt + +dist_html_DATA = reference.html lspipat.png +CLEANFILES = reference.html html_titlepage.xsl +EXTRA_DIST = reference.xml html_custom.xsl html_titlepage.spec.xml + +reference.html : reference.xml html_custom.xsl html_titlepage.xsl + @XSLTPROC@ @XSLT_FLAGS@ -o $@ html_custom.xsl $< + +html_titlepage.xsl : html_titlepage.spec.xml + @XSLTPROC@ @XSLT_FLAGS@ -o $@ $(DB_URI)/template/titlepage.xsl $< diff --git a/doc/html_custom.xsl b/doc/html_custom.xsl new file mode 100644 index 0000000..af94064 --- /dev/null +++ b/doc/html_custom.xsl @@ -0,0 +1,6 @@ +<?xml version='1.0'?> + +<stylesheet xmlns="http://www.w3.org/1999/XSL/Transform" version="1.0"> + <import href="http://docbook.sourceforge.net/release/xsl/current/html/docbook.xsl"/> + <import href="html_titlepage.xsl"/> +</stylesheet> diff --git a/doc/html_titlepage.spec.xml b/doc/html_titlepage.spec.xml new file mode 100644 index 0000000..ea44036 --- /dev/null +++ b/doc/html_titlepage.spec.xml @@ -0,0 +1,688 @@ +<t:templates xmlns:t="http://nwalsh.com/docbook/xsl/template/1.0" + 
xmlns:param="http://nwalsh.com/docbook/xsl/template/1.0/param" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> + +<!-- ==================================================================== --> + +<t:titlepage t:element="article" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <mediaobject/> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="set" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="book" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <mediaobject/> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + 
<t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="part" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="division.title" + param:node="ancestor-or-self::part[1]"/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="partintro" t:wrapper="div"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="reference" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + 
</t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="refentry" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> +<!-- uncomment this if you want refentry titlepages + <title t:force="1" + t:named-template="refentry.title" + param:node="ancestor-or-self::refentry[1]"/> +--> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator/> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + + <t:titlepage t:element="dedication" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::dedication[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="acknowledgements" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::acknowledgements[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + 
<t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="preface" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="chapter" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="appendix" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + 
</t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="section" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect1" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect2" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before 
t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect3" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect4" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect5" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before 
t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="simplesect" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="bibliography" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::bibliography[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="glossary" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::glossary[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> 
+ +<t:titlepage t:element="index" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::index[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="setindex" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::setindex[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +</t:templates> diff --git a/doc/lspipat.png b/doc/lspipat.png Binary files differnew file mode 100644 index 0000000..317751d --- /dev/null +++ b/doc/lspipat.png diff --git a/doc/pattern.txt b/doc/pattern.txt new file mode 100644 index 0000000..e0d1719 --- /dev/null +++ b/doc/pattern.txt @@ -0,0 +1,1017 @@ +Copyright (C) 2007,2008, Philip L. Budne +Copyright (C) 1998-2005, AdaCore + +This documentation (and the underlying software) developed from the +GNAT.SPITBOL.PATTERNS package of GNU Ada. GNAT was originally +developed by the GNAT team at New York University. Extensive +contributions were provided by Ada Core Technologies Inc. 
+ +SPITBOL-like pattern construction and matching + +This child package of GNAT.SPITBOL provides a complete implementation +of the SPITBOL-like pattern construction and matching operations. This +package is based on Macro-SPITBOL created by Robert Dewar. + +This is a completely general pattern matching package based on the +pattern language of SNOBOL4, as implemented in SPITBOL. The pattern +language is modeled on context free grammars, with context sensitive +extensions that provide full (type 0) computational capabilities. + +------------------------------- +Pattern Matching Tutorial +------------------------------- + +A pattern matching operation (a call to one of the Match subprograms) +takes a subject string and a pattern, and optionally a replacement +string. The replacement string option is only allowed if the subject +is a variable. + +The pattern is matched against the subject string, and either the +match fails, or it succeeds matching a contiguous substring. If a +replacement string is specified, then the subject string is modified +by replacing the matched substring with the given replacement. + +Concatenation and Alternation +============================= + +A pattern consists of a series of pattern elements. The pattern is +built up using either the concatenation operator: + + A & B + + which means match A followed immediately by matching B, or the + alternation operator: + + A | B + + which means first attempt to match A, and then if that does not + succeed, match B. + + There is full backtracking, which means that if a given pattern + element fails to match, then previous alternatives are matched. + For example if we have the pattern: + + (A | B) & (C | D) & (E | F) + + First we attempt to match A, if that succeeds, then we go on to try + to match C, and if that succeeds, we go on to try to match E. If E + fails, then we try F. If F fails, then we go back and try matching + D instead of C.
Let's make this explicit using a specific example, + and introducing the simplest kind of pattern element, which is a + literal string. The meaning of this pattern element is simply to + match the characters that correspond to the string characters. Now + let's rewrite the above pattern form with specific string literals + as the pattern elements: + + ("ABC" | "AB") & ("DEF" | "CDE") & ("GH" | "IJ") + + The following strings will be attempted in sequence: + + ABC . DEF . GH + ABC . DEF . IJ + ABC . CDE . GH + ABC . CDE . IJ + AB . DEF . GH + AB . DEF . IJ + AB . CDE . GH + AB . CDE . IJ + + Here we use the dot simply to separate the pieces of the string + matched by the three separate elements. + + Moving the Start Point + ====================== + + A pattern is not required to match starting at the first character + of the string, and is not required to match to the end of the string. + The first attempt does indeed attempt to match starting at the first + character of the string, trying all the possible alternatives. But + if all alternatives fail, then the starting point of the match is + moved one character, and all possible alternatives are attempted at + the new anchor point. + + The entire match fails only when every possible starting point has + been attempted. As an example, suppose that we had the subject + string + + "ABABCDEIJKL" + + matched using the pattern in the previous example: + + ("ABC" | "AB") & ("DEF" | "CDE") & ("GH" | "IJ") + + would succeed, after two anchor point moves: + + "ABABCDEIJKL" + ^^^^^^^ + matched + section + + This mode of pattern matching is called the unanchored mode. It is + also possible to put the pattern matcher into anchored mode by + setting the global variable Anchored_Mode to True. This will cause + all subsequent matches to be performed in anchored mode, where the + match is required to start at the first character.
+ + We will also see later how the effect of an anchored match can be + obtained for a single specified anchor point if this is desired. + + Other Pattern Elements + ====================== + + In addition to strings (or single characters), there are many special + pattern elements that correspond to special predefined alternations: + + Arb Matches any string. First it matches the null string, and + then on a subsequent failure, matches one character, and + then two characters, and so on. It only fails if the + entire remaining string is matched. + + Bal Matches a non-empty string that is parentheses balanced + with respect to ordinary () characters. Examples of + balanced strings are "ABC", "A((B)C)", and "A(B)C(D)E". + Bal matches the shortest possible balanced string on the + first attempt, and if there is a subsequent failure, + attempts to extend the string. + + Abort Immediately aborts the entire pattern match, signalling + failure. This is a specialized pattern element, which is + useful in conjunction with some of the special pattern + elements that have side effects. + + Fail The null alternation. Matches no possible strings, so it + always signals failure. This is a specialized pattern + element, which is useful in conjunction with some of the + special pattern elements that have side effects. + + Fence Matches the null string at first, and then if a failure + causes alternatives to be sought, aborts the match (like + a Cancel). Note that using Fence at the start of a pattern + has the same effect as matching in anchored mode. + + Rem Matches from the current point to the last character in + the string. This is a specialized pattern element, which + is useful in conjunction with some of the special pattern + elements that have side effects. + + Succeed Repeatedly matches the null string (it is equivalent to + the alternation ("" | "" | "" ....). 
This is a special + pattern element, which is useful in conjunction with some + of the special pattern elements that have side effects. + + Pattern Construction Functions + ============================== + + The following functions construct additional pattern elements + + Any(S) Where S is a string, matches a single character that is + any one of the characters in S. Fails if the current + character is not one of the given set of characters. + + Arbno(P) Where P is any pattern, matches any number of instances + of the pattern, starting with zero occurrences. It is + thus equivalent to ("" | (P & ("" | (P & ("" ....)))). + The pattern P may contain any number of pattern elements + including the use of alternation and concatenation. + + Break(S) Where S is a string, matches a string of zero or more + characters up to but not including a break character + that is one of the characters given in the string S. + Can match the null string, but cannot match the last + character in the string, since a break character is + required to be present. + + BreakX(S) Where S is a string, behaves exactly like Break(S) when + it first matches, but if a string is successfully matched, + then a subsequent failure causes an attempt to extend the + matched string. + + Fence(P) Where P is a pattern, attempts to match the pattern P + including trying all possible alternatives of P. If none + of these alternatives succeeds, then the Fence pattern + fails. If one alternative succeeds, then the pattern + match proceeds, but on a subsequent failure, no attempt + is made to search for alternative matches of P. The + pattern P may contain any number of pattern elements + including the use of alternation and concatenation. + + Len(N) Where N is a natural number, matches the given number of + characters. For example, Len(10) matches any string that + is exactly ten characters long. + + NotAny(S) Where S is a string, matches a single character that is + not one of the characters of S. 
Fails if the current + character is one of the given set of characters. + + NSpan(S) Where S is a string, matches a string of zero or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Always succeeds, since it can match the null string. + + Pos(N) Where N is a natural number, matches the null string + if exactly N characters have been matched so far, and + otherwise fails. + + Rpos(N) Where N is a natural number, matches the null string + if exactly N characters remain to be matched, and + otherwise fails. + + Rtab(N) Where N is a natural number, matches characters from + the current position until exactly N characters remain + to be matched in the string. Fails if fewer than N + unmatched characters remain in the string. + + Tab(N) Where N is a natural number, matches characters from + the current position until exactly N characters have + been matched in all. Fails if more than N characters + have already been matched. + + Span(S) Where S is a string, matches a string of one or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Fails if the current character is not one of the given + set of characters. + + Recursive Pattern Matching + ========================== + + The plus operator (+P) where P is a pattern variable, creates + a recursive pattern that will, at pattern matching time, follow + the pointer to obtain the referenced pattern, and then match this + pattern. This may be used to construct recursive patterns. Consider + for example: + + P := ("A" | ("B" & (+P))) + + On the first attempt, this pattern attempts to match the string "A". + If this fails, then the alternative matches a "B", followed by an + attempt to match P again. This second attempt first attempts to + match "A", and so on. The result is a pattern that will match a + string of B's followed by a single A. 
+ + This particular example could simply be written as NSpan('B') & 'A', + but the use of recursive patterns in the general case can construct + complex patterns which could not otherwise be built. + + Pattern Assignment Operations + ============================= + + In addition to the overall result of a pattern match, which indicates + success or failure, it is often useful to be able to keep track of + the pieces of the subject string that are matched by individual + pattern elements, or subsections of the pattern. + + The pattern assignment operators allow this capability. The first + form is the immediate assignment: + + P * S + + Here P is an arbitrary pattern, and S is a variable of type VString + that will be set to the substring matched by P. This assignment + happens during pattern matching, so if P matches more than once, + then the assignment happens more than once. + + The deferred assignment operation: + + P ** S + + avoids these multiple assignments by deferring the assignment to the + end of the match. If the entire match is successful, and if the + pattern P was part of the successful match, then at the end of the + matching operation the assignment to S of the string matching P is + performed. + + The cursor assignment operation: + + Setcur(N) + + assigns the current cursor position to the natural variable N. The + cursor position is defined as the count of characters that have been + matched so far (including any start point moves). + + Finally the operations * and ** may be used with values of type + Text_IO.File_Access. The effect is to do a Put_Line operation of + the matched substring. These are particularly useful in debugging + pattern matches. + + Deferred Matching + ================= + + The pattern construction functions (such as Len and Any) all permit + the use of pointers to natural or string values, or functions that + return natural or string values. These forms cause the actual value + to be obtained at pattern matching time. 
This allows interesting + possibilities for constructing dynamic patterns as illustrated in + the examples section. + + In addition the (+S) operator may be used where S is a pointer to + string or function returning string, with a similar deferred effect. + + A special use of deferred matching is the construction of predicate + functions. The element (+P) where P is an access to a function that + returns a Boolean value, causes the function to be called at the + time the element is matched. If the function returns True, then the + null string is matched, if the function returns False, then failure + is signalled and previous alternatives are sought. + + Deferred Replacement + ==================== + + The simple model given for pattern replacement (where the matched + substring is replaced by the string given as the third argument to + Match) works fine in simple cases, but this approach does not work + in the case where the expression used as the replacement string is + dependent on values set by the match. + + For example, suppose we want to find an instance of a parenthesized + character, and replace the parentheses with square brackets. At first + glance it would seem that: + + Match (Subject, '(' & Len (1) * Char & ')', '[' & Char & ']'); + + would do the trick, but that does not work, because the third + argument to Match gets evaluated too early, before the call to + Match, and before the pattern match has had a chance to set Char. + + To solve this problem we provide the deferred replacement capability. + With this approach, which of course is only needed if the pattern + involved has side effects, is to do the match in two stages. The + call to Match sets a pattern result in a variable of the private + type Match_Result, and then a subsequent Replace operation uses + this Match_Result object to perform the required replacement. + + Using this approach, we can now write the above operation properly + in a manner that will work: + + M : Match_Result; + ... 
+ Match (Subject, '(' & Len (1) * Char & ')', M); + Replace (M, '[' & Char & ']'); + + As with other Match cases, there is a function and procedure form + of this match call. A call to Replace after a failed match has no + effect. Note that Subject should not be modified between the calls. + + Examples of Pattern Matching + ============================ + + First a simple example of the use of pattern replacement to remove + a line number from the start of a string. We assume that the line + number has the form of a string of decimal digits followed by a + period, followed by one or more spaces. + + Digs : constant Pattern := Span("0123456789"); + + Lnum : constant Pattern := Pos(0) & Digs & '.' & Span(' '); + + Now to use this pattern we simply do a match with a replacement: + + Match (Line, Lnum, ""); + + which replaces the line number by the null string. Note that it is + also possible to use an Ada.Strings.Maps.Character_Set value as an + argument to Span and similar functions, and in particular all the + useful constants in Ada.Strings.Maps.Constants are available. This + means that we could define Digs as: + + Digs : constant Pattern := Span(Decimal_Digit_Set); + + The style we use here, of defining constant patterns and then using + them is typical. It is possible to build up patterns dynamically, + but it is usually more efficient to build them in pieces in advance + using constant declarations. Note in particular that although it is + possible to construct a pattern directly as an argument for the + Match routine, it is much more efficient to preconstruct the pattern + as we did in this example. + + Now let's look at the use of pattern assignment to break a + string into sections. Suppose that the input string has two + unsigned decimal integers, separated by spaces or a comma, + with spaces allowed anywhere. 
Then we can isolate the two + numbers with the following pattern: + + Num1, Num2 : aliased VString; + + B : constant Pattern := NSpan(' '); + + N : constant Pattern := Span("0123456789"); + + T : constant Pattern := + NSpan(' ') & N * Num1 & Span(" ,") & N * Num2; + + The match operation Match (" 124, 257 ", T) would assign the + string 124 to Num1 and the string 257 to Num2. + + Now let's see how more complex elements can be built from the + set of primitive elements. The following pattern matches strings + that have the syntax of Ada 95 based literals: + + Digs : constant Pattern := Span(Decimal_Digit_Set); + UDigs : constant Pattern := Digs & Arbno('_' & Digs); + + Edig : constant Pattern := Span(Hexadecimal_Digit_Set); + UEdig : constant Pattern := Edig & Arbno('_' & Edig); + + Bnum : constant Pattern := Udigs & '#' & UEdig & '#'; + + A match against Bnum will now match the desired strings, e.g. + it will match 16#123_abc#, but not a#b#. However, this pattern + is not quite complete, since it does not allow colons to replace + the pound signs. The following is more complete: + + Bchar : constant Pattern := Any("#:"); + Bnum : constant Pattern := Udigs & Bchar & UEdig & Bchar; + + but that is still not quite right, since it allows # and : to be + mixed, and they are supposed to be used consistently. We solve + this by using a deferred match. + + Temp : aliased VString; + + Bnum : constant Pattern := + Udigs & Bchar * Temp & UEdig & (+Temp) + + Here the first instance of the base character is stored in Temp, and + then later in the pattern we rematch the value that was assigned. + + For an example of a recursive pattern, let's define a pattern + that is like the built in Bal, but the string matched is balanced + with respect to square brackets or curly brackets. 
+ + The language for such strings might be defined in extended BNF as + + ELEMENT ::= <any character other than [] or {}> + | '[' BALANCED_STRING ']' + | '{' BALANCED_STRING '}' + + BALANCED_STRING ::= ELEMENT {ELEMENT} + + Here we use {} to indicate zero or more occurrences of a term, as + is common practice in extended BNF. Now we can translate the above + BNF into recursive patterns as follows: + + Element, Balanced_String : aliased Pattern; + . + . + . + Element := NotAny ("[]{}") + | + ('[' & (+Balanced_String) & ']') + | + ('{' & (+Balanced_String) & '}'); + + Balanced_String := Element & Arbno (Element); + + Note the important use of + here to refer to a pattern not yet + defined. Note also that we use assignments precisely because we + cannot refer to as yet undeclared variables in initializations. + + Now that this pattern is constructed, we can use it as though it + were a new primitive pattern element, and for example, the match: + + Match ("xy[ab{cd}]", Balanced_String * Current_Output & Fail); + + will generate the output: + + x + xy + xy[ab{cd}] + y + y[ab{cd}] + [ab{cd}] + a + ab + ab{cd} + b + b{cd} + {cd} + c + cd + d + + Note that the function of the fail here is simply to force the + pattern Balanced_String to match all possible alternatives. Studying + the operation of this pattern in detail is highly instructive. + + Finally we give a rather elaborate example of the use of deferred + matching. The following declarations build up a pattern which will + find the longest string of decimal digits in the subject string. 
+ + Max, Cur : VString; + Loc : Natural; + + function GtS return Boolean is + begin + return Length (Cur) > Length (Max); + end GtS; + + Digit : constant Character_Set := Decimal_Digit_Set; + + Digs : constant Pattern := Span(Digit); + + Find : constant Pattern := + "" * Max & Fence & -- initialize Max to null + BreakX (Digit) & -- scan looking for digits + ((Span(Digit) * Cur & -- assign next string to Cur + (+GtS) & -- check size(Cur) > Size(Max) + Setcur(Loc)) -- if so, save location + * Max) & -- and assign to Max + Fail; -- seek all alternatives + + As we see from the comments here, complex patterns like this take + on aspects of sequential programs. In fact they are sequential + programs with general backtracking. In this pattern, we first use + a pattern assignment that matches null and assigns it to Max, so + that it is initialized for the new match. Now BreakX scans to the + next digit. Arb would do here, but BreakX will be more efficient. + Once we have found a digit, we scan out the longest string of + digits with Span, and assign it to Cur. The deferred call to GtS + tests if the string we assigned to Cur is the longest so far. If + not, then failure is signalled, and we seek alternatives (this + means that BreakX will extend and look for the next digit string). + If the call to GtS succeeds then the matched string is assigned + as the largest string so far into Max and its location is saved + in Loc. Finally Fail forces the match to fail and seek alternatives, + so that the entire string is searched. + + If the pattern Find is matched against a string, the variable Max + at the end of the pattern will have the longest string of digits, + and Loc will be the ending character location of the string. For + example, Match("ab123cd4657ef23", Find) will assign "4657" to Max + and 11 to Loc (indicating that the string ends with the eleventh + character of the string). 
+ + Correspondence with Pattern Matching in SPITBOL + =============================================== + + Generally the Ada syntax and names correspond closely to SPITBOL + syntax for pattern matching construction. + + The basic pattern construction operators are renamed as follows: + + Spitbol Ada + + (space) & + | or + $ * + . ** + + The Ada operators were chosen so that the relative precedences of + these operators corresponds to that of the Spitbol operators, but + as always, the use of parentheses is advisable to clarify. + + The pattern construction operators all have similar names. + + The actual pattern matching syntax is modified in Ada as follows: + + Spitbol Ada + + X Y Match (X, Y); + X Y = Z Match (X, Y, Z); + + and pattern failure is indicated by returning a Boolean result from + the Match function (True for success, False for failure). + +----------------------- +Type Declarations +----------------------- + +type Pattern is private; + Type representing a pattern. This package provides a complete set of + operations for constructing patterns that can be used in the pattern + matching operations provided. + +type Boolean_Func is access function return Boolean; + General Boolean function type. When this type is used as a formal + parameter type in this package, it indicates a deferred predicate + pattern. The function will be called when the pattern element is + matched and failure signalled if False is returned. + +type Natural_Func is access function return Natural; + General Natural function type. When this type is used as a formal + parameter type in this package, it indicates a deferred pattern. + The function will be called when the pattern element is matched + to obtain the currently referenced Natural value. + +type VString_Func is access function return VString; + General VString function type. When this type is used as a formal + parameter type in this package, it indicates a deferred pattern. 
+ The function will be called when the pattern element is matched + to obtain the currently referenced string value. + +subtype PString is String; + This subtype is used in the remainder of the package to indicate a + formal parameter that is converted to its corresponding pattern, + i.e. a pattern that matches the characters of the string. + +subtype PChar is Character; + Similarly, this subtype is used in the remainder of the package to + indicate a formal parameter that is converted to its corresponding + pattern, i.e. a pattern that matches this one character. + +subtype VString_Var is VString; +subtype Pattern_Var is Pattern; + These synonyms are used as formal parameter types to a function where, + if the language allowed, we would use in out parameters, but we are + not allowed to have in out parameters for functions. Instead we pass + actuals which must be variables, and with a bit of trickery in the + body, manage to interpret them properly as though they were indeed + in out parameters. + +-------------------------------- +Basic Pattern Construction +-------------------------------- + +function "&" (L : Pattern; R : Pattern) return Pattern; +function "&" (L : PString; R : Pattern) return Pattern; +function "&" (L : Pattern; R : PString) return Pattern; +function "&" (L : PChar; R : Pattern) return Pattern; +function "&" (L : Pattern; R : PChar) return Pattern; + + Pattern concatenation. Matches L followed by R + +function "or" (L : Pattern; R : Pattern) return Pattern; +function "or" (L : PString; R : Pattern) return Pattern; +function "or" (L : Pattern; R : PString) return Pattern; +function "or" (L : PString; R : PString) return Pattern; +function "or" (L : PChar; R : Pattern) return Pattern; +function "or" (L : Pattern; R : PChar) return Pattern; +function "or" (L : PChar; R : PChar) return Pattern; +function "or" (L : PString; R : PChar) return Pattern; +function "or" (L : PChar; R : PString) return Pattern; + Pattern alternation. 
Creates a pattern that will first try to match + L and then on a subsequent failure, attempts to match R instead. + +---------------------------------- +Pattern Assignment Functions +---------------------------------- + +function "*" (P : Pattern; Var : VString_Var) return Pattern; +function "*" (P : PString; Var : VString_Var) return Pattern; +function "*" (P : PChar; Var : VString_Var) return Pattern; + Matches P, and if the match succeeds, assigns the matched substring + to the given VString variable Var. This assignment happens as soon as + the substring is matched, and if the pattern P is matched more than + once during the course of the match, then the assignment will occur + more than once. + +function "**" (P : Pattern; Var : VString_Var) return Pattern; +function "**" (P : PString; Var : VString_Var) return Pattern; +function "**" (P : PChar; Var : VString_Var) return Pattern; + Like "*" above, except that the assignment happens at most once + after the entire match is completed successfully. If the match + fails, then no assignment takes place. + +---------------------------------- +Deferred Matching Operations +---------------------------------- + +function "+" (Str : VString_Var) return Pattern; + Here Str must be a VString variable. This function constructs a + pattern which at pattern matching time will access the current + value of this variable, and match against these characters. + +function "+" (Str : VString_Func) return Pattern; + Constructs a pattern which at pattern matching time calls the given + function, and then matches against the string or character value + that is returned by the call. + +function "+" (P : Pattern_Var) return Pattern; + Here P must be a Pattern variable. This function constructs a + pattern which at pattern matching time will access the current + value of this variable, and match against the pattern value. 
+ +function "+" (P : Boolean_Func) return Pattern; + Constructs a predicate pattern function that at pattern matching time + calls the given function. If True is returned, then the pattern matches. + If False is returned, then failure is signalled. + +-------------------------------- +Pattern Building Functions +-------------------------------- + +function Arb return Pattern; + Constructs a pattern that will match any string. On the first attempt, + the pattern matches a null string, then on each successive failure, it + matches one more character, and only fails if matching the entire rest + of the string. + +function Arbno (P : Pattern) return Pattern; +function Arbno (P : PString) return Pattern; +function Arbno (P : PChar) return Pattern; + Pattern repetition. First matches null, then on a subsequent failure + attempts to match an additional instance of the given pattern. + Equivalent to (but more efficient than) P & ("" | (P & ("" | ... + +function Any (Str : String) return Pattern; +function Any (Str : VString) return Pattern; +function Any (Str : Character) return Pattern; +function Any (Str : Character_Set) return Pattern; +function Any (Str : access VString) return Pattern; +function Any (Str : VString_Func) return Pattern; + Constructs a pattern that matches a single character that is one of + the characters in the given argument. The pattern fails if the current + character is not in Str. + +function Bal return Pattern; + Constructs a pattern that will match any non-empty string that is + parentheses balanced with respect to the normal parentheses characters. + Attempts to extend the string if a subsequent failure occurs. 
+ +function Break (Str : String) return Pattern; +function Break (Str : VString) return Pattern; +function Break (Str : Character) return Pattern; +function Break (Str : Character_Set) return Pattern; +function Break (Str : access VString) return Pattern; +function Break (Str : VString_Func) return Pattern; + Constructs a pattern that matches a (possibly null) string which + is immediately followed by a character in the given argument. This + character is not part of the matched string. The pattern fails if + the remaining characters to be matched do not include any of the + characters in Str. + +function BreakX (Str : String) return Pattern; +function BreakX (Str : VString) return Pattern; +function BreakX (Str : Character) return Pattern; +function BreakX (Str : Character_Set) return Pattern; +function BreakX (Str : access VString) return Pattern; +function BreakX (Str : VString_Func) return Pattern; + Like Break, but the pattern attempts to extend on a failure to find + the next occurrence of a character in Str, and only fails when the + last such instance causes a failure. + +function Cancel return Pattern; + Constructs a pattern that immediately aborts the entire match + +function Fail return Pattern; + Constructs a pattern that always fails + +function Fence return Pattern; + Constructs a pattern that matches null on the first attempt, and then + causes the entire match to be aborted if a subsequent failure occurs. + +function Fence (P : Pattern) return Pattern; + Constructs a pattern that first matches P. If P fails, then the + constructed pattern fails. If P succeeds, then the match proceeds, + but if subsequent failure occurs, alternatives in P are not sought. + The idea of Fence is that each time the pattern is matched, just + one attempt is made to match P, without trying alternatives. 
+ +function Len (Count : Natural) return Pattern; +function Len (Count : access Natural) return Pattern; +function Len (Count : Natural_Func) return Pattern; + Constructs a pattern that matches exactly the given number of + characters. The pattern fails if fewer than this number of characters + remain to be matched in the string. + +function NotAny (Str : String) return Pattern; +function NotAny (Str : VString) return Pattern; +function NotAny (Str : Character) return Pattern; +function NotAny (Str : Character_Set) return Pattern; +function NotAny (Str : access VString) return Pattern; +function NotAny (Str : VString_Func) return Pattern; + Constructs a pattern that matches a single character that is not + one of the characters in the given argument. The pattern fails if + the current character is in Str. + +function NSpan (Str : String) return Pattern; +function NSpan (Str : VString) return Pattern; +function NSpan (Str : Character) return Pattern; +function NSpan (Str : Character_Set) return Pattern; +function NSpan (Str : access VString) return Pattern; +function NSpan (Str : VString_Func) return Pattern; + Constructs a pattern that matches the longest possible string + consisting entirely of characters from the given argument. The + string may be empty, so this pattern always succeeds. + +function Pos (Count : Natural) return Pattern; +function Pos (Count : access Natural) return Pattern; +function Pos (Count : Natural_Func) return Pattern; + Constructs a pattern that matches the null string if exactly Count + characters have already been matched, and otherwise fails. + +function Rem return Pattern; + Constructs a pattern that always succeeds, matching the remaining + unmatched characters in the string. 
+ +function Rpos (Count : Natural) return Pattern; +function Rpos (Count : access Natural) return Pattern; +function Rpos (Count : Natural_Func) return Pattern; + Constructs a pattern that matches the null string if exactly Count + characters remain to be matched in the string, and otherwise fails. + +function Rtab (Count : Natural) return Pattern; +function Rtab (Count : access Natural) return Pattern; +function Rtab (Count : Natural_Func) return Pattern; + Constructs a pattern that matches from the current location until + exactly Count characters remain to be matched in the string. The + pattern fails if fewer than Count characters remain to be matched. + +function Setcur (Var : access Natural) return Pattern; + Constructs a pattern that matches the null string, and assigns the + current cursor position in the string. This value is the number of + characters matched so far. So it is zero at the start of the match. + +function Span (Str : String) return Pattern; +function Span (Str : VString) return Pattern; +function Span (Str : Character) return Pattern; +function Span (Str : Character_Set) return Pattern; +function Span (Str : access VString) return Pattern; +function Span (Str : VString_Func) return Pattern; + Constructs a pattern that matches the longest possible string + consisting entirely of characters from the given argument. The + string cannot be empty, so the pattern fails if the current + character is not one of the characters in Str. + +function Succeed return Pattern; + Constructs a pattern that succeeds matching null, both on the first + attempt, and on any rematch attempt, i.e. it is equivalent to an + infinite alternation of null strings. + +function Tab (Count : Natural) return Pattern; +function Tab (Count : access Natural) return Pattern; +function Tab (Count : Natural_Func) return Pattern; + Constructs a pattern that matches from the current location until + exactly Count characters have been matched. 
The pattern fails if more than Count + characters have already been matched. + +--------------------------------- +Pattern Matching Operations +--------------------------------- + + The Match function performs an actual pattern matching operation. + The versions with three parameters perform a match without modifying + the subject string and return a Boolean result indicating if the + match is successful or not. The Anchor parameter is set to True to + obtain an anchored match in which the pattern is required to match + the first character of the string. In an unanchored match, which is + + the default, successive attempts are made to match the given pattern + at each character of the subject string until a match succeeds, or + until all possibilities have failed. + + Note that pattern assignment functions in the pattern may generate + side effects, so these functions are not necessarily pure. + +Anchored_Mode : Boolean := False; + This global variable can be set True to cause all subsequent pattern + matches to operate in anchored mode. In anchored mode, no attempt is + made to move the anchor point, so that if the match succeeds it must + succeed starting at the first character. Note that the effect of + anchored mode may be achieved in individual pattern matches by using + Fence or Pos(0) at the start of the pattern. + +Pattern_Stack_Overflow : exception; + Exception raised if internal pattern matching stack overflows. This + is typically the result of runaway pattern recursion. If there is a + genuine case of stack overflow, then either the match must be broken + down into simpler steps, or the stack limit must be reset. + +Stack_Size : constant Positive := 2000; + Size used for internal pattern matching stack. Increase this size if + complex patterns cause Pattern_Stack_Overflow to be raised. + + Simple match functions. The subject is matched against the pattern. 
+ Any immediate or deferred assignments or writes are executed, and + the returned value indicates whether or not the match succeeded. + +function Match + (Subject : VString; + Pat : Pattern) return Boolean; + +function Match + (Subject : VString; + Pat : PString) return Boolean; + +function Match + (Subject : String; + Pat : Pattern) return Boolean; + +function Match + (Subject : String; + Pat : PString) return Boolean; + + Replacement functions. The subject is matched against the pattern. + Any immediate or deferred assignments or writes are executed, and + the returned value indicates whether or not the match succeeded. + If the match succeeds, then the matched part of the subject string + is replaced by the given Replace string. + +function Match + (Subject : VString_Var; + Pat : Pattern; + Replace : VString) return Boolean; + +function Match + (Subject : VString_Var; + Pat : PString; + Replace : VString) return Boolean; + +function Match + (Subject : VString_Var; + Pat : Pattern; + Replace : String) return Boolean; + +function Match + (Subject : VString_Var; + Pat : PString; + Replace : String) return Boolean; + +Deferred Replacement + +type Match_Result is private; + Type used to record result of pattern match + +subtype Match_Result_Var is Match_Result; + This synonym is used as a formal parameter type to a function where, + if the language allowed, we would use an in out parameter, but we are + not allowed to have in out parameters for functions. Instead we pass + actuals which must be variables, and with a bit of trickery in the + body, manage to interpret them properly as though they were indeed + in out parameters. 
+ +function Match + (Subject : VString_Var; + Pat : Pattern; + Result : Match_Result_Var) return Boolean; + +procedure Match + (Subject : in out VString; + Pat : Pattern; + Result : out Match_Result); + +procedure Replace + (Result : in out Match_Result; + Replace : VString); + Given a previous call to Match which set Result, performs a pattern + replacement if the match was successful. Has no effect if the match + failed. This call should immediately follow the Match call. + +------------------------ +Debugging Routines +------------------------ + + Debugging pattern matching operations can often be quite complex, + since there is no obvious way to trace the progress of the match. + The declarations in this section provide some debugging assistance. + +Debug_Mode : Boolean := False; + This global variable can be set True to generate debugging on all + subsequent calls to Match. The debugging output is a full trace of + the actions of the pattern matcher, written to Standard_Output. The + level of this information is intended to be comprehensible at the + abstract level of this package declaration. However, note that the + use of this switch often generates large amounts of output. + +function "*" (P : Pattern; Fil : File_Access) return Pattern; +function "*" (P : PString; Fil : File_Access) return Pattern; +function "*" (P : PChar; Fil : File_Access) return Pattern; +function "**" (P : Pattern; Fil : File_Access) return Pattern; +function "**" (P : PString; Fil : File_Access) return Pattern; +function "**" (P : PChar; Fil : File_Access) return Pattern; + These are similar to the corresponding pattern assignment operations + except that instead of setting the value of a variable, the matched + substring is written to the appropriate file. This can be useful in + following the progress of a match without generating the full amount + of information obtained by setting Debug_Mode to True. 
+ +Terminal : constant File_Access := Standard_Error; +Output : constant File_Access := Standard_Output; + Two handy synonyms for use with the above pattern write operations + + Finally we have some routines that are useful for determining what + patterns are in use, particularly if they are constructed dynamically. + +function Image (P : Pattern) return String; +function Image (P : Pattern) return VString; + These functions yield strings that correspond to the syntax needed + to create the given pattern using the functions in this package. The + form of this string is such that it could actually be compiled and + evaluated to yield the required pattern except for references to + variables and functions, which are output using one of the following + forms: +-- + access Natural NP(16#...#) + access Pattern PP(16#...#) + access VString VP(16#...#) +-- + Natural_Func NF(16#...#) + VString_Func VF(16#...#) +-- + where 16#...# is the hex representation of the integer address that + corresponds to the given access value + +procedure Dump (P : Pattern); + This procedure writes information about the pattern to Standard_Out. + The format of this information is keyed to the internal data structures + used to implement patterns. The information provided by Dump is thus + more precise than that yielded by Image, but is also a bit more obscure + (i.e. it cannot be interpreted solely in terms of this spec, you have + to know something about the data structures). 
+ +procedure Finalize (Object : in out Pattern); + Finalization routine used to release storage allocated for a pattern + + + + diff --git a/doc/reference.xml b/doc/reference.xml new file mode 100644 index 0000000..0e4d5be --- /dev/null +++ b/doc/reference.xml @@ -0,0 +1,2005 @@ +<?xml version="1.0"?> + +<!-- + LSPIPAT - LUA SPIPAT WRAPPER + Copyright (C) 2010, Robin Haberkorn + License: LGPL + + DOCUMENTATION AND MODULE REFERENCE +--> + +<book xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink"> + <info> + <title>SNOBOL/SPITBOL Patterns for Lua</title> + <subtitle>libspipat Lua wrapper</subtitle> + <titleabbrev>lspipat</titleabbrev> + + <author> + <personname><firstname>Robin</firstname> <surname>Haberkorn</surname></personname> + <email>robin.haberkorn at googlemail.com</email> + </author> + <copyright> + <year>2010</year><holder>Robin Haberkorn</holder> + </copyright> + + <mediaobject> + <imageobject><imagedata format="PNG" fileref="lspipat.png"/></imageobject> + </mediaobject> + + <abstract><para> + The following document is the <productname>lspipat</productname> + Lua 5.1 module documentation and reference. + </para></abstract> + </info> + + <acknowledgements> + <title>Thanks To...</title> + + <para> + <productname>lspipat</productname> would not be possible without: + </para> + <itemizedlist> + <listitem> + <firstname>Phil</firstname> <surname>Budne</surname>, for <productname>spipat</productname>. + <productname>lspipat</productname> is merely a <productname>spipat</productname> wrapper. + </listitem><listitem> + <firstname>Robert</firstname> <surname>Dewar</surname> who has created Macro SPITBOL and + the GNAT.Spitbol package. + <productname>spipat</productname> was derived from GNAT.Spitbol, which is based on Macro SPITBOL. 
+ </listitem> + </itemizedlist> + </acknowledgements> + + <preface> + <title>Introduction</title> + + <para> + <productname>lspipat</productname> is a wrapper to <productname>spipat</productname> + that brings support for a first-class SNOBOL/SPITBOL-like pattern data type. + Patterns can be constructed and subsequently combined with other patterns, + strings, numbers and functions using binary and unary operators allowing + the construction of grammars describing any <emphasis>Context Free Language</emphasis>. + Patterns can be matched against any Lua string. + A major difference to other pattern matching techniques like regular expressions, besides + the supported language class, is the possibility to construct patterns/grammars in a + readable and intuitive way, somewhat reminiscent of the <emphasis>BNF</emphasis>. + </para><para> + They can include pattern elements that have side-effects (i.e. Lua code executed during + pattern matching) or produce and influence pattern elements dynamically. + For instance, functions can be specified that are executed during matching to produce + the parameters necessary for the interpretation of a pattern element. + Code can be embedded that generates entire patterns on the fly. + Matching previously matched substrings and implementing recursive patterns + is only one application of the powerful dynamic pattern elements traditionally + offered by SNOBOL pattern matching and thus by <productname>lspipat</productname>. + </para><para> + SNOBOL/SPITBOL pattern matching was traditionally used in compiler construction + and prototyping, artificial intelligence research and the humanities. 
+ </para> + </preface> + + <chapter> + <title>Resources</title> + + <para> + These internet resources are more or less directly related to <productname>lspipat</productname> and + might be useful to you: + + <itemizedlist> + <listitem> + <link xlink:href="http://luaforge.net/projects/lspipat/">http://luaforge.net/projects/lspipat/</link>: + <productname>lspipat</productname> project page at LuaForge, downloads, bug tracker, etc. + </listitem><listitem> + <link xlink:href="http://www.snobol4.org/spipat/">http://www.snobol4.org/spipat/</link>: + <productname>libspipat</productname> downloads + </listitem><listitem> + <link xlink:href="http://pypi.python.org/pypi/spipat/">http://pypi.python.org/pypi/spipat/</link>: + <productname>libspipat's</productname> Python wrapper (included in <productname>libspipat</productname> + packages). + </listitem><listitem> + <link xlink:href="http://www.infeig.unige.ch/support/ada/gnatlb/g-spipat.html">http://www.infeig.unige.ch/support/ada/gnatlb/g-spipat.html</link>: + <productname>GNAT.Spitbol</productname> description. Also installed as <filename>pattern.txt</filename> by <productname>lspipat</productname>. 
+ </listitem><listitem> + <link xlink:href="ftp://ftp.cs.arizona.edu/snobol/gb.pdf">ftp://ftp.cs.arizona.edu/snobol/gb.pdf</link>: + The SNOBOL4 Programming Language (The famous <emphasis>Green Book</emphasis>) + </listitem><listitem> + <link xlink:href="ftp://ftp.snobol4.com/spitman.pdf">ftp://ftp.snobol4.com/spitman.pdf</link>: + Macro SPITBOL Reference Manual + </listitem><listitem> + <link xlink:href="http://www.snobol4.org/">other interesting resources compiled by Phil Budne...</link> + </listitem> + </itemizedlist> + </para> + </chapter> + + <chapter> + <title>Comparison with SNOBOL</title> + + <para> + Just as patterns in <productname>SNOBOL</productname> are combined and constructed dynamically with + binary and unary operators, <productname>lspipat</productname> also uses operators available in + Lua to construct patterns in a simple and intuitive way. + The operators and pattern-construction functions were chosen, so the pattern construction syntax + is as similar as possible to <productname>SNOBOL</productname>/<productname>SPITBOL</productname>. + The following table shows a comparison of operators between + <productname>SPITBOL</productname> and <productname>lspipat</productname>: + </para> + <table xml:id="operator_comparison"> + <title>Comparison of SPITBOL and lspipat operators</title> + + <tgroup cols="4" colsep="1" rowsep="1"> + <colspec colnum="2" align="center"/> <!-- operators --> + <colspec colnum="3" align="center"/> + + <thead> + <row> + <entry>Operation</entry> + <entry>SPITBOL</entry> + <entry>lspipat</entry> + <entry>Notes</entry> + </row> + </thead><tbody> + <row> + <entry>Alternation</entry> + <entry><command>|</command></entry> + <entry><command>+</command></entry> + <entry morerows="1"><para> + Refer to <xref linkend="composition"/>. + Cannot be used to combine two <emphasis>strings</emphasis>. 
+ </para></entry> + </row><row> + <entry>Concatenation</entry> + <entry><emphasis>(space)</emphasis></entry> + <entry><command>*</command></entry> + </row><row> + <entry>Immediate Assignment/Call</entry> + <entry><command>$</command></entry> + <entry><command>%</command></entry> + <entry morerows="1"><para> + <link linkend="assignment"><command>%</command> and <command>/</command></link> have the + <link xlink:href="http://www.lua.org/manual/5.1/manual.html#2.5.6">same precedence</link> + as <link linkend="concat"><command>*</command></link> in Lua. + Also only call versions are supported (see <xref linkend="deferring"/>). + </para></entry> + </row><row> + <entry>Deferred Assignment/Call</entry> + <entry><command>.</command></entry> + <entry><command>/</command></entry> + </row><row> + <entry morerows="1">Cursor Assignment</entry> + <entry morerows="1"><command>@</command> <emphasis>(unary)</emphasis></entry> + <entry><command>#</command> <emphasis>(unary)</emphasis></entry> + <entry morerows="1"><para> + Refer to <xref linkend="cursor"/>. + <productname>lspipat</productname> only supports a call version + (see <xref linkend="deferring"/>). + </para></entry> + </row><row> + <!-- <entry/> --> + <!-- <entry/> --> + <entry><command>Setcur</command></entry> + </row><row> + <entry>Defer Expression</entry> + <entry><command>*</command> <emphasis>(unary)</emphasis></entry> + <entry morerows="1"><command>-</command> <emphasis>(unary)</emphasis> or <command>Pred</command></entry> + <entry morerows="1"><para> + Refer to <xref linkend="predicate"/>. + In general, expressions can be wrapped in (anonymous) functions to defer them. 
+ </para></entry> + </row><row> + <entry>Interrogation/Predicate</entry> + <entry><command>?</command> <emphasis>(unary)</emphasis></entry> + <!-- <entry/> --> + </row><row> + <entry morerows="1">Pattern Match</entry> + <entry><command>?</command></entry> + <entry morerows="1"><command>smatch</command></entry> + <entry morerows="1"><para> + Refer to <xref linkend="smatch"/>. + <code>S ? P</code> is roughly equivalent to <code>S:smatch(P)</code> in Lua. + </para></entry> + </row><row> + <!-- <entry/> --> + <entry><emphasis>(space)</emphasis></entry> + <!-- <entry/> --> + </row><row> + <entry>Substring Replacement</entry> + <entry><command>=</command></entry> + <entry><command>ssub</command></entry> + <entry><para> + Refer to <xref linkend="ssub"/>. + <code>S P = R</code> is roughly equivalent to <code>S:ssub(P, R, 1)</code> in Lua. + </para></entry> + </row> + </tbody> + </tgroup> + </table> + </chapter> + + <chapter> + <title>Installation</title> + + <para> + <productname>lspipat</productname> uses an autotools buildsystem. The standard + <filename>INSTALL</filename> file contains instructions on how to use it from + a package builder's perspective. + Nevertheless, there are some quirks that should be mentioned. + </para> + + <section> + <title>Dependencies</title> + + <itemizedlist> + <listitem><para> + <link xlink:href="http://www.snobol4.org/spipat/test/spipat-0.9.3%2b.tar.gz">spipat 0.9.3+</link>: + You are advised to apply the patch <filename>spipat-patches/0.9.3+_image.patch</filename> first + before building <productname>spipat</productname>, even though it is not mandatory. + It fixes a header file (so <productname>lspipat</productname> can make use of customized + <link linkend="tostring">render-to-string</link> functionality) and various bugs. + </para></listitem> + <listitem><para> + <link xlink:href="http://www.lua.org/download.html">Lua 5.1</link>: + You probably have this already. 
The <filename>configure</filename> script + should be able to cope with <productname>Ubuntu</productname> and + <link xlink:href="http://lua-users.org/wiki/LuaBinaries">Lua Binaries</link> + distributions. The standalone Lua compiler is only required if + <link linkend="precompile">compilation of Lua scripts</link> is enabled. + </para></listitem> + </itemizedlist> + </section> + + <section> + <title>Configuration Options</title> + + <para> + The following special <filename>configure</filename> script options + are supported: + </para> + <variablelist> + <varlistentry> + <term><arg choice="plain">--enable-lua-libdir=<replaceable>DIR</replaceable></arg></term> + <listitem><para> + Change the installation directory of <productname>lspipat</productname>. + It defaults to <filename>LIBDIR/lua/5.1</filename>. You probably want this to + point to some directory in Lua's + <link xlink:href="http://www.lua.org/manual/5.1/manual.html#pdf-require"> + module search path</link>, so the default should be ok. + </para></listitem> + </varlistentry> + <varlistentry xml:id="precompile"> + <term><arg choice="plain">--disable-lua-precompile</arg></term> + <listitem><para> + Disable precompilation of Lua source files. + Naturally, a Lua compiler will not be required when this option + is used. + </para></listitem> + </varlistentry> + <varlistentry> + <term><arg choice="plain">--disable-lua-strip</arg></term> + <listitem><para> + Do not strip (i.e. remove debugging symbols from) compiled + Lua sources. + </para></listitem> + </varlistentry> + <varlistentry> + <term><arg choice="plain">--disable-html-doc</arg></term> + <listitem><para> + Do not generate HTML documentation. The documentation is usually + derived from <productname>Docbook</productname> using + <application>XSLTProc</application>. + Disabling this may be useful if you have got some problem + with the tool chain but are satisfied with the precompiled + documentation in the distribution. 
+ </para></listitem> + </varlistentry> + </variablelist> + <para> + Furthermore, you should note that <link linkend="tostring">render-to-string</link> results are not + reminiscent of <productname>lspipat</productname> syntax (used in this document) by default. + For <productname>lspipat</productname> to be able to customize these renderings, + <filename>configure</filename> has to find some <productname>spipat</productname> headers which + are not normally installed. + Therefore it is highly recommended to add spipat's source directory to the C include search path + using the <envar>CPPFLAGS</envar> variable before running <filename>configure</filename>. + </para> + </section> + + <para> + Thus, supposing that <productname>spipat</productname> sources are located in your home directory, + the most common way to install <productname>lspipat</productname> would be: + </para> + <informalexample> + <programlisting><![CDATA[./configure CPPFLAGS=-I~/spipat-0.9.3+ +make install]]></programlisting> + </informalexample> + </chapter> + + <chapter> + <title>Usage</title> + + <para> + After <productname>lspipat</productname> has been installed properly, you will + be able to use it in your Lua program by simply requiring <literal>lspipat</literal> + (i.e. <code>require "lspipat"</code>). + </para><para> + The module table will be called <literal>spipat</literal>, but many functions + (especially pattern constructors) will be registered as globals as well. + Also, some operators will be overloaded. + For details on all that (operators, globals, etc.) refer to + <xref linkend="reference"/>. + </para> + </chapter> + + <chapter> + <title>Examples</title> + + <para> + The <filename>samples</filename> directory in the <productname>lspipat</productname> source package + contains some small examples that I hope give you some inspiration on how and where to use + <productname>lspipat</productname>. 
+ </para> + <variablelist> + <varlistentry> + <term><filename>samples/exp2bf.lua</filename></term> + <listitem> + <cmdsynopsis><command>exp2bf.lua</command> <arg choice="plain"><replaceable>expression</replaceable></arg></cmdsynopsis> + <para> + Compiles simple arithmetic expressions to Brainfuck programs that when + executed evaluate the expression and print the result + (8-bit unsigned integer arithmetic). + Prints these programs to <emphasis>stdout</emphasis>. + </para><para> + Use that for whatever you can imagine ;-) + </para> + </listitem> + </varlistentry><varlistentry> + <term><filename>samples/wave.lua</filename></term> + <listitem> + <cmdsynopsis><command>wave.lua</command> <arg choice="plain"><replaceable>wavefile</replaceable></arg></cmdsynopsis> + <para> + Validates/parses <filename>WAV</filename> files + and prints some information about them. + </para><para> + This is an example of how to use <productname>lspipat</productname> + to do pattern matching on "binary" data (formats, protocols). Some + primitives were implemented in Lua for that reason - in the future + there might be a separate C-module to do the encoding/decoding of + integers in different byte-orders more efficiently. + </para> + </listitem> + </varlistentry><varlistentry> + <term><filename>samples/regexp.lua</filename></term> + <listitem><para> + Small regular expression example/test - uses a comprehensive regular + expression describing IPs. + </para></listitem> + </varlistentry> + </variablelist> + </chapter> + + <chapter xml:id="deferring"> + <title>Variable Deferring Techniques</title> + + <para> + In SNOBOL, arbitrary expressions could be <emphasis>deferred</emphasis> + (i.e. their evaluation could be deferred) by using the unary asterisk operator. + With <productname>lspipat</productname> however, you will have to pass functions + (which can be constructed anonymously) to the appropriate constructors to achieve + the same goal. 
+ </para><para> + Deferring expressions which should be combined with other patterns is one + application of the <link linkend="predicate"><command>Pred</command> constructor</link> + and <command>-</command> operator respectively. + </para><para> + Deferring variables is just a special case of deferring expressions. + In this chapter, different ways of optimizing variable deferrings will be + explained using a simple example. + </para><para> + For instance if you would like to <link linkend="assignment">assign</link> a + matched quotation character to a local variable and use that to subsequently match + a simple quote/string, you could use function closures to write something like that: + </para> + <example> + <title>Function Closures for Deferring Purposes</title> + + <programlisting language="lua">local cquote +string = <link linkend="Any">Any</link>("\"'") / function(c) cquote = c end + * <link linkend="Break">Break</link>(function() return cquote end) + * -function() return cquote end</programlisting> + </example> + <para> + You may find this solution a bit verbose, compared with + SNOBOL's elegant syntax. + To save some typing you could define your own constructors + that take the name of a global variable (as a string) + and construct patterns whose arguments are retrieved by + a function closure accessing the globals table. + </para> + <example> + <title>Custom Constructors for Deferring Purposes</title> + + <programlisting language="lua">function _Break(name) + return <link linkend="Break">Break</link>(function() return _G[name] end) +end +function _Pred(name) + return -function() return _G[name] end +end + +string = <link linkend="Any">Any</link>("\"'") / function(c) cquote = c end + * _Break "cquote" + * _Pred "cquote"</programlisting> + </example> + <para> + Of course, if you do not want to pollute the global namespace + your custom functions could just as well access a local table. 
+ Furthermore, you could optimize the code by defining one generic + table access function which is suitable to be used for + <productname>lspipat</productname>'s pattern constructors - + being able to pass so-called <emphasis>cookies</emphasis> + to functions comes in handy. + </para> + <example> + <title>Generic Retrievers for Deferring Purposes</title> + + <programlisting language="lua">function getGlobal(name) return _G[name] end +function _Break(name) return <link linkend="Break">Break</link>(getGlobal, name) end +function _Pred(name) return <link linkend="Pred">Pred</link>(getGlobal, name) end +-- ...</programlisting> + </example> + <para> + Fortunately, <productname>lspipat</productname> already defines + such constructors (deferring global variables) for you. + Wherever possible, there will be versions of constructors + with leading underscores that work similarly to the ones in + the example above. + You can of course overwrite these constructors, e.g. with + versions accessing a special local table. + </para> + + <section> + <title>Recursive Patterns</title> + + <para> + Recursive patterns can be implemented just as described above. 
+ Supposing you want to match the repetition of the predefined pattern + <literal>P</literal> (<emphasis>greedy</emphasis>) you could write + something like that: + </para> + <example> + <title>Recursive Patterns</title> + + <programlisting language="lua"><![CDATA[foo = P * -"foo" + ""]]></programlisting> + </example> + <para> + Sometimes however when using global variables is inappropriate, + you might want to do the following trick: + </para> + <example> + <title>Recursive Pattern Trick</title> + + <programlisting language="lua"><![CDATA[local function foo() return foo end +foo = P * -foo + ""]]></programlisting> + </example> + <para> + It works because <literal>foo</literal> is still a function in the scope + of the assignment's right side, but a pattern afterwards so the + function - to which no (direct) reference exists anymore - will return + the pattern <literal>foo</literal> after the assignment. + </para> + </section> + </chapter> + + <reference xml:id="reference"> + <title>Module Reference</title> + + <partintro><para> + A compilation of all functions in the <productname>lspipat</productname> + module, global functions registered by the module, methods + and overloaded operators follows. 
+ </para></partintro> + + <refentry xml:id="smatch"> + <refmeta><refentrytitle>smatch</refentrytitle></refmeta> + + <refnamediv> + <refname>smatch</refname> + <refpurpose>Perform pattern match on a subject string</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.smatch</command> + ( <arg choice="plain"><replaceable>subject</replaceable></arg> + <arg choice="plain">, <replaceable>pattern</replaceable></arg> + <arg>, <replaceable>flags</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><replaceable>subject</replaceable>:smatch</command> + ( <arg choice="plain"><replaceable>pattern</replaceable></arg> + <arg>, <replaceable>flags</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + Tries to match <emphasis>pattern</emphasis> against <emphasis>subject</emphasis> + using the given <emphasis>flags</emphasis>. + </para> + </refsect1><refsect1> + <title>Parameters</title> + + <orderedlist> + <listitem>subject (<emphasis>string</emphasis>): A string against which the pattern match will be performed</listitem> + <listitem>pattern (<emphasis>userdata</emphasis>): The pattern used for matching</listitem> + <listitem> + flags (<emphasis>number</emphasis> or <emphasis>nil</emphasis>): + Optional <productname>spipat</productname> flags. + </listitem> + </orderedlist> + + <refsect2 xml:id="spipat_flags"> + <title>Spipat Flags</title> + + <para> + Flags are added (e.g. <code>spipat.match_anchored + spipat.match_debug</code>), + due to the lack of a logical/binary <emphasis>or</emphasis> operator in Lua. + </para> + <itemizedlist> + <listitem> + <literal>spipat.match_anchored</literal>: Match in anchored mode + </listitem><listitem> + <literal>spipat.match_debug</literal>: + Match with progress being printed to <emphasis>stdout</emphasis>. + Useful for pattern debugging as the name suggests. 
+ </listitem> + </itemizedlist> + </refsect2> + </refsect1><refsect1> + <title>Return Values</title> + + <para> + In case of an exception during matching, raises an error. + In case no substring matches, returns a single <emphasis>nil</emphasis> value. + Otherwise returns + </para> + <orderedlist> + <listitem><emphasis>number</emphasis>: Start of matched substring</listitem> + <listitem><emphasis>number</emphasis>: End of matched substring</listitem> + </orderedlist> + </refsect1> + </refentry> + + <refentry xml:id="ssub"> + <refmeta><refentrytitle>ssub</refentrytitle></refmeta> + + <refnamediv> + <refname>ssub</refname> + <refpurpose>Substitute substrings matching a pattern in a subject</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.ssub</command> + ( <arg choice="plain"><replaceable>subject</replaceable></arg> + <arg choice="plain">, <replaceable>pattern</replaceable></arg> + <arg choice="plain">, <replaceable>replacement</replaceable></arg> + <arg><arg choice="plain">, <replaceable>n</replaceable></arg><arg>, <replaceable>flags</replaceable></arg></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><replaceable>subject</replaceable>:ssub</command> + ( <arg choice="plain"><replaceable>pattern</replaceable></arg> + <arg choice="plain">, <replaceable>replacement</replaceable></arg> + <arg><arg choice="plain">, <replaceable>n</replaceable></arg><arg>, <replaceable>flags</replaceable></arg></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + Substitutes regions in <emphasis>subject</emphasis> matching <emphasis>pattern</emphasis> either with a string + if <emphasis>replacement</emphasis> is a string or if <emphasis>replacement</emphasis> is a function, the result + of calling that function. This may be useful for deferring the evaluation of replacement strings + which depend on (are built from) results of the matching process (e.g. 
call-on-match or call-immediately function executions). + </para> + </refsect1><refsect1> + <title>Parameters</title> + + <orderedlist> + <listitem>subject (<emphasis>string</emphasis>): The subject for the first pattern match</listitem> + <listitem>pattern (<emphasis>userdata</emphasis>): The pattern used for matching</listitem> + <listitem> + replacement (<emphasis>string</emphasis> or <emphasis>function</emphasis>): + Replacement string or a function that's executed after matching to produce the replacement string + </listitem><listitem> + n (<emphasis>number</emphasis> or <emphasis>nil</emphasis>): + Optional maximal number of match/replacement operations. The first match + is performed on <emphasis>subject</emphasis>, subsequent matches on the result of the preceding + replacements. Naturally replacement stops when the pattern does not match anymore. + If <emphasis>n</emphasis> is absent or nil, replacement only stops when <emphasis>pattern</emphasis> + does not match anymore. + </listitem><listitem> + flags (<emphasis>number</emphasis> or <emphasis>nil</emphasis>): + Optional <productname>spipat</productname> flags, as in <xref linkend="spipat_flags"/>. + </listitem> + </orderedlist> + </refsect1><refsect1> + <title>Return Values</title> + + <para> + In case of an exception during matching, raises an error. 
+ Otherwise returns + </para> + <orderedlist> + <listitem> + <emphasis>string</emphasis>: The result of the last replacement performed or the original + <emphasis>subject</emphasis> if no substring matched at all + </listitem><listitem> + <emphasis>number</emphasis>: The number of match/replacement operations actually performed + </listitem> + </orderedlist> + </refsect1><refsect1> + <title>Example</title> + + <example> + <title>Replacements with spipat.ssub</title> + + <screen>> print(spipat.ssub("abc ccC bab", <link linkend="Span">Span</link>("abc") / function(s) str = s end, function() return "["..str:upper().."]" end, 2)) +[ABC] [CC]C BaB +></screen> + </example> + </refsect1> + </refentry> + + <refentry xml:id="siter"> + <refmeta><refentrytitle>siter</refentrytitle></refmeta> + + <refnamediv> + <refname>siter</refname> + <refpurpose>Return iterator of substrings matching a pattern in a subject</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.siter</command> + ( <arg choice="plain"><replaceable>subject</replaceable></arg> + <arg choice="plain">, <replaceable>pattern</replaceable></arg> + <arg>, <replaceable>flags</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><replaceable>subject</replaceable>:siter</command> + ( <arg choice="plain"><replaceable>pattern</replaceable></arg> + <arg>, <replaceable>flags</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + Returns an iterator function performing a <emphasis>pattern</emphasis> match on <emphasis>subject</emphasis> + and returning the matched substring (start/end positions in <emphasis>subject</emphasis>). + Each time it is called, it begins matching where the last substring ended, but using the same + <emphasis>subject</emphasis>. 
+ </para> + </refsect1><refsect1> + <title>Parameters</title> + + <orderedlist> + <listitem>subject (<emphasis>string</emphasis>): The subject used for pattern matching</listitem> + <listitem> + pattern (<emphasis>userdata</emphasis>): The pattern used for matching. + Naturally, anchoring the <emphasis>pattern</emphasis> using any of the possible methods is nonsense. + </listitem><listitem> + flags (<emphasis>number</emphasis> or <emphasis>nil</emphasis>): + Optional <productname>spipat</productname> flags, as in <xref linkend="spipat_flags"/>. + </listitem> + </orderedlist> + </refsect1><refsect1> + <title>Return Values</title> + + <para> + In case of an exception during matching, raises an error. + Otherwise returns + </para> + <orderedlist><listitem> + <emphasis>function</emphasis>: The iterator function. Calling it returns + <orderedlist> + <listitem><emphasis>number</emphasis>: Start of matched substring</listitem> + <listitem><emphasis>number</emphasis>: End of matched substring</listitem> + </orderedlist> + </listitem></orderedlist> + </refsect1><refsect1> + <title>Example</title> + + <example xml:id="example_siter"> + <title>Iterating through substrings with spipat.siter</title> + + <screen>> str = "abc" +> for s, e in str:siter(<link linkend="Len">Len</link>(1)) do print(str:sub(s, e)) end +a +b +c +></screen> + </example> + </refsect1> + </refentry> + + <refentry xml:id="free"> + <refmeta><refentrytitle>free</refentrytitle></refmeta> + + <refnamediv> + <refname>free</refname> + <refpurpose>Finalize pattern</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.free</command>( <arg choice="plain"><replaceable>pattern</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><replaceable>pattern</replaceable>:free</command>() + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + Finalizes <emphasis>pattern</emphasis>, i.e. 
frees memory associated with it and unreferences any + other Lua values (other patterns, functions, etc.) so they can get garbage collected. + </para><para> + Finalizing an already finalized <emphasis>pattern</emphasis> does nothing. + Using a finalized <emphasis>pattern</emphasis> in any function or operator working with a pattern + will raise an error. + </para> + <tip><para> + <command>free</command> does early what would otherwise be done when the pattern is garbage + collected, so in most cases you will not need it at all. + It may be useful when you would like to free a large pattern you do not need anymore but + removing all references to that pattern and enforcing a full garbage collection cycle + is not feasible. + </para></tip> + </refsect1><refsect1> + <title>Parameters</title> + + <orderedlist><listitem> + pattern (<emphasis>userdata</emphasis>): The pattern to be finalized + </listitem></orderedlist> + </refsect1><refsect1> + <title>Return Values</title> + + <para> + Returns nothing. 
+ </para> + </refsect1><refsect1> + <title>Example</title> + + <example> + <title>Finalizing a pattern</title> + + <screen>> p = <link linkend="Arb">Arb</link>() +> p:free() +> print(p * "foo") +stdin:1: Pattern already freed +></screen> + </example> + </refsect1> + </refentry> + + <refentry xml:id="conversion"> + <refmeta><refentrytitle>Conversion</refentrytitle></refmeta> + + <refnamediv xml:id="topattern"> + <refname>topattern</refname> + <refpurpose>Convert a value to a pattern</refpurpose> + </refnamediv> + <refnamediv xml:id="tostring"> + <refname>tostring</refname> + <refpurpose>Render a pattern as a string</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.topattern</command>( <arg choice="plain"><replaceable>value</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>topattern</command>( <arg choice="plain"><replaceable>value</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><replaceable>value</replaceable>:topattern</command>() + </cmdsynopsis> + + <cmdsynopsis> + <command>tostring</command>( <arg choice="plain"><replaceable>pattern</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + <command>topattern</command> creates a pattern for a string or number, matching that string or number. + If <emphasis>value</emphasis> is already a pattern it returns that pattern without modification. + In case of an unsupported <emphasis>value</emphasis> type or miscellaneous error, topattern always + returns nil. + </para> + <tip><para> + <command>topattern</command> is useful to explicitly create a pattern, e.g. when an operator requires + at least one operand to be a pattern but both are strings, numbers or functions. 
+ </para></tip> + <para> + Lua's <link xlink:href="http://www.lua.org/manual/5.1/manual.html#pdf-tostring">built-in <command>tostring</command></link> + function called on a <emphasis>pattern</emphasis> renders that pattern as a string reminiscent of + lspipat's pattern construction syntax. + </para> + <!-- + <note><para> + The C++ wrapper overloads different operators than <productname>lspipat</productname> does, + so <command>tostring</command> return values might look confusing if you are only used to + <productname>lspipat</productname>. + </para></note> + --> + <!-- + <warning><para> + spipat's rendering support is quite buggy and has to be considered <emphasis>experimental</emphasis> + at this point. You are advised not to use <command>tostring</command>. + For debugging purposes you might want to have a look at + <link linkend="dump"><command>spipat.dump</command></link>. + </para></warning> + --> + </refsect1><refsect1> + <title>Example</title> + + <example> + <title>Explicit pattern construction & implicit conversion to strings</title> + + <screen><![CDATA[> print("2" + 3) +5 +> print(topattern("2") + 3) +("2" + "3") +>]]></screen> + </example> + </refsect1> + </refentry> + + <refentry xml:id="dump"> + <refmeta><refentrytitle>dump</refentrytitle></refmeta> + + <refnamediv> + <refname>dump</refname> + <refpurpose>Dump a pattern to stdout</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.dump</command>( <arg choice="plain"><replaceable>pattern</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + <command>dump</command> prints information about a <emphasis>pattern</emphasis> to + <emphasis>stdout</emphasis>. + The kind of information displayed is similar to <link linkend="tostring"> + <command>tostring</command>'s rendering</link>. + </para><para> + It is useful for debugging purposes. 
+ </para> + </refsect1><refsect1> + <title>Parameters</title> + + <orderedlist><listitem> + pattern (<emphasis>userdata</emphasis>): The pattern to be dumped + </listitem></orderedlist> + </refsect1><refsect1> + <title>Return Values</title> + + <para> + Returns nothing. + </para> + </refsect1> + </refentry> + + <refentry xml:id="composition"> + <refmeta><refentrytitle>Concatenation and Alternation</refentrytitle></refmeta> + + <refnamediv xml:id="concat"> + <refname>*</refname> + <refpurpose>Concatenate patterns</refpurpose> + </refnamediv> + <refnamediv xml:id="alternate"> + <refname>+</refname> + <refpurpose>Alternate patterns</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <arg choice="plain"><replaceable>pattern</replaceable></arg><command>*</command> + <arg choice="plain"><replaceable>value</replaceable></arg> + </cmdsynopsis><cmdsynopsis> + <arg choice="plain"><replaceable>value</replaceable></arg><command>*</command> + <arg choice="plain"><replaceable>pattern</replaceable></arg> + </cmdsynopsis><cmdsynopsis> + <arg choice="plain"><replaceable>pattern</replaceable></arg><command>*</command> + <arg choice="plain"><replaceable>pattern</replaceable></arg> + </cmdsynopsis> + + <cmdsynopsis> + <arg choice="plain"><replaceable>pattern</replaceable></arg><command>+</command> + <arg choice="plain"><replaceable>value</replaceable></arg> + </cmdsynopsis><cmdsynopsis> + <arg choice="plain"><replaceable>value</replaceable></arg><command>+</command> + <arg choice="plain"><replaceable>pattern</replaceable></arg> + </cmdsynopsis><cmdsynopsis> + <arg choice="plain"><replaceable>pattern</replaceable></arg><command>+</command> + <arg choice="plain"><replaceable>pattern</replaceable></arg> + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + The <command>*</command> operator constructs a concatenation of two <emphasis>values</emphasis> + if at least one of them is a <emphasis>pattern</emphasis> and returns the result 
as a <emphasis>pattern</emphasis>. + A concatenation matches the left operand immediately followed by the right operand. + </para><para> + The <command>+</command> operator constructs an alternation between two <emphasis>values</emphasis> + if at least one of them is a <emphasis>pattern</emphasis> and returns the result as a <emphasis>pattern</emphasis>. + An alternation matches the left operand and <emphasis>if unsuccessful</emphasis> the right operand. + </para><para> + The non-pattern <emphasis>values</emphasis> may be strings or numbers, which are matched + just like a <emphasis>pattern</emphasis> built by + <link linkend="topattern"><command>topattern</command></link>. + </para> + <note><para> + Even though the <emphasis>patterns</emphasis> participating in the composition will be copied, + references will be kept, so they will not be garbage collected until all <emphasis>patterns</emphasis> + using them are garbage collected. + </para></note> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Result of the pattern composition + </listitem></orderedlist> + </refsect1><refsect1> + <title>Example</title> + + <example> + <title>Concatenations and Alternations</title> + + <screen>> pat = (<link linkend="topattern">topattern</link>("ABC") + "AB") * (<link linkend="topattern">topattern</link>("DEF") + "CDE") * (<link linkend="topattern">topattern</link>("GH") + "IJ") +> assert(<link linkend="smatch">spipat.smatch</link>("ABCCDEGH", pat)) +> assert(<link linkend="smatch">spipat.smatch</link>("ABCDEFIJ", pat)) +></screen> + </example> + </refsect1> + </refentry> + + <refentry xml:id="assignment"> + <refmeta><refentrytitle>Assignment Calls</refentrytitle></refmeta> + + <refnamediv> + <refname>%</refname> + <refpurpose>Call Immediately</refpurpose> + </refnamediv><refnamediv> + <refname>/</refname> + <refpurpose>Deferred Call</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + 
<arg choice="plain"><replaceable>pattern</replaceable></arg><command>%</command> + <arg choice="plain"><replaceable>function</replaceable></arg> + </cmdsynopsis> + + <cmdsynopsis> + <arg choice="plain"><replaceable>pattern</replaceable></arg><command>/</command> + <arg choice="plain"><replaceable>function</replaceable></arg> + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + The <command>%</command> operator constructs a pattern matching operand <emphasis>pattern</emphasis> and + calling a Lua <emphasis>function</emphasis> whenever <emphasis>pattern</emphasis> matches during a pattern + match (i.e. <emphasis>function</emphasis> may be called more than once while matching regardless of whether + the match fails or succeeds). + </para><para> + On the other hand, the <command>/</command> operator constructs a pattern matching operand + <emphasis>pattern</emphasis> and calling a Lua <emphasis>function</emphasis> at most once - only if + the match succeeds. + </para><para> + In both cases, <emphasis>function</emphasis> receives the following arguments when called: + <orderedlist><listitem> + <emphasis>string</emphasis>: The substring matched by <emphasis>pattern</emphasis> + </listitem></orderedlist> + Its return value is ignored. + </para> + <note><para> + Unlike assignment operators in <emphasis>SNOBOL</emphasis>, the <command>%</command> and <command>/</command> + operators in Lua have the <link xlink:href="http://www.lua.org/manual/5.1/manual.html#2.5.6">same precedence</link> + as the <link linkend="concat">concatenation operator <command>*</command></link>, + so using parentheses is advised. + </para></note> + <tip><para> + Deferred assignments (assign on match & assign immediately) are not directly possible but can be + easily implemented using function closures as described in <xref linkend="deferring"/>. 
+ </para></tip> + <note> + <para> + Even though the <emphasis>pattern</emphasis> operands will be copied, references will be kept, + so they will not be garbage collected until all <emphasis>patterns</emphasis> + using them are garbage collected. + </para><para> + Furthermore, references to <emphasis>functions</emphasis> will be kept so they will not be + garbage collected until the patterns constructed by the operators are garbage collected. + </para> + </note> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by the operators + </listitem></orderedlist> + </refsect1><refsect1> + <title>Example</title> + + See <xref linkend="example_siter"/>. + </refsect1> + </refentry> + + <refentry xml:id="cursor"> + <refmeta><refentrytitle>Cursor Assignment Calls</refentrytitle></refmeta> + + <refnamediv> + <refname>Setcur</refname> + <refpurpose>Cursor Assignment</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.Setcur</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>Setcur</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>#</command><arg choice="plain"><replaceable>function</replaceable></arg> + </cmdsynopsis> + + <cmdsynopsis> + <command>spipat._Setcur</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>_Setcur</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + <command>Setcur</command> is a pattern constructor returning a pattern matching the null string <literal>""</literal> + (i.e. 
always succeeds when matched) and immediately calling a Lua <emphasis>function</emphasis> when matched. + This <emphasis>function</emphasis> receives the following arguments when called: + <orderedlist> + <listitem> + <emphasis>number</emphasis>: The cursor in the subject string. + In other words, the number of characters matched so far from the beginning of the subject string. + </listitem><listitem> + <emphasis>cookie</emphasis>: Any Lua value specified as a cookie in the pattern constructor or + <emphasis>nil</emphasis> if no cookie was specified. + </listitem> + </orderedlist> + Its return value is ignored. + </para> + <tip><para> + The unary <command>#</command> operator is equivalent to the <command>Setcur</command> constructor with no + <emphasis>cookie</emphasis> specified. + </para></tip> + <para> + <command>_Setcur</command> is similar to <command>Setcur</command> but actually assigns the cursor position to + the global variable whose name is specified by a <emphasis>string</emphasis> value. + This means that <code>_Setcur(str)</code> does not assign the cursor position to the global variable <literal>str</literal> + but rather to the variable with the name <emphasis>str</emphasis> contains, e.g. <literal>foo</literal> if <code>str == "foo"</code>. + So generally <command>_Setcur</command> is equivalent to: + <informalexample> + <programlisting language="lua"><![CDATA[function _Setcur(val) + return #function(str) _G[val] = str end +end]]></programlisting> + </informalexample> + In a similar manner, other kinds of deferred assignments can be implemented + using function closures as described in <xref linkend="deferring"/>. + </para> + <note><para> + References to <emphasis>function</emphasis> and <emphasis>cookie</emphasis> will be kept so they will not be + garbage collected until the pattern constructed by <command>Setcur</command> is garbage collected. 
+ </para></note> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by the constructor + </listitem></orderedlist> + </refsect1> + <!--<refsect1> + <title>Example</title> + + </refsect1>--> <!-- TODO --> + </refentry> + + <refentry xml:id="predicate"> + <refmeta><refentrytitle>Predicates</refentrytitle></refmeta> + + <refnamediv xml:id="Pred"> + <refname>Pred</refname> + <refpurpose>Predicate Constructor</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.Pred</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>Pred</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>-</command><arg choice="plain"><replaceable>function</replaceable></arg> + </cmdsynopsis> + + <cmdsynopsis> + <command>spipat._Pred</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>_Pred</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>-</command><arg choice="plain"><replaceable>string</replaceable></arg> + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + <command>Pred</command> constructs a pattern which allows you to transparently define its matching behaviour + using a <emphasis>function</emphasis> called when this pattern is attempted to be matched. + It receives the following arguments when invoked: + <orderedlist><listitem> + <emphasis>cookie</emphasis>: Any Lua value specified as a cookie in the pattern constructor or + <emphasis>nil</emphasis> if no cookie was specified. 
+ </listitem></orderedlist> + </para><para> + The <emphasis>function's</emphasis> return value defines the behaviour dynamically, as shown in the following table: + + <table xml:id="dynfnc"> + <title>Dynamic Function Return Values</title> + + <tgroup cols="3" colsep="1" rowsep="1"> + <colspec colname="value"/> + <colspec colname="type"/> + <colspec colname="behaviour"/> + + <thead> + <row> + <entry>Value</entry> + <entry>Type</entry> + <entry>Behaviour</entry> + </row> + </thead><tbody> + <row> + <entry>nil</entry> + <entry>nil</entry> + <entry morerows="1"><para> + Match the <literal>""</literal> string, i.e. succeed. + </para></entry> + </row><row> + <entry>true</entry> + <entry morerows="1">boolean</entry> + </row><row> + <entry>false</entry> + <!-- <entry>boolean</entry> --> + <entry><para> + Pattern match fails, like when using the + <link linkend="Fail"><command>Fail</command> primitive</link>. + </para></entry> + </row><row> + <entry namest="value" nameend="type" align="center">any <emphasis>number</emphasis></entry> + <entry><para> + Try to match that number as a string, as if + <link linkend="topattern">converted to a pattern</link>. + </para></entry> + </row><row> + <entry namest="value" nameend="type" align="center">any <emphasis>string</emphasis></entry> + <entry><para> + Try to match that string, as if + <link linkend="topattern">converted to a pattern</link>. + </para></entry> + </row><row> + <entry namest="value" nameend="type" align="center">any <emphasis>pattern</emphasis></entry> + <entry><para> + Try to match that pattern. Returning a pattern assigned to a variable is the way + to implement recursive patterns. + </para></entry> + </row> + </tbody> + </tgroup> + </table> + </para> + <tip><para> + The unary <command>-</command> operator applied to a <emphasis>function</emphasis> is equivalent + to the <command>Pred</command> constructor with no <emphasis>cookie</emphasis> specified. 
+ </para></tip> + <para> + <command>_Pred</command> is similar to <command>Pred</command> but actually gets the Lua value defining its behaviour from + the global variable whose name is specified by a <emphasis>string</emphasis> value. + This means that <code>_Pred(str)</code> does not get the value from the global variable <literal>str</literal> + but rather from the variable with the name <emphasis>str</emphasis> contains, e.g. <literal>foo</literal> if <code>str == "foo"</code>. + So generally <command>_Pred</command> is equivalent to: + <informalexample> + <programlisting language="lua"><![CDATA[function _Pred(val) + return -function() return _G[val] end +end]]></programlisting> + </informalexample> + In a similar manner, other kinds of variable deferring as well as recursive patterns can be implemented + using function closures as described in <xref linkend="deferring"/>. + </para> + <tip><para> + The unary <command>-</command> operator applied to a <emphasis>string</emphasis> which is not convertible to + a <emphasis>number</emphasis> is equivalent to the <command>_Pred</command> constructor - naturally this + <emphasis>should</emphasis> be true for all global variable names. + This constraint comes from the way Lua handles operations by default (it checks whether it is an arithmetic operation + before evaluating any metamethod - see <link xlink:href="http://www.lua.org/manual/5.1/manual.html#2.8">metatables</link>). + </para></tip> + <note><para> + References to <emphasis>function</emphasis> and <emphasis>cookie</emphasis> will be kept so they will not be + garbage collected until the pattern constructed by <command>Pred</command> is garbage collected. 
+ </para></note> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by the constructor + </listitem></orderedlist> + </refsect1> + <!--<refsect1> + <title>Example</title> + + </refsect1>--> <!-- TODO --> + </refentry> + + <refentry xml:id="string_primitives"> + <refmeta><refentrytitle>String Primitives</refentrytitle></refmeta> + + <refnamediv xml:id="Any"> + <refname>Any</refname> + <refpurpose>Match any character in a set</refpurpose> + </refnamediv> + <refnamediv> + <refname>NotAny</refname> + <refpurpose>Match any character not in a set</refpurpose> + </refnamediv> + <refnamediv xml:id="Break"> + <refname>Break</refname> + <refpurpose>Match characters up to a break character</refpurpose> + </refnamediv> + <refnamediv> + <refname>BreakX</refname> + <refpurpose>Match characters up to a break character (extending)</refpurpose> + </refnamediv> + <refnamediv> + <refname>NSpan</refname> + <refpurpose>Match nothing or characters from a set</refpurpose> + </refnamediv> + <refnamediv xml:id="Span"> + <refname>Span</refname> + <refpurpose>Match characters from a set</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command><arg>spipat.</arg>Any</command>( <arg choice="plain"><replaceable>set</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>Any</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_Any</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>NotAny</command>( <arg choice="plain"><replaceable>set</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>NotAny</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + 
</cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_NotAny</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>Break</command>( <arg choice="plain"><replaceable>set</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>Break</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_Break</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>BreakX</command>( <arg choice="plain"><replaceable>set</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>BreakX</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_BreakX</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>NSpan</command>( <arg choice="plain"><replaceable>set</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>NSpan</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_NSpan</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>Span</command>( <arg choice="plain"><replaceable>set</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>Span</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_Span</command>( <arg 
choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + String primitives are pattern constructors that in their first form all take a <emphasis>string</emphasis> or + <emphasis>number</emphasis> (which is converted to a <emphasis>string</emphasis>) as their sole argument + (<emphasis>set</emphasis>). + </para><para> + In their second form they take a Lua <emphasis>function</emphasis> and an optional <emphasis>cookie</emphasis> + as arguments. When the constructed pattern is about to be matched, the <emphasis>function</emphasis> is called + and is supposed to return a <emphasis>string</emphasis> or <emphasis>number</emphasis> (which is converted to + a <emphasis>string</emphasis>) to supply the primitive's argument dynamically. + It receives the following arguments when invoked: + <orderedlist><listitem> + <emphasis>cookie</emphasis>: Any Lua value specified as a cookie in the pattern constructor or + <emphasis>nil</emphasis> if no cookie was specified. + </listitem></orderedlist> + </para><para> + The primitives with a leading underscore (e.g. <command>_Any</command>) are similar but actually get their argument + from a global variable with the name a <emphasis>string</emphasis> argument contains. + This means that for instance <code>_Any(str)</code> does not get its character set from the global variable <literal>str</literal> + but rather from the variable with the name <emphasis>str</emphasis> contains, e.g. <literal>foo</literal> if <code>str == "foo"</code>. + So generally <command>_Any</command> is equivalent to: + <informalexample> + <programlisting language="lua"><![CDATA[function _Any(val) + return Any(function() return _G[val] end) +end]]></programlisting> + </informalexample> + In a similar manner, other kinds of variable deferring can be implemented + using function closures as described in <xref linkend="deferring"/>. 
+ </para> + <note><para> + References to <emphasis>function</emphasis> and <emphasis>cookie</emphasis> will be kept so they will not be + garbage collected until the pattern constructed is garbage collected. + </para></note> + <para> + The following table describes what these primitives do: + + <!-- Largely copied from the GNAT.SPITBOL docs --> + <table> + <title>String Primitives</title> + + <tgroup cols="2" colsep="1" rowsep="1"> + <colspec colwidth="10%"/> + + <thead> + <row> + <entry>Primitive</entry> + <entry>Description</entry> + </row> + </thead><tbody> + <row> + <entry><command>Any</command>( <arg choice="plain"><replaceable>S</replaceable></arg> )</entry> + <entry><para> + Where S is a string, matches a single character that is + any one of the characters in S. Fails if the current + character is not one of the given set of characters. + </para></entry> + </row><row> + <entry><command>NotAny</command>( <arg choice="plain"><replaceable>S</replaceable></arg> )</entry> + <entry><para> + Where S is a string, matches a single character that is + not one of the characters of S. Fails if the current + character is one of the given set of characters. + </para></entry> + </row><row> + <entry><command>Break</command>( <arg choice="plain"><replaceable>S</replaceable></arg> )</entry> + <entry><para> + Where S is a string, matches a string of zero or more + characters up to but not including a break character + that is one of the characters given in the string S. + Can match the null string, but cannot match the last + character in the string, since a break character is + required to be present. + </para></entry> + </row><row> + <entry><command>BreakX</command>( <arg choice="plain"><replaceable>S</replaceable></arg> )</entry> + <entry><para> + Where S is a string, behaves exactly like <command>Break</command>(S) when + it first matches, but if a string is successfully matched, + then a subsequent failure causes an attempt to extend the + matched string. 
+ </para></entry> + </row><row> + <entry><command>NSpan</command>( <arg choice="plain"><replaceable>S</replaceable></arg> )</entry> + <entry><para> + Where S is a string, matches a string of zero or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Always succeeds, since it can match the null string. + </para></entry> + </row><row> + <entry><command>Span</command>( <arg choice="plain"><replaceable>S</replaceable></arg> )</entry> + <entry><para> + Where S is a string, matches a string of one or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Fails if the current character is not one of the given + set of characters. + </para></entry> + </row> + </tbody> + </tgroup> + </table> + </para> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by the constructor + </listitem></orderedlist> + </refsect1> + <!--<refsect1> + <title>Example</title> + + </refsect1>--> <!-- TODO --> + </refentry> + + <refentry> + <refmeta><refentrytitle>Arbno</refentrytitle></refmeta> + + <refnamediv> + <refname>Arbno</refname> + <refpurpose>Matches a pattern any number of times</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.Arbno</command>( <arg choice="plain"><replaceable>P</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>Arbno</command>( <arg choice="plain"><replaceable>P</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + Where P is any pattern, matches any number of instances + of the pattern, starting with zero occurrences. It is + thus equivalent to <code>("" + (P * ("" + (P * ("" ....))))</code>. + The pattern P may contain any number of pattern elements + including the use of alternation and concatenation. 
+ </para><para> + <command>Arbno</command> is a pattern constructor taking exactly one argument which is + either a <emphasis>pattern</emphasis> or <emphasis>string</emphasis> (which is treated + like it is <link linkend="topattern">converted to a pattern</link> first). + </para> + <note><para> + A reference to <emphasis>P</emphasis> will be kept if it is a <emphasis>pattern</emphasis> + so it will not be garbage collected until the pattern constructed is garbage collected. + </para></note> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by <command>Arbno</command> + </listitem></orderedlist> + </refsect1> + <!--<refsect1> + <title>Example</title> + + </refsect1>--> <!-- TODO --> + </refentry> + + <refentry> + <refmeta><refentrytitle>Fence</refentrytitle></refmeta> + + <refnamediv> + <refname>Fence</refname> + <refpurpose>Abort match when alternations are sought</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.Fence</command>( <arg><replaceable>P</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>Fence</command>( <arg><replaceable>P</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + <command>Fence</command> is a pattern constructor taking no or exactly one + <emphasis>pattern</emphasis> as an argument. + </para> + <note><para> + A reference to pattern <emphasis>P</emphasis> will be kept so it will not + be garbage collected until the pattern constructed is garbage collected. 
+ </para></note> + <para> + The following table describes what the two versions do: + + <!-- Largely copied from the GNAT.SPITBOL docs --> + <table> + <title>Fence Primitive</title> + + <tgroup cols="2" colsep="1" rowsep="1"> + <colspec colwidth="10%"/> + + <thead> + <row> + <entry>Primitive</entry> + <entry>Description</entry> + </row> + </thead><tbody> + <row> + <entry><command>Fence</command>()</entry> + <entry><para> + Matches the null string at first, and then if a failure + causes alternatives to be sought, aborts the match (like + a Cancel). Note that using <command>Fence</command> at the + start of a pattern has the same effect as matching in anchored mode. + </para></entry> + </row><row> + <entry><command>Fence</command>( <arg choice="plain"><replaceable>P</replaceable></arg> )</entry> + <entry><para> + Where P is a pattern, attempts to match the pattern P + including trying all possible alternatives of P. If none + of these alternatives succeeds, then the <command>Fence</command> pattern + fails. If one alternative succeeds, then the pattern + match proceeds, but on a subsequent failure, no attempt + is made to search for alternative matches of P. The + pattern P may contain any number of pattern elements + including the use of alternation and concatenation. 
+ </para></entry> + </row> + </tbody> + </tgroup> + </table> + </para> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by <command>Fence</command> + </listitem></orderedlist> + </refsect1> + <!--<refsect1> + <title>Example</title> + + </refsect1>--> <!-- TODO --> + </refentry> + + <refentry xml:id="uint_primitives"> + <refmeta><refentrytitle>Integer Primitives</refentrytitle></refmeta> + + <refnamediv xml:id="Len"> + <refname>Len</refname> + <refpurpose>Match a number of characters</refpurpose> + </refnamediv> + <refnamediv> + <refname>Pos</refname> + <refpurpose>Match null string if number of characters have been matched</refpurpose> + </refnamediv> + <refnamediv> + <refname>RPos</refname> + <refpurpose>Match null string if number of characters remain to be matched</refpurpose> + </refnamediv> + <refnamediv> + <refname>Tab</refname> + <refpurpose>Match characters until number of characters have been matched</refpurpose> + </refnamediv> + <refnamediv> + <refname>RTab</refname> + <refpurpose>Match characters until number of characters remain to be matched</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command><arg>spipat.</arg>Len</command>( <arg><replaceable>n</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>Len</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_Len</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>Pos</command>( <arg><replaceable>n</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>Pos</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + 
<command><arg>spipat.</arg>_Pos</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>RPos</command>( <arg><replaceable>n</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>RPos</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_RPos</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>Tab</command>( <arg><replaceable>n</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>Tab</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_Tab</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + + <cmdsynopsis> + <command><arg>spipat.</arg>RTab</command>( <arg><replaceable>n</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>RTab</command> + ( <arg choice="plain"><replaceable>function</replaceable></arg><arg>, <replaceable>cookie</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command><arg>spipat.</arg>_RTab</command>( <arg choice="plain"><replaceable>string</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + Integer primitives are pattern constructors that in their first form all take a <emphasis>number</emphasis> or + <emphasis>string</emphasis> (which is converted to a <emphasis>number</emphasis>) as their sole argument + (<emphasis>n</emphasis>). + This <emphasis>number</emphasis> has to be an unsigned integer - sometimes a natural number depending on the + primitive. 
+ </para> + <tip><para> + If the argument is omitted, <emphasis>zero</emphasis> is assumed. + </para></tip> + <para> + In their second form the primitives take a Lua <emphasis>function</emphasis> and an optional <emphasis>cookie</emphasis> + as arguments. When the constructed pattern is about to be matched, the <emphasis>function</emphasis> is called + and is supposed to return a <emphasis>number</emphasis> or <emphasis>string</emphasis> (which is converted to + a <emphasis>number</emphasis>) to supply the primitive's argument dynamically. + It receives the following arguments when invoked: + <orderedlist><listitem> + <emphasis>cookie</emphasis>: Any Lua value specified as a cookie in the pattern constructor or + <emphasis>nil</emphasis> if no cookie was specified. + </listitem></orderedlist> + </para><para> + The primitives with a leading underscore (e.g. <command>_Len</command>) are similar but actually get their argument + from a global variable with the name a <emphasis>string</emphasis> argument contains. + This means that for instance <code>_Len(str)</code> does not get its argument from the global variable <literal>str</literal> + but rather from the variable with the name <emphasis>str</emphasis> contains, e.g. <literal>foo</literal> if <code>str == "foo"</code>. + So generally <command>_Len</command> is equivalent to: + <informalexample> + <programlisting language="lua"><![CDATA[function _Len(val) + return Len(function() return _G[val] end) +end]]></programlisting> + </informalexample> + In a similar manner, other kinds of variable deferring can be implemented + using function closures as described in <xref linkend="deferring"/>. + </para> + <note><para> + References to <emphasis>function</emphasis> and <emphasis>cookie</emphasis> will be kept so they will not be + garbage collected until the pattern constructed is garbage collected. 
+ </para></note> + <para> + The following table describes what these primitives do: + + <!-- Largely copied from the GNAT.SPITBOL docs --> + <table> + <title>Integer Primitives</title> + + <tgroup cols="2" colsep="1" rowsep="1"> + <colspec colwidth="10%"/> + + <thead> + <row> + <entry>Primitive</entry> + <entry>Description</entry> + </row> + </thead><tbody> + <row> + <entry><command>Len</command>( <arg choice="plain"><replaceable>N</replaceable></arg> )</entry> + <entry><para> + Where N is a natural number, matches the given number of + characters. For example, <code>Len(10)</code> matches any string that + is exactly ten characters long. + </para></entry> + </row><row> + <entry><command>Pos</command>( <arg choice="plain"><replaceable>N</replaceable></arg> )</entry> + <entry><para> + Where N is a natural number, matches the null string + if exactly N characters have been matched so far, and + otherwise fails. + </para></entry> + </row><row> + <entry><command>RPos</command>( <arg choice="plain"><replaceable>N</replaceable></arg> )</entry> + <entry><para> + Where N is a natural number, matches the null string + if exactly N characters remain to be matched, and + otherwise fails. + </para></entry> + </row><row> + <entry><command>Tab</command>( <arg choice="plain"><replaceable>N</replaceable></arg> )</entry> + <entry><para> + Where N is a natural number, matches characters from + the current position until exactly N characters have + been matched in all. Fails if more than N characters + have already been matched. + </para></entry> + </row><row> + <entry><command>RTab</command>( <arg choice="plain"><replaceable>N</replaceable></arg> )</entry> + <entry><para> + Where N is a natural number, matches characters from + the current position until exactly N characters remain + to be matched in the string. Fails if fewer than N + unmatched characters remain in the string. 
+ </para></entry> + </row> + </tbody> + </tgroup> + </table> + </para> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by the constructor + </listitem></orderedlist> + </refsect1> + <!--<refsect1> + <title>Example</title> + + </refsect1>--> <!-- TODO --> + </refentry> + + <refentry xml:id="misc_primitives"> + <refmeta><refentrytitle>Miscellaneous Primitives</refentrytitle></refmeta> + + <refnamediv xml:id="Arb"> + <refname>Arb</refname> + <refpurpose>Matches any string</refpurpose> + </refnamediv> + <refnamediv> + <refname>Bal</refname> + <refpurpose>Matches parentheses balanced strings</refpurpose> + </refnamediv> + <refnamediv> + <refname>Abort</refname> + <refpurpose>Immediately abort pattern match</refpurpose> + </refnamediv> + <refnamediv xml:id="Fail"> + <refname>Fail</refname> + <refpurpose>Null alternation</refpurpose> + </refnamediv> + <refnamediv> + <refname>Rem</refname> + <refpurpose>Match the entire remaining subject string</refpurpose> + </refnamediv> + <refnamediv> + <refname>Succeed</refname> + <refpurpose>Match the null string in every alternative</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.Arb</command>() + </cmdsynopsis><cmdsynopsis> + <command>Arb</command>() + </cmdsynopsis><cmdsynopsis> + <command>spipat.Bal</command>() + </cmdsynopsis><cmdsynopsis> + <command>Bal</command>() + </cmdsynopsis><cmdsynopsis> + <command>spipat.Abort</command>() + </cmdsynopsis><cmdsynopsis> + <command>Abort</command>() + </cmdsynopsis><cmdsynopsis> + <command>spipat.Fail</command>() + </cmdsynopsis><cmdsynopsis> + <command>Fail</command>() + </cmdsynopsis><cmdsynopsis> + <command>spipat.Rem</command>() + </cmdsynopsis><cmdsynopsis> + <command>Rem</command>() + </cmdsynopsis><cmdsynopsis> + <command>spipat.Succeed</command>() + </cmdsynopsis><cmdsynopsis> + <command>Succeed</command>() + </cmdsynopsis> + </refsynopsisdiv> + + 
<refsect1> + <title>Description</title> + + <para> + These are <emphasis>simple</emphasis> pattern constructor + functions. + </para><para> + The following table describes what these primitives do: + + <!-- Largely copied from the GNAT.SPITBOL docs --> + <table> + <title>Miscellaneous Primitives</title> + + <tgroup cols="2" colsep="1" rowsep="1"> + <colspec colwidth="10%"/> + + <thead> + <row> + <entry>Primitive</entry> + <entry>Description</entry> + </row> + </thead><tbody> + <row> + <entry><command>Arb</command>()</entry> + <entry><para> + Matches any string. First it matches the null string, and + then on a subsequent failure, matches one character, and + then two characters, and so on. It only fails if the + entire remaining string is matched. + </para></entry> + </row><row> + <entry><command>Bal</command>()</entry> + <entry><para> + Matches a non-empty string that is parentheses balanced + with respect to ordinary <literal>()</literal> characters. + Examples of balanced strings are <literal>"ABC"</literal>, + <literal>"A((B)C)"</literal>, and <literal>"A(B)C(D)E"</literal>. + <command>Bal</command> matches the shortest possible balanced + string on the first attempt, and if there is a subsequent failure, + attempts to extend the string. + </para></entry> + </row><row> + <entry><command>Abort</command>()</entry> + <entry><para> + Immediately aborts the entire pattern match, signalling + failure. This is a specialized pattern element, which is + useful in conjunction with some of the special pattern + elements that have side effects. + </para></entry> + </row><row> + <entry><command>Fail</command>()</entry> + <entry><para> + The null alternation. Matches no possible strings, so it + always signals failure. This is a specialized pattern + element, which is useful in conjunction with some of the + special pattern elements that have side effects. 
+ </para></entry> + </row><row> + <entry><command>Rem</command>()</entry> + <entry><para> + Matches from the current point to the last character in + the string. This is a specialized pattern element, which + is useful in conjunction with some of the special pattern + elements that have side effects. + </para></entry> + </row><row> + <entry><command>Succeed</command>()</entry> + <entry><para> + Repeatedly matches the null string (it is equivalent to + the alternation <code>("" + "" + "" ....)</code>). This is a special + pattern element, which is useful in conjunction with some + of the special pattern elements that have side effects. + </para></entry> + </row> + </tbody> + </tgroup> + </table> + </para> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by the constructor + </listitem></orderedlist> + </refsect1> + <!--<refsect1> + <title>Example</title> + + </refsect1>--> <!-- TODO --> + </refentry> + + <refentry xml:id="RegExp"> + <refmeta><refentrytitle>POSIX Extended Regular Expressions</refentrytitle></refmeta> + + <refnamediv> + <refname>RegExp</refname> + <refpurpose>Matches a pattern equivalent to a regular expression</refpurpose> + </refnamediv> + + <refsynopsisdiv> + <cmdsynopsis> + <command>spipat.RegExp</command> + ( <arg choice="plain"><replaceable>expression</replaceable></arg><arg>, <replaceable>captures</replaceable></arg> ) + </cmdsynopsis><cmdsynopsis> + <command>RegExp</command> + ( <arg choice="plain"><replaceable>expression</replaceable></arg><arg>, <replaceable>captures</replaceable></arg> ) + </cmdsynopsis> + </refsynopsisdiv> + + <refsect1> + <title>Description</title> + + <para> + <command>RegExp</command> constructs from a + <link xlink:href="http://www.opengroup.org/onlinepubs/7990989775/xbd/re.html#tag_007_004"> + POSIX Extended Regular Expression</link>, a pattern that is equivalent to that regular + <emphasis>expression</emphasis> and can be 
combined with other patterns freely. + </para><para> + It can optionally construct the pattern to save the <emphasis>captures</emphasis> + from a regular expression match in a Lua <emphasis>table</emphasis>. + </para> + <warning><para> + Even though this implementation should support almost all elements of EREs, + it is considered <emphasis>experimental</emphasis>. + You are advised to use the usual pattern construction primitives. + </para></warning> + </refsect1><refsect1> + <title>Parameters</title> + + <orderedlist> + <listitem> + expression (<emphasis>string</emphasis>): The POSIX ERE which is compiled + to a pattern. + </listitem><listitem> + captures (<emphasis>table</emphasis>): Optional table, or more precisely + array, to hold subexpression captures. + Naturally, it has to exist when <command>RegExp</command> is called. + When a subexpression is captured (i.e. the pattern equivalent to what is + enclosed in parentheses), the matching string is added to the + end of the table. + Thus taken that <emphasis>captures</emphasis> is initially empty, if + <code>RegExp("(a(b))", captures)</code> matches, <emphasis>captures</emphasis> + will be <code>{"b", "ab"}</code>. + </listitem> + </orderedlist> + </refsect1><refsect1> + <title>Return Values</title> + + <orderedlist><listitem> + <emphasis>pattern</emphasis> (userdata): Pattern built by <command>RegExp</command> + </listitem></orderedlist> + </refsect1><refsect1> + <title>Example</title> + + <example> + <title>Regular Expressions</title> + + <screen><![CDATA[> print(RegExp "^[[:digit:]]*?(abc\\.|de?)") +Pos(0) * Arbno(Any(<CS>)) * ("abc." + "d" * ("" + "e")) +>]]></screen> + </example> + </refsect1> + </refentry> + </reference> +</book> diff --git a/samples/exp2bf.lua b/samples/exp2bf.lua new file mode 100755 index 0000000..dac59d4 --- /dev/null +++ b/samples/exp2bf.lua @@ -0,0 +1,48 @@ +#!/usr/bin/lua + +require "lspipat" + +function EXIT(...) 
+ io.stderr:write(string.format(...)) + os.exit() +end + +stack = {} +function push(val) table.insert(stack, val) end +function binop() + table.insert(stack, { + l = table.remove(stack), + type = table.remove(stack), + r = table.remove(stack) + }) +end + +function compile(node) + if type(node) ~= "table" then return string.rep("+", tonumber(node)) end + + local ret = compile(node.l)..">"..compile(node.r) + node.type:smatch( Any("+-") % function(o) ret = ret.."[<"..o..">-]<" end + + "*" * -function() ret = ">>"..ret.."[<[<+<+>>-]<[>+<-]>>-]<[-]<<" end + + "/" * -function() ret = ">"..ret.."<[->->+>>+<<<[>>+>[-]<<<-]>>".. + "[<<+>>-]>[-<<[<+>-]<<<+>>>>>]<<<<]>[-]>[-]<<<" end, + spipat.match_anchored ) + + return ret +end + +if #arg ~= 1 then EXIT("Invalid number of parameters\n") end + +space = NSpan(" ") +pre = space * ("(" * -"exp" * space * ")" + Span("0123456789") % push) +post = space * ( Any("+-") % push * -"exp" * -binop + + Any("*/") % push * pre * -binop * -"post" ) + "" +exp = pre * post + +if not arg[1]:smatch(exp * RPos(0), spipat.match_anchored) then EXIT("Invalid expression!\n") end + +src = compile(stack[1]).. + "[>++++++++++<[->->+>>+<<<[>>+>[-]<<<-]>>[<<+>>-]".. + ">[-<<[<+>-]>>>+<]<<<<]>>>>>[<<<<<+>>>>>-]>[>]"..string.rep("+", string.byte("0")).. + "[<]<<<[>>>>[>]<+[<]<<<-]<[-]<]>>>>>>[>]<[.<]" + +print(src) diff --git a/samples/regexp.lua b/samples/regexp.lua new file mode 100644 index 0000000..b9b1da2 --- /dev/null +++ b/samples/regexp.lua @@ -0,0 +1,26 @@ +-- Parse IP address using regular expression compiler + +require "lspipat" + + +exp = [=[^([[:digit:]]{1,3})(\.([[:digit:]]{1,3})){3,3}$]=] + +ip1 = RegExp(exp) +print(ip1) + +local captures = {} +ip2 = RegExp(exp, captures) +print(ip2) + +print(spipat.smatch("192.168.0.1", ip1)) +print(spipat.smatch("192.168.000.001", ip1)) +print(spipat.smatch("192.168.0.XXX", ip1)) + +print(spipat.smatch("192.168.0.1", ip2)) + +-- remove captures due to grouping around "." 
+table.remove(captures, 3) +table.remove(captures, 5) +table.remove(captures, 7) + +print(table.concat(captures, ".")) diff --git a/samples/wave.lua b/samples/wave.lua new file mode 100755 index 0000000..9fd5adb --- /dev/null +++ b/samples/wave.lua @@ -0,0 +1,81 @@ +#!/usr/bin/lua + +require "lspipat" + +function uint(bytes, val) -- binary integer decoding + return Len(bytes) % function(bin) + bin = littleEndian and bin or bin:reverse() + + local n = 0 + local base = 1 + + for _, c in ipairs{bin:byte(1, bytes)} do + n = n + base * c + base = base * 256 + end + + val(n) + end +end + +function _uint(bytes, name) return uint(bytes, function(n) _G[name] = n end) end +function _uint16(name) return _uint(2, name) end +function _uint32(name) return _uint(4, name) end + +hnd = assert(io.open(arg[1])) + +file = hnd:read("*a") + +hnd:close() + +-- WAVE file "grammar" + +format = "fmt " + * _uint32 "FmtChunkSize" + * _Setcur "FmtStartPos" + * _uint16 "AudioFormat" + * _uint16 "NumChannels" + * _uint32 "SampleRate" + * _uint32 "ByteRate" + * _uint16 "BlockAlign" + * _uint16 "BitsPerSample" + * ( -function() return AudioFormat == 1 end + + _uint16 "ExtraParamSize" + * _Len "ExtraParamSize" ) + * Pos(function() return FmtStartPos + FmtChunkSize end) + * -function() return BitsPerSample % 8 == 0 and + BlockAlign == NumChannels * BitsPerSample/8 and + ByteRate == SampleRate * BlockAlign end + +data = "data" + * _uint32 "DataChunkSize" + * _Len "DataChunkSize" + +misc = Len(4) + * _uint32 "MiscChunkSize" + * _Len "MiscChunkSize" + +wave = (topattern("RIFF") + "RIFX") + % function(id) littleEndian = id == "RIFF" end + * _uint32 "ChunkSize" + * _Setcur "StartPos" + * "WAVE" + * Arbno(format + data + misc) + * Pos(function() return StartPos + ChunkSize end) + * -function() return DataChunkSize % BlockAlign == 0 end + * RPos(0) + +assert(file:smatch(wave, spipat.match_anchored), + arg[1].." 
is not a valid WAVE file!") + +print(string.format( +"%s\ +Format: %u\ +Channels: %u\ +Samplerate: %u Hz\ +Byterate: %u Hz\ +Bits/Sample: %u\ +Samples: %u", +arg[1], +AudioFormat, NumChannels, SampleRate, ByteRate, BitsPerSample, DataChunkSize / BlockAlign)) +print(os.date("Length:\t\t%T", DataChunkSize / ByteRate + 60*60*23)) diff --git a/spipat-patches/0.9.3+_image.patch b/spipat-patches/0.9.3+_image.patch new file mode 100644 index 0000000..6dee608 --- /dev/null +++ b/spipat-patches/0.9.3+_image.patch @@ -0,0 +1,94 @@ +--- image.c.orig 2010-05-15 02:03:24.000000000 +0200 ++++ image.c 2010-05-18 06:16:02.347573592 +0200 +@@ -346,16 +346,16 @@ + break; + + case PC_Arbno_S: +- Append(sp, sp->strings[E->Pcode]); ++ Append(sp, sp->strings[PC_Arbno_S]); + AppendC(sp, '('); + spipat_image_seq(sp, E->val.Alt, E, false); + AppendC(sp, ')'); + break; + + case PC_Arbno_X: +- Append(sp, sp->strings[E->Pcode]); ++ Append(sp, sp->strings[PC_Arbno_X]); + AppendC(sp, '('); +- spipat_image_seq(sp, E->val.Alt->Pthen, sp->Refs[E->Index - 2], false); ++ spipat_image_seq(sp, E->val.Alt->Pthen, sp->Refs[E->Index - 3], false); + AppendC(sp, ')'); + break; + +@@ -378,10 +378,10 @@ + break; + + case PC_Fence_X: +- Append(sp, sp->strings[E->Pcode]); +- spipat_image_seq (sp, E->Pthen, sp->Refs[E->Index - 1], false); ++ Append(sp, sp->strings[PC_Fence_X]); ++ AppendC(sp, '('); ++ spipat_image_seq(sp, sp->Refs[E->Index]->Pthen, E, false); // PC_R_Enter at Refs[E->Index] + AppendC(sp, ')'); +- ER = sp->Refs[E->Index - 1]->Pthen; + break; + + case PC_Len_Nat: +@@ -410,11 +410,13 @@ + break; + + case PC_Null: +- Append(sp, "\"\""); ++ Append(sp, sp->quote); ++ Append(sp, sp->quote); + break; + + case PC_R_Enter: + sp->Kill_Concat = true; ++ ER = sp->Refs[E->Index - 2]; // allows correct processing of PC_Fence_X & PC_Call_* + break; + + case PC_Rpat: +@@ -486,13 +488,11 @@ + + case PC_Call_Imm: + case PC_Call_OnM: +- // XXX fix me!! 
+ AppendC(sp, '('); +- spipat_image_seq(sp, E, sp->Refs[E->Index - 1], true); ++ spipat_image_seq(sp, sp->Refs[E->Index]->Pthen, E, true); // PC_R_Enter at Refs[E->Index] + Append(sp, sp->strings[E->Pcode]); +- AppendMF(sp, sp->Refs[E->Index - 1]); ++ AppendMF(sp, E); + AppendC(sp, ')'); +- ER = sp->Refs[E->Index - 1]->Pthen; + break; + + case PC_Arb_Y: +--- spipat_image.h.orig 2010-05-15 01:24:44.000000000 +0200 ++++ spipat_image.h 2010-05-15 19:37:32.039626005 +0200 +@@ -78,7 +78,7 @@ + void (*fdf)(struct state *, struct pe *); + }; + +-extern const char *image_strs[PC_NUM_CODES]; ++extern const char *image_strs[]; + + void spipat_image_seq(struct state *sp, + struct pe *E, struct pe *Succ, bool Paren); +--- image_strs.c.orig 2010-05-12 01:50:05.000000000 +0200 ++++ image_strs.c 2010-05-16 06:27:43.228365501 +0200 +@@ -14,6 +14,7 @@ + [PC_Any_VP] = "Any", + [PC_Arb_X] = "Arb", + [PC_Arbno_S] = "Arbno", ++ [PC_Arbno_X] = "Arbno", + [PC_Assign_Imm] = " . ", + [PC_Assign_OnM] = " $ ", + [PC_Bal] = "Bal", +@@ -64,4 +65,5 @@ + [PC_Tab_NF] = "Tab", + [PC_Tab_NP] = "Tab", + [PC_Tab_Nat] = "Tab", ++ [PC_Dynamic_Func] = "Dynamic" + }; diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..de6160b --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,28 @@ +# Main lspipat Automake file +# processed automatically + +AM_CFLAGS = -std=c99 -Wall + +lualib_lspipat_LTLIBRARIES = core.la +core_la_SOURCES = lspipat.c lspipat.h \ + call.c compose.c unary.c render.c misc.c \ + simple.c string.c uint.c +core_la_LDFLAGS = -module + +if LUA_PRECOMPILE + +lualib_DATA = lspipat.out +CLEANFILES = $(lualib_DATA) +EXTRA_DIST = lspipat.lua + +lspipat.out : lspipat.lua + @LUAC@ @LUAC_FLAGS@ -o $@ $< + +install-data-hook : + mv -f $(DESTDIR)$(lualibdir)/lspipat.out $(DESTDIR)$(lualibdir)/lspipat.lua + +else + +dist_lualib_DATA = lspipat.lua + +endif diff --git a/src/call.c b/src/call.c new file mode 100644 index 0000000..818ebdb --- /dev/null +++ b/src/call.c @@ -0,0 +1,86 
@@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: CALL OPERATIONS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <string.h> + +#include "lspipat.h" + /* Callback handed to the spipat call constructors (see genericCallOperator): pushes the Lua function referenced by cb.function, the matched text and the value referenced by cb.cookie, then calls the function with those two arguments and discards any results. `local' is the struct callRefs stored in the pattern wrapper; `global' is unused. */ +static void +callFncHandler(VString matched, void *global __attribute__((unused)), void *local) +{ + struct callRefs *call = local; + lua_State *L = call->cb.L; + + lua_rawgeti(L, LUA_REGISTRYINDEX, call->cb.function); + lua_pushlstring(L, matched.ptr, matched.len); + lua_rawgeti(L, LUA_REGISTRYINDEX, call->cb.cookie); +#if 0 + lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global); +#endif + + lua_call(L, 2, 0); +} + /* Holds the spipat constructor a call operator is built on: it takes the sub-pattern, the match callback and the callback's local cookie. */ +struct callOperator { + struct pat *(*call)(struct pat *, void (*)(VString, void *, void *), void *); +}; + + /* TODO: local cookie support, this would also allow helper functions for assignment to global variables */ + /* at least one parameter is a pattern, the lvalue has to be it */ +static int +genericCallOperator(lua_State *L, struct callOperator spipat) +{ + PATTERN_WRAPPER *new; + struct callRefs *call; + /* argument 1 must be a pattern that has not been freed, argument 2 a Lua function */ + PATTERN_WRAPPER *lvalue = luaL_checkudata(L, 1, PATTERN_MT); + if (!lvalue->pattern) + L_ERROR(L_FREED); + if (!lua_isfunction(L, 2)) + L_ERROR(L_TYPE); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + lua_insert(L, 1); /* move wrapper below lvalue */ + + new->type = PATTERN_CALL; + + call = &new->u.call; + call->cb.L = L; + call->cb.cookie = LUA_REFNIL; /* each luaL_ref below pops the stack top: first the function, then the lvalue pattern (assumes exactly two arguments were passed) */ + call->cb.function = luaL_ref(L, LUA_REGISTRYINDEX); + call->pattern = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat.call(lvalue->pattern, callFncHandler, call); + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} + /* Defines the function introduced by LUA_SIG(LFNC) (macro defined elsewhere, presumably in lspipat.h) as a thin wrapper that passes SPIFNC to genericCallOperator. */ +#define STDCALLOP(LFNC, SPIFNC) \ + LUA_SIG(LFNC) \ + { \ + return genericCallOperator(L, (struct callOperator) { \ + .call = SPIFNC \ + }); \ + } + 
+STDCALLOP(l_op_call_immed, spipat_call_immed) +STDCALLOP(l_op_call_onmatch, spipat_call_onmatch) + +#undef STDCALLOP diff --git a/src/compose.c b/src/compose.c new file mode 100644 index 0000000..b8be248 --- /dev/null +++ b/src/compose.c @@ -0,0 +1,106 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: COMPOSITION OPERATIONS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <string.h> + +#include "lspipat.h" + /* Set of spipat constructors for one binary operator, one per operand combination (string, character or pattern on either side). */ +struct composeOperator { + struct pat *(*str_pat)(VString, struct pat *); + struct pat *(*pat_str)(struct pat *, VString); + struct pat *(*chr_pat)(Character, struct pat *); + struct pat *(*pat_chr)(struct pat *, Character); + struct pat *(*pat_pat)(struct pat *, struct pat *); +}; + + /* at least one parameter must be a pattern, both are only allowed to be numbers, strings or patterns */ + /* Builds a new pattern wrapper from the two operands using the constructors in `spipat'; strings of length one go through the chr variants. Registry references to the operand patterns are stored in the wrapper. Returns 1 (the new wrapper on the stack). */ +static int +genericComposeOperator(lua_State *L, struct composeOperator spipat) +{ + VString str = VSTRING_INITIALIZER; + PATTERN_WRAPPER *new; + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + lua_insert(L, 1); /* the operands are now at stack indices 2 (lvalue) and 3 (rvalue) */ + + if (lua_isstring(L, 2)) { /* lvalue number/string, rvalue is pattern */ + PATTERN_WRAPPER *rvalue = lua_touserdata(L, 3); /* NOTE(review): unlike the other branches, rvalue is not type-checked with luaL_checkudata here */ + + if (!rvalue->pattern) + L_ERROR(L_FREED); + str.ptr = lua_tolstring(L, 2, (size_t *)&str.len); /* NOTE(review): the cast assumes str.len has the size of size_t - confirm against the VString definition */ + + new->type = PATTERN_ONESUBPAT; + new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX); /* pops rvalue (stack top) and keeps a registry reference to it */ + + new->pattern = str.len == 1 ? spipat.chr_pat(*str.ptr, rvalue->pattern) + : spipat.str_pat(str, rvalue->pattern); + + lua_pop(L, 1); /* `new' at stack top */ + } else { /* lvalue must be pattern */ + PATTERN_WRAPPER *lvalue = luaL_checkudata(L, 2, PATTERN_MT); + + if (!lvalue->pattern) + L_ERROR(L_FREED); + + if (lua_isstring(L, 3)) { /* rvalue number/string */ + str.ptr = lua_tolstring(L, 3, (size_t *)&str.len); + + new->pattern = str.len == 1 ? 
spipat.pat_chr(lvalue->pattern, *str.ptr) + : spipat.pat_str(lvalue->pattern, str); + + lua_pop(L, 1); /* drop the string so that lvalue is at the top for luaL_ref */ + + new->type = PATTERN_ONESUBPAT; + new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX); + } else { /* rvalue must be pattern */ + PATTERN_WRAPPER *rvalue = luaL_checkudata(L, 3, PATTERN_MT); + + if (!rvalue->pattern) + L_ERROR(L_FREED); + + new->type = PATTERN_TWOSUBPAT; /* luaL_ref pops the stack top, so rvalue is referenced first (pattern2), then lvalue (pattern1) */ + new->u.twosubpat.pattern2 = luaL_ref(L, LUA_REGISTRYINDEX); + new->u.twosubpat.pattern1 = luaL_ref(L, LUA_REGISTRYINDEX); + + new->pattern = spipat.pat_pat(lvalue->pattern, rvalue->pattern); + } + } + + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} + /* Defines the function introduced by LUA_SIG(LFNC) as a wrapper that calls genericComposeOperator with the five SPIFNC##_* constructors. */ +#define STDCOMPOSEOP(LFNC, SPIFNC) \ + LUA_SIG(LFNC) \ + { \ + return genericComposeOperator(L, (struct composeOperator) { \ + .str_pat = SPIFNC##_str_pat, \ + .pat_str = SPIFNC##_pat_str, \ + .chr_pat = SPIFNC##_chr_pat, \ + .pat_chr = SPIFNC##_pat_chr, \ + .pat_pat = SPIFNC##_pat_pat \ + }); \ + } + +STDCOMPOSEOP(l_op_and, spipat_and) +STDCOMPOSEOP(l_op_or, spipat_or) + +#undef STDCOMPOSEOP diff --git a/src/lspipat.c b/src/lspipat.c new file mode 100644 index 0000000..5075961 --- /dev/null +++ b/src/lspipat.c @@ -0,0 +1,336 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: LIBSPIPAT <-> LUA INTERACTION + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <string.h> + +#include "lspipat.h" + +/* + * Module and Pattern methods + */ + +/* TODO: support global cookies */ + /* Matches the subject string (argument 1) against a pattern userdata or a string/number (argument 2), with optional integer flags (argument 3, default 0). Returns match.start and match.stop on success and nil on failure; raises a Lua error on a match exception. */ +LUA_SIG(l_smatch) +{ + int top = lua_gettop(L); + + struct spipat_match match; + enum spipat_match_ret ret; + + luaL_argcheck(L, top == 2 || top == 3, top, L_NUMBER); + + memset(&match, 0, sizeof(match)); + match.subject.ptr = luaL_checklstring(L, 1, (size_t *)&match.subject.len); + match.flags = luaL_optint(L, 3, 0); + + if (lua_isstring(L, 2)) { + VString str = VSTRING_INITIALIZER; + str.ptr = lua_tolstring(L, 2, 
(size_t *)&str.len); + + match.pattern = str.len == 1 ? spipat_char(*str.ptr) + : spipat_string(str); + if (!match.pattern) + L_ERROR(L_ALLOC); + } else { + PATTERN_WRAPPER *wrapper = luaL_checkudata(L, 2, PATTERN_MT); + luaL_argcheck(L, wrapper->pattern, 2, L_FREED); + + match.pattern = wrapper->pattern; + spipat_hold(match.pattern); /* presumably takes an extra reference so that the spipat_free below leaves the wrapper's pattern alive - confirm against spipat */ + } + + ret = spipat_match2(&match); + spipat_free(match.pattern); /* only frees the temporary pattern for string params */ + if (ret == SPIPAT_MATCH_EXCEPTION) + L_ERROR("%s", match.exception); + + if (ret == SPIPAT_MATCH_FAILURE) { + lua_pushnil(L); + return 1; + } + + /* SPIPAT_MATCH_SUCCESS */ + lua_pushinteger(L, match.start); + lua_pushinteger(L, match.stop); + return 2; +} + + /* should we check __topattern operations in types metatables just like tostring does? */ /* Takes exactly one argument. A number or string is converted into a new pattern userdata (length-one strings via spipat_char, others via spipat_string); a userdata argument is returned unchanged; any other type yields no results. */ +LUA_SIG(l_topattern) +{ + int top = lua_gettop(L); + + luaL_argcheck(L, top == 1, top, L_NUMBER); + + switch (lua_type(L, 1)) { + case LUA_TNUMBER: + case LUA_TSTRING: { + PATTERN_WRAPPER *wrapper; + VString str = VSTRING_INITIALIZER; + + if (!(wrapper = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(wrapper, 0, sizeof(PATTERN_WRAPPER)); + + str.ptr = lua_tolstring(L, 1, (size_t *)&str.len); + + wrapper->pattern = str.len == 1 ? 
spipat_char(*str.ptr) + : spipat_string(str); + if (!wrapper->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; + } + case LUA_TUSERDATA: + /* FIXME: check whether it's a PATTERN_MT (without raising an error) */ + return 1; + + default: + return 0; + } + + /* not reached */ +} + /* Takes exactly one argument, a pattern that has not been freed, and passes it to spipat_dump; returns nothing to Lua. */ +LUA_SIG(l_dump) +{ + PATTERN_WRAPPER *wrapper; + int top = lua_gettop(L); + + luaL_argcheck(L, top == 1, top, L_NUMBER); + wrapper = luaL_checkudata(L, 1, PATTERN_MT); + luaL_argcheck(L, wrapper->pattern, 1, L_FREED); + + spipat_dump(wrapper->pattern); + return 0; +} + +/* + * Finalizer + */ + /* Releases the registry references to a callback's function and cookie. */ +static inline void +unrefCallback(struct cbRefs *cb) +{ + luaL_unref(cb->L, LUA_REGISTRYINDEX, cb->function); + luaL_unref(cb->L, LUA_REGISTRYINDEX, cb->cookie); +} + /* Frees the spipat pattern of the wrapper passed as the only argument and releases the registry references the wrapper holds, selected by wrapper->type. Does nothing if the pattern was already freed; raises an error for an unknown wrapper type. */ +LUA_SIG(l_finalize_pattern) +{ + int top = lua_gettop(L); + PATTERN_WRAPPER *wrapper; + + luaL_argcheck(L, top == 1, top, L_NUMBER); + wrapper = luaL_checkudata(L, 1, PATTERN_MT); + + if (!wrapper->pattern) + return 0; /* already freed */ + + spipat_free(wrapper->pattern); /* should also release any strings/patterns */ + wrapper->pattern = NULL; /* (remove from registry using release functions) returned by some callback */ + + switch (wrapper->type) { + case PATTERN_OTHER: + break; + case PATTERN_ONESUBPAT: + luaL_unref(L, LUA_REGISTRYINDEX, wrapper->u.onesubpat.pattern); + break; + case PATTERN_TWOSUBPAT: + luaL_unref(L, LUA_REGISTRYINDEX, wrapper->u.twosubpat.pattern1); + luaL_unref(L, LUA_REGISTRYINDEX, wrapper->u.twosubpat.pattern2); + break; + case PATTERN_CALL: + luaL_unref(L, LUA_REGISTRYINDEX, wrapper->u.call.pattern); + unrefCallback(&wrapper->u.call.cb); + break; + case PATTERN_RETFNC: + unrefCallback(&wrapper->u.retfnc.cb); + break; + case PATTERN_SIMPLEFNC: + unrefCallback(&wrapper->u.simplefnc.cb); + break; + default: + L_ERROR(L_MISC); + } + + return 0; +} + +/* + * Cookie release function for function return values + */ + +void +retfncUnrefRet(void *arg) +{ + 
struct retfncRefs *retfnc = arg; + + luaL_unref(retfnc->cb.L, LUA_REGISTRYINDEX, retfnc->ret); +} + +/* + * Loader + */ + +int +luaopen_lspipat_core(lua_State *L) +{ + static const luaL_Reg spipat[] = { + {"smatch", l_smatch}, + + {"topattern", l_topattern}, + {"dump", l_dump}, + + {"free", l_finalize_pattern}, + {NULL, NULL} + }; + + static const luaL_Reg primitives[] = { + /* string primitives */ + {"Any", l_primitive_any}, + {"Break", l_primitive_break}, + {"BreakX", l_primitive_breakx}, + {"NotAny", l_primitive_notany}, + {"NSpan", l_primitive_nspan}, + {"Span", l_primitive_span}, + + /* unsigned integer primitives */ + {"Len", l_primitive_len}, + {"Pos", l_primitive_pos}, + {"RPos", l_primitive_rpos}, + {"RTab", l_primitive_rtab}, + {"Tab", l_primitive_tab}, + + /* simple primitives */ + {"Abort", l_primitive_abort}, + {"Arb", l_primitive_arb}, + {"Bal", l_primitive_bal}, + {"Fail", l_primitive_fail}, + {"Rem", l_primitive_rem}, + {"Succeed", l_primitive_succeed}, + + /* misc. primitives */ + {"Arbno", l_primitive_arbno}, + {"Fence", l_primitive_fence}, + + /* primitives for unary operators */ + {"Setcur", l_setcur}, + {"Pred", l_pred}, + {NULL, NULL} + }; + + static const luaL_Reg methods[] = { + {"free", l_finalize_pattern}, + {NULL, NULL} + }; + + static const luaL_Reg operations[] = { + {"__mul", l_op_and}, + {"__add", l_op_or}, + {"__mod", l_op_call_immed}, + {"__div", l_op_call_onmatch}, + + {"__tostring", l_tostring}, + + {"__gc", l_finalize_pattern}, + {NULL, NULL} + }; + + static const LUA_CONSTANT mapping[] = { + {"match_debug", SPIPAT_DEBUG}, + {"match_anchored", SPIPAT_ANCHORED}, + {NULL, 0} + }; + + /* module methods, primitives & constants */ + + luaL_register(L, "spipat", spipat); + luaL_register(L, NULL, primitives); + + for (const LUA_CONSTANT *m = mapping; m->lua; m++) { + lua_pushinteger(L, m->c); + lua_setfield(L, -2, m->lua); + } + /* module table should be at stack index 2 */ + + /* global methods & primitives */ + /* FIXME: make it 
optional (function or submodule) */ + + for (const luaL_Reg *p = primitives; p->name; p++) { + lua_pushcfunction(L, p->func); + lua_setglobal(L, p->name); + } + lua_pushcfunction(L, l_topattern); + lua_setglobal(L, "topattern"); + + /* "patch" string meta table with some methods */ + + lua_pushstring(L, "foo"); /* FIXME: use luaL_getmetatable */ + lua_getmetatable(L, -1); + lua_getfield(L, 2, "_Pred"); /* ok, this is hairy: will only be available if string cannot be converted to a number */ + lua_setfield(L, -2, "__unm"); + lua_getfield(L, -1, "__index"); + lua_pushcfunction(L, l_smatch); /* maybe split "spipat" and use luaL_register */ + lua_setfield(L, -2, "smatch"); + lua_getfield(L, 2, "ssub"); /* maybe write aux function to register Lua functions */ + lua_setfield(L, -2, "ssub"); + lua_getfield(L, 2, "siter"); + lua_setfield(L, -2, "siter"); + lua_pushcfunction(L, l_topattern); + lua_setfield(L, -2, "topattern"); + lua_pop(L, 3); + /* TODO: maybe also set the pattern-specific operations - adapt l_op_or/and to cope with two strings + however, arithmetic ops are already defined for strings if they can be converted to numbers */ + + /* "patch" number meta table with some methods */ + + lua_pushinteger(L, 23); /* FIXME: use luaL_getmetatable */ + if (!lua_getmetatable(L, -1)) { + lua_newtable(L); + lua_newtable(L); + } else + lua_getfield(L, -1, "__index"); + lua_pushcfunction(L, l_topattern); + lua_setfield(L, -2, "topattern"); + lua_setfield(L, -2, "__index"); + lua_setmetatable(L, -2); + lua_pop(L, 1); + + /* "patch" function meta table with operators */ + + lua_pushcfunction(L, l_smatch); /* FIXME: use luaL_getmetatable */ + if (!lua_getmetatable(L, -1)) + lua_newtable(L); + lua_pushcfunction(L, l_setcur); + lua_setfield(L, -2, "__len"); + lua_pushcfunction(L, l_pred); + lua_setfield(L, -2, "__unm"); + lua_setmetatable(L, -2); + lua_pop(L, 1); + + /* pattern metatable: methods & operations/events */ + + luaL_newmetatable(L, PATTERN_MT); + luaL_register(L, 
NULL, operations); + lua_newtable(L); + luaL_register(L, NULL, methods); + lua_setfield(L, -2, "__index"); + lua_pop(L, 1); + + /* module table should be on top of the stack again */ + return 1; +}
\ No newline at end of file
diff --git a/src/lspipat.h b/src/lspipat.h
new file mode 100644
index 0000000..dbae4b2
--- /dev/null
+++ b/src/lspipat.h
@@ -0,0 +1,159 @@
+/*
+ * LSPIPAT - LUA SPIPAT WRAPPER
+ * Copyright (C) 2010, Robin Haberkorn
+ * License: LGPL
+ */
+
+#ifndef LSPIPAT_H
+#define LSPIPAT_H
+
+#ifdef HAVE_LUA5_1_LUA_H
+#include <lua5.1/lua.h>
+#include <lua5.1/lauxlib.h>
+#include <lua5.1/lualib.h>
+#else
+#include <lua.h>
+#include <lauxlib.h>
+#include <lualib.h>
+#endif
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <spipat.h>
+
+#if defined(HAVE_SPIPAT_IMPL_H) && defined(HAVE_SPIPAT_IMAGE_H)
+#define USE_SPIPAT_IMAGE_CUSTOM
+
+#include <spipat_impl.h>
+#include <spipat_image.h>
+
+#endif
+
+#define VSTRING_INITIALIZER {NULL, 0, NULL, NULL}
+
+	/* Lua error raising */
+
+#define L_ALLOC		"Allocation error"
+#define L_MISC		"Miscellaneous error"
+#define L_TYPE		"Invalid type"
+#define L_NUMBER	"Invalid number of parameters"
+#define L_VALUE		"Invalid value for this parameter"
+#define L_FREED		"Pattern already freed"
+#define L_RETURN	"Invalid return value"
+
+/*
+ * Raises a Lua error with a printf-style message; relies on a
+ * lua_State pointer called `L' being in scope at the call site.
+ */
+#define L_ERROR(MSG, ...) do {				\
+	luaL_error(L, MSG "\n", ##__VA_ARGS__);		\
+} while (0) /* return omitted, so it works for all functions */
+
+	/* metatables */
+
+#define PATTERN_MT "SPIPAT.PATTERN_MT"
+
+	/* structures */
+
+/* name of a module field and the integer value stored under it */
+typedef struct {
+	const char	*lua;
+	int		c;
+} LUA_CONSTANT;
+
+struct cbRefs { /* wraps references necessary for callbacks */
+	lua_State *L;
+
+	int function;
+	int cookie; /* local cookie */
+};
+
+/*
+ * Payload of a pattern userdata: the spipat pattern (NULL once it has
+ * been freed) and the registry references it depends on. `type' tells
+ * which member of `u' is in use.
+ */
+typedef struct {
+	struct pat *pattern;
+
+	enum { /* Lua reference classes of patterns */
+		PATTERN_OTHER = 0,
+		PATTERN_ONESUBPAT,
+		PATTERN_TWOSUBPAT,
+		PATTERN_CALL,
+		PATTERN_RETFNC,
+		PATTERN_SIMPLEFNC
+	} type;
+
+	union { /* references to control garbage collection */
+		struct onesubpatRefs {
+			int pattern;
+		} onesubpat;
+
+		struct twosubpatRefs {
+			int pattern1;
+			int pattern2;
+		} twosubpat;
+
+		struct callRefs {
+			int pattern;
+			struct cbRefs cb;
+		} call;
+
+		struct retfncRefs {
+			struct cbRefs cb;
+			int ret;
+		} retfnc;
+
+		struct simplefncRefs {
+			struct cbRefs cb;
+		} simplefnc;
+	} u;
+} PATTERN_WRAPPER;
+
+	/* Lua functions */
+
+#define LUA_SIG(FNC) \
+	int FNC(lua_State *L)
+
+LUA_SIG(l_smatch);
+LUA_SIG(l_topattern);
+LUA_SIG(l_dump);
+
+LUA_SIG(l_tostring);
+
+LUA_SIG(l_op_and);
+LUA_SIG(l_op_or);
+
+LUA_SIG(l_op_call_immed);
+LUA_SIG(l_op_call_onmatch);
+
+LUA_SIG(l_setcur);
+LUA_SIG(l_pred);
+
+LUA_SIG(l_primitive_any);
+LUA_SIG(l_primitive_break);
+LUA_SIG(l_primitive_breakx);
+LUA_SIG(l_primitive_notany);
+LUA_SIG(l_primitive_nspan);
+LUA_SIG(l_primitive_span);
+
+LUA_SIG(l_primitive_len);
+LUA_SIG(l_primitive_pos);
+LUA_SIG(l_primitive_rpos);
+LUA_SIG(l_primitive_rtab);
+LUA_SIG(l_primitive_tab);
+
+LUA_SIG(l_primitive_abort);
+LUA_SIG(l_primitive_arb);
+LUA_SIG(l_primitive_bal);
+LUA_SIG(l_primitive_fail);
+LUA_SIG(l_primitive_rem);
+LUA_SIG(l_primitive_succeed);
+
+LUA_SIG(l_primitive_arbno);
+LUA_SIG(l_primitive_fence);
+
+void retfncUnrefRet(void *);
+
+#endif /* LSPIPAT_H */
\ No newline at end of file
diff --git a/src/lspipat.lua b/src/lspipat.lua
new file mode 100644
index 0000000..9db2082
--- /dev/null
+++ b/src/lspipat.lua
@@ -0,0 +1,168 @@
+--
+-- LSPIPAT - LUA SPIPAT WRAPPER
+-- Copyright (C) 2010, Robin Haberkorn
+-- License: LGPL
+--
+-- ADDITIONAL METHODS IMPLEMENTED IN LUA
+--
+
+module("spipat", package.seeall)
+
+--
+-- Module and Pattern methods
+--
+
+-- ssub(str, pattern, repl [, n [, flags]])
+-- Repeatedly matches `pattern' against `str' and substitutes the match.
+-- `repl' is the replacement string or a function(s, e) returning it
+-- (nil keeps the match as it is); a number `n' limits the rounds.
+-- Returns the resulting string and the number of matches.
+function ssub(str, pattern, repl, n, flags)
+	assert(type(repl) == "string" or type(repl) == "function",
+	       "Invalid replacement specified!")
+	assert(type(n) == "nil" or type(n) == "number",
+	       "Invalid repeat value specified!")
+
+	local cMatches = 0
+	repeat
+		-- cares about the remaining checks
+		local s, e = smatch(str, pattern, flags)
+		if not s then break end
+
+		local res = type(repl) == "string" and repl or repl(s, e)
+		assert(type(res) == "nil" or type(res) == "string",
+		       "Replacement function returned invalid value!")
+
+		if res then str = str:sub(1, s - 1)..res..str:sub(e + 1) end
+
+		if type(n) == "number" then n = n - 1 end
+		cMatches = cMatches + 1
+	until n == 0
+
+	return str, cMatches
+end
+
+-- siter(str, pattern [, flags])
+-- Returns an iterator function that yields the start and end position
+-- of one match per call, continuing behind the previous match.
+function siter(str, pattern, flags)
+	local startPos, endPos = nil, 0
+	pattern = Pos(function() return endPos end) * Arb() *
+		  #function(p) startPos = p + 1 end * pattern * #function(p) endPos = p end
+
+	return function()
+		if not smatch(str, pattern, flags) then return end
+		return startPos, endPos
+	end
+end
+
+--
+-- Primitives (shortcuts for deferring global variables)
+--
+
+local function genericSetGlobal(val, name) _G[name] = val end
+
+function _Setcur(name) return Setcur(genericSetGlobal, name) end
+_G._Setcur = _Setcur
+-- unfortunately, we can't register this as __len to strings...
+
+	-- NOTE: if global `name' is of an invalid type,
+	-- lspipat will raise an error automatically
+local function genericGetGlobal(name) return _G[name] end
+
+for _, prim in ipairs{
+	"Pred",						-- _Pred will be registered as __unm to strings
+	"Any", "Break", "BreakX", "NotAny", "NSpan", "Span",	-- string primitives
+	"Len", "Pos", "RPos", "RTab", "Tab"			-- number primitives
+} do
+	local _prim = "_"..prim
+
+	spipat[_prim] = function(name) return spipat[prim](genericGetGlobal, name) end
+	_G[_prim] = spipat[_prim]
+end
+
+-- FIXME: local cookie support for assignments -> shortcuts for assignment of global variables
+
+--
+-- POSIX Extended Regular Expressions To SPITBOL Pattern Compiler
+--
+
+-- RegExp(str [, captures])
+-- Translates the regular expression `str' into a pattern: `str' is
+-- matched against a grammar whose callbacks assemble the result on a stack.
+-- With a `captures' table, the text matched by each group is appended to it.
+-- Fails with an error if `str' is not accepted by the grammar.
+function RegExp(str, captures)
+	assert(type(captures) == "nil" or type(captures) == "table",
+	       "Invalid captures table given!")
+
+	local stack = {}
+	local function push(v) table.insert(stack, v) end
+	local function pop() return table.remove(stack) end
+	local r2p = {["."] = Len(1), ["^"] = Pos(0), ["$"] = RPos(0)}
+
+	local set
+	local function add(c) table.insert(set, c) return c end
+
+	local classes = {
+		blank = " \t",
+		punct = [[-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~]],
+		lower = "abcdefghijklmnopqrstuvwxyz",
+		digit = "0123456789"
+	}
+	classes.upper = classes.lower:upper()
+	classes.alpha = classes.upper..classes.lower
+	classes.alnum = classes.alpha..classes.digit
+	classes.word = classes.alnum.."_"
+	classes.xdigit = classes.upper:sub(1, 6)..classes.lower:sub(1, 6)..classes.digit
+	classes.space = classes.blank.."\r\n\v\f"
+	-- TODO: some character classes are still missing...
+
+	local function exp() return exp end
+	local function seq() return seq end
+	local atom = ( "\\" * (Len(1) % push)
+		     + NotAny(".[]^$()*+?|{}") % push
+		     + Any(".^$") % function(r) push(r2p[r]) end
+		     + "[" * ( "^" * -function() push(NotAny) set = {} end
+			     + -function() push(Any) set = {} end )
+			   * (topattern("]") % add + "")
+			   * Arbno( "[:" * (Break(":") % push) * ":]" * -function() return add(classes[pop()]) ~= nil end
+				  + Len(1) * "-" * Len(1)
+				    % function(range) for c = range:byte(), range:byte(3) do add(string.char(c)) end end
+				  + Len(1) % add )
+			   * "]" * -function() push(pop()(table.concat(set))) end
+		     + "(" * -exp * ")"
+			   * -function() if captures then
+					   push(topattern(pop()) / function(cap) table.insert(captures, cap) end) end end )
+		   * ( "*" * ( "?" * -function() push(Arbno(pop())) end
+			     + -function() local r; r = pop() * -function() return r end + ""
+					   push(r) end )
+		     + "+" * -function() local r; r = pop() * (-function() return r end + "")
+				       push(r) end
+		     + "?" * -function() push(topattern("") + pop()) end
+		     + "{" * ( Span(classes.digit) % push ) * ","
+			   * ( Span(classes.digit)
+			       % function(max) local min, c = pop()
+					       local r; r = pop() * -function() c = c + 1
+									     return c >= tonumber(max) or r end + ""
+					       push(-function() c = 0 end * r * -function() return c >= tonumber(min) end) end )
+			   * "}"
+		     + "" )
+	seq = ( atom * -function() local rvalue, lvalue = pop(), pop()
+				   push(type(lvalue) == "string" and type(rvalue) == "string" and
+					lvalue..rvalue or lvalue * rvalue) end
+		     * (-seq + "") + "" )
+	      * ( "|" * -exp * -function() local pat = pop() push(pop() + topattern(pat)) end
+		+ "" )
+	exp = atom * seq
+
+	assert(smatch(str, exp * RPos(0), match_anchored),
+	       "Invalid regular expression!")
+
+	return stack[1]
+end
+_G.RegExp = RegExp
+
+	-- load C core, also registers Lua functions into metatables we cannot
+	-- access from Lua
+require "lspipat.core"
\ No newline at end of file
diff --git a/src/misc.c b/src/misc.c
new file mode 100644
index 0000000..2bea4c8
--- /dev/null
+++ b/src/misc.c
@@ -0,0 +1,97 @@
+/*
+ * LSPIPAT - LUA SPIPAT WRAPPER
+ * Copyright (C) 2010, Robin Haberkorn
+ * License: LGPL
+ *
+ * CORE: MISCELLANEOUS PRIMITIVES/CONSTRUCTORS
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#include "lspipat.h"
+
+/*
+ * Arbno(arg): `arg' is a string/number or a pattern. A pattern argument
+ * is referenced from the registry and recorded in the new wrapper.
+ */
+LUA_SIG(l_primitive_arbno)
+{
+	int top = lua_gettop(L);
+
+	VString str = VSTRING_INITIALIZER;
+	PATTERN_WRAPPER *new;
+
+	luaL_argcheck(L, top == 1, top, L_NUMBER);
+
+	if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER))))
+		L_ERROR(L_ALLOC);
+	memset(new, 0, sizeof(PATTERN_WRAPPER));
+
+	if (lua_isstring(L, 1)) {
+		str.ptr = lua_tolstring(L, 1, (size_t *)&str.len);
+
+		new->pattern = str.len == 1 ? spipat_arbno_chr(*str.ptr)
+					    : spipat_arbno_str(str);
+	} else {
+		PATTERN_WRAPPER *wrapper = luaL_checkudata(L, 1, PATTERN_MT);
+		luaL_argcheck(L, wrapper->pattern, 1, L_FREED);
+
+		lua_insert(L, 1); /* move wrapper to bottom */
+		new->type = PATTERN_ONESUBPAT;
+		new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX);
+		/* wrapper at top again */
+
+		new->pattern = spipat_arbno(wrapper->pattern);
+	}
+
+	if (!new->pattern)
+		L_ERROR(L_ALLOC);
+
+	luaL_getmetatable(L, PATTERN_MT);
+	lua_setmetatable(L, -2);
+
+	return 1;
+
+}
+
+/*
+ * Fence([pattern]): the simple fence without argument, otherwise the
+ * fence function of `pattern', which is referenced from the registry.
+ */
+LUA_SIG(l_primitive_fence)
+{
+	int top = lua_gettop(L);
+	PATTERN_WRAPPER *new;
+
+	luaL_argcheck(L, top < 2, top, L_NUMBER);
+
+	if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER))))
+		L_ERROR(L_ALLOC);
+	memset(new, 0, sizeof(PATTERN_WRAPPER));
+
+	if (!top) {
+		new->pattern = spipat_fence_simple();
+	} else {
+		PATTERN_WRAPPER *wrapper = luaL_checkudata(L, 1, PATTERN_MT);
+		luaL_argcheck(L, wrapper->pattern, 1, L_FREED);
+
+		lua_insert(L, 1); /* move wrapper to bottom */
+		new->type = PATTERN_ONESUBPAT;
+		new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX);
+		/* wrapper at top again */
+
+		new->pattern = spipat_fence_function(wrapper->pattern);
+	}
+
+	if (!new->pattern)
+		L_ERROR(L_ALLOC);
+
+	luaL_getmetatable(L, PATTERN_MT);
+	lua_setmetatable(L, -2);
+
+	return 1;
+}
diff --git a/src/render.c b/src/render.c
new file mode 100644
index 0000000..28c96ce
--- /dev/null
+++ b/src/render.c
@@ -0,0 +1,146 @@
+/*
+ * LSPIPAT - LUA SPIPAT WRAPPER
+ * Copyright (C) 2010, Robin Haberkorn
+ * License: LGPL
+ *
+ * CORE: RENDER-TO-STRING OPERATION
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "lspipat.h"
+
+#ifdef USE_SPIPAT_IMAGE_CUSTOM
+
+static const char *lspipat_strs[] = { /* left out elements that can't be constructed with lspipat */
+	[PC_Abort]		= "Abort",
+	[PC_Alt]		= " + ",
+	[PC_Any_CH]		= "Any",
+	[PC_Any_CS]		= "Any",
+	[PC_Any_VF]		= "Any",
+	[PC_Arb_X]		= "Arb",
+	[PC_Arbno_S]		= "Arbno",
+	[PC_Arbno_X]		= "Arbno",
+	[PC_Bal]		= "Bal",
+	[PC_BreakX_CH]		= "BreakX",
+	[PC_BreakX_CS]		= "BreakX",
+	[PC_BreakX_VF]		= "BreakX",
+	[PC_Break_CH]		= "Break",
+	[PC_Break_CS]		= "Break",
+	[PC_Break_VF]		= "Break",
+	[PC_Call_Imm]		= " % ",
+	[PC_Call_OnM]		= " / ",
+	[PC_Fail]		= "Fail",
+	[PC_Fence]		= "Fence",
+	[PC_Fence_X]		= "Fence",
+	[PC_Len_NF]		= "Len",
+	[PC_Len_Nat]		= "Len",
+	[PC_NSpan_CH]		= "NSpan",
+	[PC_NSpan_CS]		= "NSpan",
+	[PC_NSpan_VF]		= "NSpan",
+	[PC_NotAny_CH]		= "NotAny",
+	[PC_NotAny_CS]		= "NotAny",
+	[PC_NotAny_VF]		= "NotAny",
+	[PC_Null]		= "\"\"",
+	[PC_Pos_NF]		= "Pos",
+	[PC_Pos_Nat]		= "Pos",
+	[PC_RPos_NF]		= "RPos",
+	[PC_RPos_Nat]		= "RPos",
+	[PC_RTab_NF]		= "RTab",
+	[PC_RTab_Nat]		= "RTab",
+	[PC_Rem]		= "Rem",
+	[PC_Setcur_Func]	= "#",		/* also: Setcur */
+	[PC_Span_CH]		= "Span",
+	[PC_Span_CS]		= "Span",
+	[PC_Span_VF]		= "Span",
+	[PC_Succeed]		= "Succeed",
+	[PC_Tab_NF]		= "Tab",
+	[PC_Tab_Nat]		= "Tab",
+	[PC_Dynamic_Func]	= "-"		/* also: Pred */
+};
+
+/* TODO: Define some custom Append functions */
+
+/*
+ * __tostring: renders into a 1024 byte stack buffer first; if that is
+ * too small, renders again into a heap buffer of the reported length.
+ */
+LUA_SIG(l_tostring)
+{
+	char buf[1024], *bigbuf;
+	unsigned len;
+
+	struct state state = {
+		.ptr	= buf,
+		.size	= sizeof(buf)
+	};
+
+	PATTERN_WRAPPER *wrapper = lua_touserdata(L, 1); /* parameter is definitely a pattern */
+
+	luaL_argcheck(L, wrapper->pattern, 1, L_FREED);
+
+	spipat_image_init_state(&state);
+	state.cquote = "\"";
+	state.concat = " * ";
+	state.strings = lspipat_strs;
+
+	len = spipat_image_custom(&state, wrapper->pattern);
+	if (len < sizeof(buf)) {
+		lua_pushlstring(L, buf, len);
+		return 1;
+	}
+
+	/* sizeof(buf) was too small */
+
+	state.size = len + 1;
+	if (!(bigbuf = malloc(state.size)))
+		L_ERROR(L_ALLOC);
+	state.ptr = bigbuf;
+
+	spipat_image_custom(&state, wrapper->pattern);
+	lua_pushlstring(L, bigbuf, len);
+
+	free(bigbuf);
+
+	return 1;
+}
+
+#else
+
+/*
+ * __tostring: renders into a 1024 byte stack buffer first; if that is
+ * too small, renders again into a heap buffer of the reported length.
+ */
+LUA_SIG(l_tostring)
+{
+	char buf[1024], *bigbuf;
+	unsigned len;
+
+	PATTERN_WRAPPER *wrapper = lua_touserdata(L, 1); /* parameter is definitely a pattern */
+
+	luaL_argcheck(L, wrapper->pattern, 1, L_FREED);
+
+	len = spipat_image(wrapper->pattern, buf, sizeof(buf));
+	if (len < sizeof(buf)) {
+		lua_pushlstring(L, buf, len);
+		return 1;
+	}
+
+	/* sizeof(buf) was too small */
+
+	if (!(bigbuf = malloc(len + 1)))
+		L_ERROR(L_ALLOC);
+
+	spipat_image(wrapper->pattern, bigbuf, len + 1);
+	lua_pushlstring(L, bigbuf, len);
+
+	free(bigbuf);
+
+	return 1;
+}
+
+#endif
diff --git a/src/simple.c b/src/simple.c
new file mode 100644
index 0000000..d407129
--- /dev/null
+++ b/src/simple.c
@@ -0,0 +1,58 @@
+/*
+ * LSPIPAT - LUA SPIPAT WRAPPER
+ * Copyright (C) 2010, Robin Haberkorn
+ * License: LGPL
+ *
+ * CORE: SIMPLE PRIMITIVES/CONSTRUCTORS
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#include "lspipat.h"
+
+struct simplePrimitive {
+	struct pat *(*simple)(void);
+};
+
+/* common body of the primitives that take no arguments at all */
+static int
+genericSimplePrimitive(lua_State *L, struct simplePrimitive spipat)
+{
+	int top = lua_gettop(L);
+	PATTERN_WRAPPER *new;
+
+	luaL_argcheck(L, !top, top, L_NUMBER);
+
+	if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER))))
+		L_ERROR(L_ALLOC);
+	memset(new, 0, sizeof(PATTERN_WRAPPER));
+
+	if (!(new->pattern = spipat.simple()))
+		L_ERROR(L_ALLOC);
+
+	luaL_getmetatable(L, PATTERN_MT);
+	lua_setmetatable(L, -2);
+
+	return 1;
+}
+
+#define STDSIMPLEPRIM(LFNC, SPIFNC)					\
+	LUA_SIG(LFNC)							\
+	{								\
+		return genericSimplePrimitive(L, (struct simplePrimitive) { \
+			.simple = SPIFNC				\
+		});							\
+	}
+
+STDSIMPLEPRIM(l_primitive_abort, spipat_abort)
+STDSIMPLEPRIM(l_primitive_arb, spipat_arb)
+STDSIMPLEPRIM(l_primitive_bal, spipat_bal)
+STDSIMPLEPRIM(l_primitive_fail, spipat_fail)
+STDSIMPLEPRIM(l_primitive_rem, spipat_rem)
+STDSIMPLEPRIM(l_primitive_succeed, spipat_succeed)
+
+#undef STDSIMPLEPRIM
diff --git a/src/string.c b/src/string.c
new file mode 100644
index 0000000..43f266c
--- /dev/null
+++ b/src/string.c
@@ -0,0 +1,140 @@
+/*
+ * LSPIPAT - LUA SPIPAT WRAPPER
+ * Copyright (C) 2010, Robin Haberkorn
+ * License: LGPL
+ *
+ * CORE: STRING PRIMITIVES/CONSTRUCTORS
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#include "lspipat.h"
+
+/*
+ * spipat callback: calls the registered Lua function with its cookie
+ * and hands the returned string to spipat.
+ */
+static VString
+stringFncHandler(void *global __attribute__((unused)), void *local)
+{
+	struct retfncRefs *retfnc = local;
+	lua_State *L = retfnc->cb.L;
+
+	VString ret;
+
+	lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.function);
+	lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.cookie);
+#if 0
+	lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global);
+#endif
+
+	lua_call(L, 1, 1);
+
+	if (!lua_isstring(L, -1)) {
+		lua_pop(L, 1);
+		L_ERROR(L_RETURN); /* FIXME: is it safe to raise errors? */
+	}
+
+	ret.ptr = lua_tolstring(L, -1, (size_t *)&ret.len);
+	ret.release = retfncUnrefRet;
+	ret.cookie = retfnc;
+
+	/*
+	 * Register value so Lua doesn't free it until spipat
+	 * doesn't need it anymore (value has to be popped now)
+	 */
+	retfnc->ret = luaL_ref(L, LUA_REGISTRYINDEX);
+	return ret;
+}
+
+struct stringPrimitive {
+	struct pat *(*chr)(Character);
+	struct pat *(*str)(VString);
+	struct pat *(*fnc)(VString (*)(void *, void*), void *);
+};
+
+/*
+ * Common body of the string primitives: argument 1 is either a
+ * string/number that is used directly, or a function (with an optional
+ * cookie as argument 2) that spipat calls back to obtain the string.
+ */
+static int
+genericStringPrimitive(lua_State *L, struct stringPrimitive spipat)
+{
+	int top = lua_gettop(L);
+
+	VString str = VSTRING_INITIALIZER;
+	PATTERN_WRAPPER *new;
+
+	luaL_argcheck(L, top, top, L_NUMBER);
+
+	if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER))))
+		L_ERROR(L_ALLOC);
+	memset(new, 0, sizeof(PATTERN_WRAPPER));
+
+	switch (lua_type(L, 1)) {
+	case LUA_TNUMBER:
+	case LUA_TSTRING:
+		luaL_argcheck(L, top == 1, top, L_NUMBER);
+
+		str.ptr = lua_tolstring(L, 1, (size_t *)&str.len);
+
+		new->pattern = str.len == 1 ? spipat.chr(*str.ptr)
+					    : spipat.str(str);
+		break;
+
+	case LUA_TFUNCTION: {
+		struct retfncRefs *retfnc;
+
+		luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER);
+
+		lua_insert(L, 1); /* move wrapper to bottom */
+		if (top == 1)
+			lua_pushnil(L); /* cookie will be LUA_REFNIL */
+
+		new->type = PATTERN_RETFNC;
+
+		retfnc = &new->u.retfnc;
+		retfnc->cb.L = L;
+		retfnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX);
+		retfnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX);
+		/* wrapper at top again */
+
+		new->pattern = spipat.fnc(stringFncHandler, retfnc);
+		break;
+	}
+	default:
+		return luaL_argerror(L, 1, L_TYPE);
+	}
+
+	if (!new->pattern)
+		L_ERROR(L_ALLOC);
+
+	luaL_getmetatable(L, PATTERN_MT);
+	lua_setmetatable(L, -2);
+
+	return 1;
+}
+
+#define STDSTRPRIM(LFNC, SPIFNC)					\
+	LUA_SIG(LFNC)							\
+	{								\
+		return genericStringPrimitive(L, (struct stringPrimitive) { \
+			.chr = SPIFNC##_chr,				\
+			.str = SPIFNC##_str,				\
+			.fnc = SPIFNC##_fnc				\
+		});							\
+	}
+
+STDSTRPRIM(l_primitive_any, spipat_any)
+STDSTRPRIM(l_primitive_break, spipat_break)
+STDSTRPRIM(l_primitive_breakx, spipat_breakx)
+STDSTRPRIM(l_primitive_notany, spipat_notany)
+STDSTRPRIM(l_primitive_nspan, spipat_nspan)
+STDSTRPRIM(l_primitive_span, spipat_span)
+
+#undef STDSTRPRIM
diff --git a/src/uint.c b/src/uint.c
new file mode 100644
index 0000000..1a0530b
--- /dev/null
+++ b/src/uint.c
@@ -0,0 +1,138 @@
+/*
+ * LSPIPAT - LUA SPIPAT WRAPPER
+ * Copyright (C) 2010, Robin Haberkorn
+ * License: LGPL
+ *
+ * CORE: UNSIGNED INTEGER PRIMITIVES/CONSTRUCTORS
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string.h>
+
+#include "lspipat.h"
+
+/*
+ * spipat callback: calls the registered Lua function with its cookie;
+ * nil yields 0, anything but a non-negative number raises an error.
+ */
+static unsigned
+uintFncHandler(void *global __attribute__((unused)), void *local)
+{
+	struct simplefncRefs *simplefnc = local;
+	lua_State *L = simplefnc->cb.L;
+
+	int val;
+
+	lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.function);
+	lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.cookie);
+#if 0
+	lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global);
+#endif
+
+	lua_call(L, 1, 1);
+
+	if (lua_isnil(L, -1)) {
+		lua_pop(L, 1);
+		return 0; /* default value */
+	}
+
+	if (!lua_isnumber(L, -1)) {
+		lua_pop(L, 1);
+		L_ERROR(L_RETURN); /* FIXME: is it safe to raise errors? */
+	}
+
+	val = lua_tointeger(L, -1);
+	lua_pop(L, 1);
+	if (val < 0)
+		L_ERROR(L_RETURN);
+
+	return (unsigned)val;
+}
+
+struct uintPrimitive {
+	struct pat *(*uint)(unsigned);
+	struct pat *(*fnc)(unsigned (*)(void *, void *), void *);
+};
+
+/*
+ * Common body of the unsigned integer primitives: argument 1 is an
+ * optional number (default 0) or a function with an optional cookie.
+ * Whether argument 1 exists is decided from `top': when called without
+ * arguments, stack index 1 already holds the userdata created here.
+ */
+static int
+genericUIntPrimitive(lua_State *L, struct uintPrimitive spipat)
+{
+	int top = lua_gettop(L);
+	PATTERN_WRAPPER *new;
+
+	if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER))))
+		L_ERROR(L_ALLOC);
+	memset(new, 0, sizeof(PATTERN_WRAPPER));
+
+	switch (top ? lua_type(L, 1) : LUA_TNONE) {
+	case LUA_TNONE:
+	case LUA_TNIL:
+	case LUA_TNUMBER:
+	case LUA_TSTRING: {
+		int val;
+
+		luaL_argcheck(L, top < 2, top, L_NUMBER);
+		val = top ? luaL_optint(L, 1, 0) : 0;
+		luaL_argcheck(L, val >= 0, 1, L_VALUE);
+
+		new->pattern = spipat.uint((unsigned)val);
+		break;
+	}
+	case LUA_TFUNCTION: {
+		struct simplefncRefs *simplefnc;
+
+		luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER);
+
+		lua_insert(L, 1); /* move wrapper to bottom */
+		if (top == 1)
+			lua_pushnil(L); /* cookie will be LUA_REFNIL */
+
+		new->type = PATTERN_SIMPLEFNC;
+
+		simplefnc = &new->u.simplefnc;
+		simplefnc->cb.L = L;
+		simplefnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX);
+		simplefnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX);
+		/* wrapper at top again */
+
+		new->pattern = spipat.fnc(uintFncHandler, simplefnc);
+		break;
+	}
+	default:
+		return luaL_argerror(L, 1, L_TYPE);
+	}
+
+	if (!new->pattern)
+		L_ERROR(L_ALLOC);
+
+	luaL_getmetatable(L, PATTERN_MT);
+	lua_setmetatable(L, -2);
+
+	return 1;
+}
+
+#define STDUINTPRIM(LFNC, SPIFNC)					\
+	LUA_SIG(LFNC)							\
+	{								\
+		return genericUIntPrimitive(L, (struct uintPrimitive) {	\
+			.uint = SPIFNC,					\
+			.fnc = SPIFNC##_fnc				\
+		});							\
+	}
+
+STDUINTPRIM(l_primitive_len, spipat_len)
+STDUINTPRIM(l_primitive_pos, spipat_pos)
+STDUINTPRIM(l_primitive_rpos, spipat_rpos)
+STDUINTPRIM(l_primitive_rtab, spipat_rtab)
+STDUINTPRIM(l_primitive_tab, spipat_tab)
+
+#undef STDUINTPRIM
diff --git a/src/unary.c b/src/unary.c
new file mode 100644
index 0000000..b3f40ef
--- /dev/null
+++ b/src/unary.c
@@ -0,0 +1,197 @@
+/*
+ * LSPIPAT - LUA SPIPAT WRAPPER
+ * Copyright (C) 2010, Robin Haberkorn
+ * License: LGPL
+ *
+ * CORE: UNARY OPERATORS (ALSO USED AS PRIMITIVES)
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdbool.h>
+#include <string.h>
+
+#include "lspipat.h"
+
+/* spipat callback: calls the registered Lua function with (position, cookie) */
+static void
+setcurFncHandler(unsigned pos, void *global __attribute__((unused)), void *local)
+{
+	struct simplefncRefs *simplefnc = local;
+	lua_State *L = simplefnc->cb.L;
+
+	lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.function);
+	lua_pushinteger(L, pos);
+	lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.cookie);
+#if 0
+	lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global);
+#endif
+
+	lua_call(L, 2, 0);
+}
+
+	/*
+	 * if called as an operator, there will be a nil on top of the stack
+	 */
+LUA_SIG(l_setcur)
+{
+	int top = lua_gettop(L);
+
+	PATTERN_WRAPPER *new;
+	struct simplefncRefs *simplefnc;
+
+	luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER);
+	luaL_argcheck(L, lua_isfunction(L, 1), 1, L_TYPE);
+
+	if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER))))
+		L_ERROR(L_ALLOC);
+	memset(new, 0, sizeof(PATTERN_WRAPPER));
+
+	lua_insert(L, 1); /* move wrapper to bottom */
+	if (top == 1)
+		lua_pushnil(L); /* cookie will be LUA_REFNIL */
+
+	new->type = PATTERN_SIMPLEFNC;
+
+	simplefnc = &new->u.simplefnc;
+	simplefnc->cb.L = L;
+	simplefnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX);
+	simplefnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX);
+	/* wrapper at top again */
+
+	new->pattern = spipat_setcur_fnc(setcurFncHandler, simplefnc);
+	if (!new->pattern)
+		L_ERROR(L_ALLOC);
+
+	luaL_getmetatable(L, PATTERN_MT);
+	lua_setmetatable(L, -2);
+
+	return 1;
+}
+
+/*
+ * spipat callback: calls the registered Lua function with its cookie
+ * and translates the result: string/number, nil (succeed), boolean,
+ * or pattern; anything else raises an error.
+ */
+static void
+predFncHandler(void *global __attribute__((unused)), void *local, struct dynamic *ret)
+{
+	struct retfncRefs *retfnc = local;
+	lua_State *L = retfnc->cb.L;
+
+	lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.function);
+	lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.cookie);
+#if 0
+	lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global);
+#endif
+
+	lua_call(L, 1, 1);
+
+	switch (lua_type(L, -1)) {
+	case LUA_TNUMBER:
+	case LUA_TSTRING: {
+		VString *str = &ret->val.str;
+
+		ret->type = DY_VSTR;
+
+		str->ptr = lua_tolstring(L, -1, (size_t *)&str->len);
+		str->release = retfncUnrefRet;
+		str->cookie = retfnc;
+
+		/*
+		 * Register value so Lua doesn't free it until spipat
+		 * doesn't need it anymore (value has to be popped now)
+		 */
+		retfnc->ret = luaL_ref(L, LUA_REGISTRYINDEX);
+		return;
+	}
+	case LUA_TNIL: /* default behaviour: continue matching (Succeed) */
+		ret->type = DY_BOOL;
+
+		ret->val.pred = true;
+
+		lua_pop(L, 1);
+		return;
+
+	case LUA_TBOOLEAN:
+		ret->type = DY_BOOL;
+
+		ret->val.pred = lua_toboolean(L, -1);
+
+		lua_pop(L, 1);
+		return;
+
+	case LUA_TUSERDATA: {
+		PATTERN_WRAPPER *wrapper = lua_touserdata(L, -1);
+		int isPattern = 0;
+
+		/* compare metatables by hand: foreign userdata must not be used as a wrapper */
+		if (lua_getmetatable(L, -1)) {
+			luaL_getmetatable(L, PATTERN_MT);
+			isPattern = lua_rawequal(L, -1, -2);
+			lua_pop(L, 2);
+		}
+		if (!isPattern || !wrapper->pattern) {
+			lua_pop(L, 1);
+			L_ERROR(L_RETURN);
+		}
+
+		ret->type = DY_PAT;
+
+		ret->val.pat.p = wrapper->pattern;
+		ret->val.pat.release = retfncUnrefRet;
+		ret->val.pat.cookie = retfnc;
+
+		/*
+		 * Register value so Lua doesn't free it until spipat
+		 * doesn't need it anymore (value has to be popped now)
+		 */
+		retfnc->ret = luaL_ref(L, LUA_REGISTRYINDEX);
+		return;
+	}
+	default:
+		lua_pop(L, 1);
+		L_ERROR(L_RETURN);
+	}
+
+	/* not reached */
+}
+
+/* Pred(function [, cookie]); also installed as unary minus on functions */
+LUA_SIG(l_pred)
+{
+	int top = lua_gettop(L);
+
+	PATTERN_WRAPPER *new;
+	struct retfncRefs *retfnc;
+
+	luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER);
+	luaL_argcheck(L, lua_isfunction(L, 1), 1, L_TYPE);
+
+	if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER))))
+		L_ERROR(L_ALLOC);
+	memset(new, 0, sizeof(PATTERN_WRAPPER));
+
+	lua_insert(L, 1); /* move wrapper to bottom */
+	if (top == 1)
+		lua_pushnil(L); /* cookie will be LUA_REFNIL */
+
+	new->type = PATTERN_RETFNC;
+	retfnc = &new->u.retfnc;
+	retfnc->cb.L = L;
+	retfnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX);
+	retfnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX);
+	/* wrapper at top again */
+
+	new->pattern = spipat_dynamic_fnc(predFncHandler, retfnc);
+	if (!new->pattern)
+		L_ERROR(L_ALLOC);
+
+	luaL_getmetatable(L, PATTERN_MT);
+	lua_setmetatable(L, -2);
+
+	return 1;
+}