From d3148268857e01116d5d3c99ac0a43bc6a54b13c Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Wed, 29 Dec 2010 16:26:25 +0100 Subject: initial checkin (v0.1 release) --- AUTHORS | 0 COPYING | 165 +++ ChangeLog | 0 Makefile.am | 7 + NEWS | 0 README | 1 + configure.ac | 118 +++ doc/Makefile.am | 15 + doc/html_custom.xsl | 6 + doc/html_titlepage.spec.xml | 688 +++++++++++++ doc/lspipat.png | Bin 0 -> 4266 bytes doc/pattern.txt | 1017 +++++++++++++++++++ doc/reference.xml | 2005 +++++++++++++++++++++++++++++++++++++ samples/exp2bf.lua | 48 + samples/regexp.lua | 26 + samples/wave.lua | 81 ++ spipat-patches/0.9.3+_image.patch | 94 ++ src/Makefile.am | 28 + src/call.c | 86 ++ src/compose.c | 106 ++ src/lspipat.c | 336 +++++++ src/lspipat.h | 149 +++ src/lspipat.lua | 155 +++ src/misc.c | 89 ++ src/render.c | 138 +++ src/simple.c | 57 ++ src/string.c | 131 +++ src/uint.c | 128 +++ src/unary.c | 182 ++++ 29 files changed, 5856 insertions(+) create mode 100644 AUTHORS create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 Makefile.am create mode 100644 NEWS create mode 100644 README create mode 100644 configure.ac create mode 100644 doc/Makefile.am create mode 100644 doc/html_custom.xsl create mode 100644 doc/html_titlepage.spec.xml create mode 100644 doc/lspipat.png create mode 100644 doc/pattern.txt create mode 100644 doc/reference.xml create mode 100755 samples/exp2bf.lua create mode 100644 samples/regexp.lua create mode 100755 samples/wave.lua create mode 100644 spipat-patches/0.9.3+_image.patch create mode 100644 src/Makefile.am create mode 100644 src/call.c create mode 100644 src/compose.c create mode 100644 src/lspipat.c create mode 100644 src/lspipat.h create mode 100644 src/lspipat.lua create mode 100644 src/misc.c create mode 100644 src/render.c create mode 100644 src/simple.c create mode 100644 src/string.c create mode 100644 src/uint.c create mode 100644 src/unary.c diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 
0000000..e69de29 diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..65c5ca8 --- /dev/null +++ b/COPYING @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. + + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. 
+ + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. 
+ + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. + + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. 
(If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. 
+ + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..e69de29 diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 0000000..aed7d2a --- /dev/null +++ b/Makefile.am @@ -0,0 +1,7 @@ + +SUBDIRS = src doc + +EXTRA_DIST = samples/exp2bf.lua \ + samples/wave.lua \ + samples/regexp.lua \ + spipat-patches/0.9.3+_image.patch diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..e69de29 diff --git a/README b/README new file mode 100644 index 0000000..b5ebca5 --- /dev/null +++ b/README @@ -0,0 +1 @@ +read doc/reference.html for Installation Notes & Module Reference diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..6e5cb13 --- /dev/null +++ b/configure.ac @@ -0,0 +1,118 @@ +# -*- Autoconf -*- +# Process this file with autoconf to produce a configure script. + +AC_PREREQ([2.64]) +AC_INIT([SNOBOL/SPITBOL Patterns for Lua], [0.1], [robin.haberkorn@googlemail.com], [lspipat]) +AM_INIT_AUTOMAKE +AC_CONFIG_SRCDIR([src/lspipat.c]) +AC_CONFIG_HEADERS([config.h]) + +LT_INIT([disable-static]) + +# Checks for programs. +AC_PROG_CC +AC_PROG_INSTALL + +# Lua compiler (optional) +AC_CHECK_PROG(LUAC, luac5.1, luac5.1) +AC_CHECK_PROG(LUAC, luac, luac) + +LUAC_FLAGS= +AC_SUBST(LUAC_FLAGS) + +# XSLTProc (optional) +AC_CHECK_PROG(XSLTPROC, xsltproc, xsltproc) + +XSLT_FLAGS="--xinclude" +AC_SUBST(XSLT_FLAGS) + +# Checks for libraries. 
+ +# libspipat +AC_CHECK_LIB([spipat], [spipat_match2], , [ + AC_MSG_ERROR([libspipat (Spipat library) not found!]) +]) + +# liblua (care about different distributions) +AC_CHECK_LIB([lua5.1], [lua_call], , [ + AC_CHECK_LIB([lua], [lua_call], , [ + AC_MSG_ERROR([liblua (Lua 5.1 library) not found!]) + ]) +]) + +# Checks for header files. +AC_CHECK_HEADERS([stdint.h stdlib.h string.h stdbool.h]) + +# spipat headers +AC_CHECK_HEADERS([spipat.h], , [ + AC_MSG_ERROR([Spipat header not found!]) +], [ + #include + #include +]) + +# spipat_impl.h/spipat_image.h are not installed by default and are thus optional +AC_CHECK_HEADERS([spipat_impl.h spipat_image.h], , [ + AC_MSG_WARN([Optional spipat header not found! You are strongly encouraged to specify spipat's source dir in CPPFLAGS.]) +], [ + #include + #include + #include +]) + +# Lua headers (care about different distributions) +AC_CHECK_HEADERS([lua5.1/lua.h lua5.1/lauxlib.h lua5.1/lualib.h], , [ + AC_CHECK_HEADERS([lua.h lauxlib.h lualib.h], , [ + AC_MSG_ERROR([Lua 5.1 headers not found!]) + ]) + break +]) + +# Checks for typedefs, structures, and compiler characteristics. +AC_C_INLINE +AC_TYPE_SIZE_T +AC_HEADER_STDBOOL +AC_TYPE_UINT32_T + +# Checks for library functions. +AC_CHECK_FUNCS([memset]) + +# Package Configuration + +AC_ARG_ENABLE(lua-libdir, + AS_HELP_STRING([--enable-lua-libdir=DIR], + [Install lspipat into this directory (default is LIBDIR/lua/5.1)]), + [lualibdir=$enable_lua_libdir], [lualibdir=${libdir}/lua/5.1]) +AC_SUBST(lualibdir) +lualib_lspipatdir=${lualibdir}/lspipat +AC_SUBST(lualib_lspipatdir) + +AC_ARG_ENABLE(lua-precompile, + AS_HELP_STRING([--enable-lua-precompile], + [Enable precompilation of Lua source files (default is yes)]), + [lua_precompile=$enableval], [lua_precompile=yes]) +AM_CONDITIONAL([LUA_PRECOMPILE], [test x$lua_precompile = xyes]) + +if test x$lua_precompile = xyes -a x$LUAC = x; then + AC_MSG_ERROR([Lua chunk precompilation enabled, but Lua 5.1 compiler not found! 
Try --disable-lua-precompile.]) +fi + +AC_ARG_ENABLE(lua-strip, + AS_HELP_STRING([--enable-lua-strip], + [Strip compiled Lua source files (default is yes)]), + [lua_strip=$enableval], [lua_strip=yes]) +if test x$lua_strip = xyes; then + LUAC_FLAGS+=" -s" +fi + +AC_ARG_ENABLE(html-doc, + AS_HELP_STRING([--enable-html-doc], + [Generate HTML documentation (default is yes)]), + [html_doc=$enableval], [html_doc=yes]) + +if test x$html_doc = xyes -a x$XSLTPROC = x; then + AC_MSG_ERROR([Enabled generating documentation, but XSLTProc not found! Try --disable-html-doc.]) +fi + +AC_CONFIG_FILES([Makefile src/Makefile doc/Makefile]) +AC_OUTPUT diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..f8ff134 --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,15 @@ +## Docbook processing - very simplistic at the moment + +DB_URI = http://docbook.sourceforge.net/release/xsl/current + +dist_doc_DATA = pattern.txt + +dist_html_DATA = reference.html lspipat.png +CLEANFILES = reference.html html_titlepage.xsl +EXTRA_DIST = reference.xml html_custom.xsl html_titlepage.spec.xml + +reference.html : reference.xml html_custom.xsl html_titlepage.xsl + @XSLTPROC@ @XSLT_FLAGS@ -o $@ html_custom.xsl $< + +html_titlepage.xsl : html_titlepage.spec.xml + @XSLTPROC@ @XSLT_FLAGS@ -o $@ $(DB_URI)/template/titlepage.xsl $< diff --git a/doc/html_custom.xsl b/doc/html_custom.xsl new file mode 100644 index 0000000..af94064 --- /dev/null +++ b/doc/html_custom.xsl @@ -0,0 +1,6 @@ + + + + + + diff --git a/doc/html_titlepage.spec.xml b/doc/html_titlepage.spec.xml new file mode 100644 index 0000000..ea44036 --- /dev/null +++ b/doc/html_titlepage.spec.xml @@ -0,0 +1,688 @@ + + + + + + + + + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + 
<hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="set" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="book" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <mediaobject/> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="part" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="division.title" + param:node="ancestor-or-self::part[1]"/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + 
<revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="partintro" t:wrapper="div"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="reference" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="refentry" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> +<!-- uncomment this if you want refentry titlepages + <title t:force="1" + t:named-template="refentry.title" + param:node="ancestor-or-self::refentry[1]"/> +--> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> 
+ </t:titlepage-content> + + <t:titlepage-separator/> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + + <t:titlepage t:element="dedication" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::dedication[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="acknowledgements" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::acknowledgements[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="preface" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + 
</t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="chapter" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="appendix" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="section" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if 
test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect1" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect2" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect3" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + 
</t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect4" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect5" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="simplesect" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before 
t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="bibliography" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::bibliography[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="glossary" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::glossary[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="index" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::index[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- 
==================================================================== --> + +<t:titlepage t:element="setindex" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::setindex[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +</t:templates> diff --git a/doc/lspipat.png b/doc/lspipat.png new file mode 100644 index 0000000..317751d Binary files /dev/null and b/doc/lspipat.png differ diff --git a/doc/pattern.txt b/doc/pattern.txt new file mode 100644 index 0000000..e0d1719 --- /dev/null +++ b/doc/pattern.txt @@ -0,0 +1,1017 @@ +Copyright (C) 2007,2008, Philip L. Budne +Copyright (C) 1998-2005, AdaCore + +This documentation (and the underlying software) developed from the +GNAT.SPITBOL.PATTERNS package of GNU Ada. GNAT was originally +developed by the GNAT team at New York University. Extensive +contributions were provided by Ada Core Technologies Inc. + +SPITBOL-like pattern construction and matching + +This child package of GNAT.SPITBOL provides a complete implementation +of the SPITBOL-like pattern construction and matching operations. This +package is based on Macro-SPITBOL created by Robert Dewar. + +This is a completely general pattern matching package based on the +pattern language of SNOBOL4, as implemented in SPITBOL. The pattern +language is modeled on context free grammars, with context sensitive +extensions that provide full (type 0) computational capabilities. 
+ +------------------------------- +Pattern Matching Tutorial +------------------------------- + +A pattern matching operation (a call to one of the Match subprograms) +takes a subject string and a pattern, and optionally a replacement +string. The replacement string option is only allowed if the subject +is a variable. + +The pattern is matched against the subject string, and either the +match fails, or it succeeds matching a contiguous substring. If a +replacement string is specified, then the subject string is modified +by replacing the matched substring with the given replacement. + +Concatenation and Alternation +============================= + +A pattern consists of a series of pattern elements. The pattern is +built up using either the concatenation operator: + + A & B + + which means match A followed immediately by matching B, or the + alternation operator: + + A | B + + which means first attempt to match A, and then if that does not + succeed, match B. + + There is full backtracking, which means that if a given pattern + element fails to match, then previous alternatives are matched. + For example if we have the pattern: + + (A | B) & (C | D) & (E | F) + + First we attempt to match A, if that succeeds, then we go on to try + to match C, and if that succeeds, we go on to try to match E. If E + fails, then we try F. If F fails, then we go back and try matching + D instead of C. Let's make this explicit using a specific example, + and introducing the simplest kind of pattern element, which is a + literal string. The meaning of this pattern element is simply to + match the characters that correspond to the string characters. Now + let's rewrite the above pattern form with specific string literals + as the pattern elements: + + ("ABC" | "AB") & ("DEF" | "CDE") & ("GH" | "IJ") + + The following strings will be attempted in sequence: + + ABC . DEF . GH + ABC . DEF . IJ + ABC . CDE . GH + ABC . CDE . IJ + AB . DEF . GH + AB . DEF . IJ + AB . CDE . GH + AB . CDE . 
IJ + + Here we use the dot simply to separate the pieces of the string + matched by the three separate elements. + + Moving the Start Point + ====================== + + A pattern is not required to match starting at the first character + of the string, and is not required to match to the end of the string. + The first attempt does indeed attempt to match starting at the first + character of the string, trying all the possible alternatives. But + if all alternatives fail, then the starting point of the match is + moved one character, and all possible alternatives are attempted at + the new anchor point. + + The entire match fails only when every possible starting point has + been attempted. As an example, suppose that we had the subject + string + + "ABABCDEIJKL" + + matched using the pattern in the previous example: + + ("ABC" | "AB") & ("DEF" | "CDE") & ("GH" | "IJ") + + would succeed, after two anchor point moves: + + "ABABCDEIJKL" + ^^^^^^^ + matched + section + + This mode of pattern matching is called the unanchored mode. It is + also possible to put the pattern matcher into anchored mode by + setting the global variable Anchored_Mode to True. This will cause + all subsequent matches to be performed in anchored mode, where the + match is required to start at the first character. + + We will also see later how the effect of an anchored match can be + obtained for a single specified anchor point if this is desired. + + Other Pattern Elements + ====================== + + In addition to strings (or single characters), there are many special + pattern elements that correspond to special predefined alternations: + + Arb Matches any string. First it matches the null string, and + then on a subsequent failure, matches one character, and + then two characters, and so on. It only fails if the + entire remaining string is matched. + + Bal Matches a non-empty string that is parentheses balanced + with respect to ordinary () characters.
Examples of + balanced strings are "ABC", "A((B)C)", and "A(B)C(D)E". + Bal matches the shortest possible balanced string on the + first attempt, and if there is a subsequent failure, + attempts to extend the string. + + Abort Immediately aborts the entire pattern match, signalling + failure. This is a specialized pattern element, which is + useful in conjunction with some of the special pattern + elements that have side effects. + + Fail The null alternation. Matches no possible strings, so it + always signals failure. This is a specialized pattern + element, which is useful in conjunction with some of the + special pattern elements that have side effects. + + Fence Matches the null string at first, and then if a failure + causes alternatives to be sought, aborts the match (like + a Cancel). Note that using Fence at the start of a pattern + has the same effect as matching in anchored mode. + + Rem Matches from the current point to the last character in + the string. This is a specialized pattern element, which + is useful in conjunction with some of the special pattern + elements that have side effects. + + Succeed Repeatedly matches the null string (it is equivalent to + the alternation ("" | "" | "" ....)). This is a special + pattern element, which is useful in conjunction with some + of the special pattern elements that have side effects. + + Pattern Construction Functions + ============================== + + The following functions construct additional pattern elements + + Any(S) Where S is a string, matches a single character that is + any one of the characters in S. Fails if the current + character is not one of the given set of characters. + + Arbno(P) Where P is any pattern, matches any number of instances + of the pattern, starting with zero occurrences. It is + thus equivalent to ("" | (P & ("" | (P & ("" ....)))). + The pattern P may contain any number of pattern elements + including the use of alternation and concatenation.
+ + Break(S) Where S is a string, matches a string of zero or more + characters up to but not including a break character + that is one of the characters given in the string S. + Can match the null string, but cannot match the last + character in the string, since a break character is + required to be present. + + BreakX(S) Where S is a string, behaves exactly like Break(S) when + it first matches, but if a string is successfully matched, + then a subsequent failure causes an attempt to extend the + matched string. + + Fence(P) Where P is a pattern, attempts to match the pattern P + including trying all possible alternatives of P. If none + of these alternatives succeeds, then the Fence pattern + fails. If one alternative succeeds, then the pattern + match proceeds, but on a subsequent failure, no attempt + is made to search for alternative matches of P. The + pattern P may contain any number of pattern elements + including the use of alternation and concatenation. + + Len(N) Where N is a natural number, matches the given number of + characters. For example, Len(10) matches any string that + is exactly ten characters long. + + NotAny(S) Where S is a string, matches a single character that is + not one of the characters of S. Fails if the current + character is one of the given set of characters. + + NSpan(S) Where S is a string, matches a string of zero or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Always succeeds, since it can match the null string. + + Pos(N) Where N is a natural number, matches the null string + if exactly N characters have been matched so far, and + otherwise fails. + + Rpos(N) Where N is a natural number, matches the null string + if exactly N characters remain to be matched, and + otherwise fails. + + Rtab(N) Where N is a natural number, matches characters from + the current position until exactly N characters remain + to be matched in the string.
Fails if fewer than N + unmatched characters remain in the string. + + Tab(N) Where N is a natural number, matches characters from + the current position until exactly N characters have + been matched in all. Fails if more than N characters + have already been matched. + + Span(S) Where S is a string, matches a string of one or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Fails if the current character is not one of the given + set of characters. + + Recursive Pattern Matching + ========================== + + The plus operator (+P) where P is a pattern variable, creates + a recursive pattern that will, at pattern matching time, follow + the pointer to obtain the referenced pattern, and then match this + pattern. This may be used to construct recursive patterns. Consider + for example: + + P := ("A" | ("B" & (+P))) + + On the first attempt, this pattern attempts to match the string "A". + If this fails, then the alternative matches a "B", followed by an + attempt to match P again. This second attempt first attempts to + match "A", and so on. The result is a pattern that will match a + string of B's followed by a single A. + + This particular example could simply be written as NSpan('B') & 'A', + but the use of recursive patterns in the general case can construct + complex patterns which could not otherwise be built. + + Pattern Assignment Operations + ============================= + + In addition to the overall result of a pattern match, which indicates + success or failure, it is often useful to be able to keep track of + the pieces of the subject string that are matched by individual + pattern elements, or subsections of the pattern. + + The pattern assignment operators allow this capability. The first + form is the immediate assignment: + + P * S + + Here P is an arbitrary pattern, and S is a variable of type VString + that will be set to the substring matched by P. 
This assignment + happens during pattern matching, so if P matches more than once, + then the assignment happens more than once. + + The deferred assignment operation: + + P ** S + + avoids these multiple assignments by deferring the assignment to the + end of the match. If the entire match is successful, and if the + pattern P was part of the successful match, then at the end of the + matching operation the assignment to S of the string matching P is + performed. + + The cursor assignment operation: + + Setcur(N) + + assigns the current cursor position to the natural variable N. The + cursor position is defined as the count of characters that have been + matched so far (including any start point moves). + + Finally the operations * and ** may be used with values of type + Text_IO.File_Access. The effect is to do a Put_Line operation of + the matched substring. These are particularly useful in debugging + pattern matches. + + Deferred Matching + ================= + + The pattern construction functions (such as Len and Any) all permit + the use of pointers to natural or string values, or functions that + return natural or string values. These forms cause the actual value + to be obtained at pattern matching time. This allows interesting + possibilities for constructing dynamic patterns as illustrated in + the examples section. + + In addition the (+S) operator may be used where S is a pointer to + string or function returning string, with a similar deferred effect. + + A special use of deferred matching is the construction of predicate + functions. The element (+P) where P is an access to a function that + returns a Boolean value, causes the function to be called at the + time the element is matched. If the function returns True, then the + null string is matched, if the function returns False, then failure + is signalled and previous alternatives are sought. 
+ + Deferred Replacement + ==================== + + The simple model given for pattern replacement (where the matched + substring is replaced by the string given as the third argument to + Match) works fine in simple cases, but this approach does not work + in the case where the expression used as the replacement string is + dependent on values set by the match. + + For example, suppose we want to find an instance of a parenthesized + character, and replace the parentheses with square brackets. At first + glance it would seem that: + + Match (Subject, '(' & Len (1) * Char & ')', '[' & Char & ']'); + + would do the trick, but that does not work, because the third + argument to Match gets evaluated too early, before the call to + Match, and before the pattern match has had a chance to set Char. + + To solve this problem we provide the deferred replacement capability. + This approach, which of course is only needed if the pattern + involved has side effects, is to do the match in two stages. The + call to Match sets a pattern result in a variable of the private + type Match_Result, and then a subsequent Replace operation uses + this Match_Result object to perform the required replacement. + + Using this approach, we can now write the above operation properly + in a manner that will work: + + M : Match_Result; + ... + Match (Subject, '(' & Len (1) * Char & ')', M); + Replace (M, '[' & Char & ']'); + + As with other Match cases, there is a function and procedure form + of this match call. A call to Replace after a failed match has no + effect. Note that Subject should not be modified between the calls. + + Examples of Pattern Matching + ============================ + + First a simple example of the use of pattern replacement to remove + a line number from the start of a string. We assume that the line + number has the form of a string of decimal digits followed by a + period, followed by one or more spaces.
+ + Digs : constant Pattern := Span("0123456789"); + + Lnum : constant Pattern := Pos(0) & Digs & '.' & Span(' '); + + Now to use this pattern we simply do a match with a replacement: + + Match (Line, Lnum, ""); + + which replaces the line number by the null string. Note that it is + also possible to use an Ada.Strings.Maps.Character_Set value as an + argument to Span and similar functions, and in particular all the + useful constants in Ada.Strings.Maps.Constants are available. This + means that we could define Digs as: + + Digs : constant Pattern := Span(Decimal_Digit_Set); + + The style we use here, of defining constant patterns and then using + them is typical. It is possible to build up patterns dynamically, + but it is usually more efficient to build them in pieces in advance + using constant declarations. Note in particular that although it is + possible to construct a pattern directly as an argument for the + Match routine, it is much more efficient to preconstruct the pattern + as we did in this example. + + Now let's look at the use of pattern assignment to break a + string into sections. Suppose that the input string has two + unsigned decimal integers, separated by spaces or a comma, + with spaces allowed anywhere. Then we can isolate the two + numbers with the following pattern: + + Num1, Num2 : aliased VString; + + B : constant Pattern := NSpan(' '); + + N : constant Pattern := Span("0123456789"); + + T : constant Pattern := + NSpan(' ') & N * Num1 & Span(" ,") & N * Num2; + + The match operation Match (" 124, 257 ", T) would assign the + string 124 to Num1 and the string 257 to Num2. + + Now let's see how more complex elements can be built from the + set of primitive elements.
The following pattern matches strings + that have the syntax of Ada 95 based literals: + + Digs : constant Pattern := Span(Decimal_Digit_Set); + UDigs : constant Pattern := Digs & Arbno('_' & Digs); + + Edig : constant Pattern := Span(Hexadecimal_Digit_Set); + UEdig : constant Pattern := Edig & Arbno('_' & Edig); + + Bnum : constant Pattern := Udigs & '#' & UEdig & '#'; + + A match against Bnum will now match the desired strings, e.g. + it will match 16#123_abc#, but not a#b#. However, this pattern + is not quite complete, since it does not allow colons to replace + the pound signs. The following is more complete: + + Bchar : constant Pattern := Any("#:"); + Bnum : constant Pattern := Udigs & Bchar & UEdig & Bchar; + + but that is still not quite right, since it allows # and : to be + mixed, and they are supposed to be used consistently. We solve + this by using a deferred match. + + Temp : aliased VString; + + Bnum : constant Pattern := + Udigs & Bchar * Temp & UEdig & (+Temp) + + Here the first instance of the base character is stored in Temp, and + then later in the pattern we rematch the value that was assigned. + + For an example of a recursive pattern, let's define a pattern + that is like the built in Bal, but the string matched is balanced + with respect to square brackets or curly brackets. + + The language for such strings might be defined in extended BNF as + + ELEMENT ::= <any character other than [] or {}> + | '[' BALANCED_STRING ']' + | '{' BALANCED_STRING '}' + + BALANCED_STRING ::= ELEMENT {ELEMENT} + + Here we use {} to indicate zero or more occurrences of a term, as + is common practice in extended BNF. Now we can translate the above + BNF into recursive patterns as follows: + + Element, Balanced_String : aliased Pattern; + . + . + . 
+ Element := NotAny ("[]{}") + | + ('[' & (+Balanced_String) & ']') + | + ('{' & (+Balanced_String) & '}'); + + Balanced_String := Element & Arbno (Element); + + Note the important use of + here to refer to a pattern not yet + defined. Note also that we use assignments precisely because we + cannot refer to as yet undeclared variables in initializations. + + Now that this pattern is constructed, we can use it as though it + were a new primitive pattern element, and for example, the match: + + Match ("xy[ab{cd}]", Balanced_String * Current_Output & Fail); + + will generate the output: + + x + xy + xy[ab{cd}] + y + y[ab{cd}] + [ab{cd}] + a + ab + ab{cd} + b + b{cd} + {cd} + c + cd + d + + Note that the function of the fail here is simply to force the + pattern Balanced_String to match all possible alternatives. Studying + the operation of this pattern in detail is highly instructive. + + Finally we give a rather elaborate example of the use of deferred + matching. The following declarations build up a pattern which will + find the longest string of decimal digits in the subject string. + + Max, Cur : VString; + Loc : Natural; + + function GtS return Boolean is + begin + return Length (Cur) > Length (Max); + end GtS; + + Digit : constant Character_Set := Decimal_Digit_Set; + + Digs : constant Pattern := Span(Digit); + + Find : constant Pattern := + "" * Max & Fence & -- initialize Max to null + BreakX (Digit) & -- scan looking for digits + ((Span(Digit) * Cur & -- assign next string to Cur + (+GtS) & -- check size(Cur) > Size(Max) + Setcur(Loc)) -- if so, save location + * Max) & -- and assign to Max + Fail; -- seek all alternatives + + As we see from the comments here, complex patterns like this take + on aspects of sequential programs. In fact they are sequential + programs with general backtracking. In this pattern, we first use + a pattern assignment that matches null and assigns it to Max, so + that it is initialized for the new match. 
Now BreakX scans to the + next digit. Arb would do here, but BreakX will be more efficient. + Once we have found a digit, we scan out the longest string of + digits with Span, and assign it to Cur. The deferred call to GtS + tests if the string we assigned to Cur is the longest so far. If + not, then failure is signalled, and we seek alternatives (this + means that BreakX will extend and look for the next digit string). + If the call to GtS succeeds then the matched string is assigned + as the largest string so far into Max and its location is saved + in Loc. Finally Fail forces the match to fail and seek alternatives, + so that the entire string is searched. + + If the pattern Find is matched against a string, the variable Max + at the end of the pattern will have the longest string of digits, + and Loc will be the ending character location of the string. For + example, Match("ab123cd4657ef23", Find) will assign "4657" to Max + and 11 to Loc (indicating that the string ends with the eleventh + character of the string). + + Correspondence with Pattern Matching in SPITBOL + =============================================== + + Generally the Ada syntax and names correspond closely to SPITBOL + syntax for pattern matching construction. + + The basic pattern construction operators are renamed as follows: + + Spitbol Ada + + (space) & + | or + $ * + . ** + + The Ada operators were chosen so that the relative precedences of + these operators corresponds to that of the Spitbol operators, but + as always, the use of parentheses is advisable to clarify. + + The pattern construction operators all have similar names. + + The actual pattern matching syntax is modified in Ada as follows: + + Spitbol Ada + + X Y Match (X, Y); + X Y = Z Match (X, Y, Z); + + and pattern failure is indicated by returning a Boolean result from + the Match function (True for success, False for failure).
+ +----------------------- +Type Declarations +----------------------- + +type Pattern is private; + Type representing a pattern. This package provides a complete set of + operations for constructing patterns that can be used in the pattern + matching operations provided. + +type Boolean_Func is access function return Boolean; + General Boolean function type. When this type is used as a formal + parameter type in this package, it indicates a deferred predicate + pattern. The function will be called when the pattern element is + matched and failure signalled if False is returned. + +type Natural_Func is access function return Natural; + General Natural function type. When this type is used as a formal + parameter type in this package, it indicates a deferred pattern. + The function will be called when the pattern element is matched + to obtain the currently referenced Natural value. + +type VString_Func is access function return VString; + General VString function type. When this type is used as a formal + parameter type in this package, it indicates a deferred pattern. + The function will be called when the pattern element is matched + to obtain the currently referenced string value. + +subtype PString is String; + This subtype is used in the remainder of the package to indicate a + formal parameter that is converted to its corresponding pattern, + i.e. a pattern that matches the characters of the string. + +subtype PChar is Character; + Similarly, this subtype is used in the remainder of the package to + indicate a formal parameter that is converted to its corresponding + pattern, i.e. a pattern that matches this one character. + +subtype VString_Var is VString; +subtype Pattern_Var is Pattern; + These synonyms are used as formal parameter types to a function where, + if the language allowed, we would use in out parameters, but we are + not allowed to have in out parameters for functions. 
Instead we pass + actuals which must be variables, and with a bit of trickery in the + body, manage to interpret them properly as though they were indeed + in out parameters. + +-------------------------------- +Basic Pattern Construction +-------------------------------- + +function "&" (L : Pattern; R : Pattern) return Pattern; +function "&" (L : PString; R : Pattern) return Pattern; +function "&" (L : Pattern; R : PString) return Pattern; +function "&" (L : PChar; R : Pattern) return Pattern; +function "&" (L : Pattern; R : PChar) return Pattern; + + Pattern concatenation. Matches L followed by R + +function "or" (L : Pattern; R : Pattern) return Pattern; +function "or" (L : PString; R : Pattern) return Pattern; +function "or" (L : Pattern; R : PString) return Pattern; +function "or" (L : PString; R : PString) return Pattern; +function "or" (L : PChar; R : Pattern) return Pattern; +function "or" (L : Pattern; R : PChar) return Pattern; +function "or" (L : PChar; R : PChar) return Pattern; +function "or" (L : PString; R : PChar) return Pattern; +function "or" (L : PChar; R : PString) return Pattern; + Pattern alternation. Creates a pattern that will first try to match + L and then on a subsequent failure, attempts to match R instead. + +---------------------------------- +Pattern Assignment Functions +---------------------------------- + +function "*" (P : Pattern; Var : VString_Var) return Pattern; +function "*" (P : PString; Var : VString_Var) return Pattern; +function "*" (P : PChar; Var : VString_Var) return Pattern; + Matches P, and if the match succeeds, assigns the matched substring + to the given VString variable Var. This assignment happens as soon as + the substring is matched, and if the pattern P is matched more than + once during the course of the match, then the assignment will occur + more than once.
+ +function "**" (P : Pattern; Var : VString_Var) return Pattern; +function "**" (P : PString; Var : VString_Var) return Pattern; +function "**" (P : PChar; Var : VString_Var) return Pattern; + Like "*" above, except that the assignment happens at most once + after the entire match is completed successfully. If the match + fails, then no assignment takes place. + +---------------------------------- +Deferred Matching Operations +---------------------------------- + +function "+" (Str : VString_Var) return Pattern; + Here Str must be a VString variable. This function constructs a + pattern which at pattern matching time will access the current + value of this variable, and match against these characters. + +function "+" (Str : VString_Func) return Pattern; + Constructs a pattern which at pattern matching time calls the given + function, and then matches against the string or character value + that is returned by the call. + +function "+" (P : Pattern_Var) return Pattern; + Here P must be a Pattern variable. This function constructs a + pattern which at pattern matching time will access the current + value of this variable, and match against the pattern value. + +function "+" (P : Boolean_Func) return Pattern; + Constructs a predicate pattern function that at pattern matching time + calls the given function. If True is returned, then the pattern matches. + If False is returned, then failure is signalled. + +-------------------------------- +Pattern Building Functions +-------------------------------- + +function Arb return Pattern; + Constructs a pattern that will match any string. On the first attempt, + the pattern matches a null string, then on each successive failure, it + matches one more character, and only fails if matching the entire rest + of the string. + +function Arbno (P : Pattern) return Pattern; +function Arbno (P : PString) return Pattern; +function Arbno (P : PChar) return Pattern; + Pattern repetition. 
First matches null, then on a subsequent failure + attempts to match an additional instance of the given pattern. + Equivalent to (but more efficient than) P & ("" | (P & ("" | ... + +function Any (Str : String) return Pattern; +function Any (Str : VString) return Pattern; +function Any (Str : Character) return Pattern; +function Any (Str : Character_Set) return Pattern; +function Any (Str : access VString) return Pattern; +function Any (Str : VString_Func) return Pattern; + Constructs a pattern that matches a single character that is one of + the characters in the given argument. The pattern fails if the current + character is not in Str. + +function Bal return Pattern; + Constructs a pattern that will match any non-empty string that is + parentheses balanced with respect to the normal parentheses characters. + Attempts to extend the string if a subsequent failure occurs. + +function Break (Str : String) return Pattern; +function Break (Str : VString) return Pattern; +function Break (Str : Character) return Pattern; +function Break (Str : Character_Set) return Pattern; +function Break (Str : access VString) return Pattern; +function Break (Str : VString_Func) return Pattern; + Constructs a pattern that matches a (possibly null) string which + is immediately followed by a character in the given argument. This + character is not part of the matched string. The pattern fails if + the remaining characters to be matched do not include any of the + characters in Str. + +function BreakX (Str : String) return Pattern; +function BreakX (Str : VString) return Pattern; +function BreakX (Str : Character) return Pattern; +function BreakX (Str : Character_Set) return Pattern; +function BreakX (Str : access VString) return Pattern; +function BreakX (Str : VString_Func) return Pattern; + Like Break, but the pattern attempts to extend on a failure to find + the next occurrence of a character in Str, and only fails when the + last such instance causes a failure. 
+ +function Cancel return Pattern; + Constructs a pattern that immediately aborts the entire match + +function Fail return Pattern; + Constructs a pattern that always fails + +function Fence return Pattern; + Constructs a pattern that matches null on the first attempt, and then + causes the entire match to be aborted if a subsequent failure occurs. + +function Fence (P : Pattern) return Pattern; + Constructs a pattern that first matches P. If P fails, then the + constructed pattern fails. If P succeeds, then the match proceeds, + but if subsequent failure occurs, alternatives in P are not sought. + The idea of Fence is that each time the pattern is matched, just + one attempt is made to match P, without trying alternatives. + +function Len (Count : Natural) return Pattern; +function Len (Count : access Natural) return Pattern; +function Len (Count : Natural_Func) return Pattern; + Constructs a pattern that matches exactly the given number of + characters. The pattern fails if fewer than this number of characters + remain to be matched in the string. + +function NotAny (Str : String) return Pattern; +function NotAny (Str : VString) return Pattern; +function NotAny (Str : Character) return Pattern; +function NotAny (Str : Character_Set) return Pattern; +function NotAny (Str : access VString) return Pattern; +function NotAny (Str : VString_Func) return Pattern; + Constructs a pattern that matches a single character that is not + one of the characters in the given argument. The pattern fails if + the current character is in Str. + +function NSpan (Str : String) return Pattern; +function NSpan (Str : VString) return Pattern; +function NSpan (Str : Character) return Pattern; +function NSpan (Str : Character_Set) return Pattern; +function NSpan (Str : access VString) return Pattern; +function NSpan (Str : VString_Func) return Pattern; + Constructs a pattern that matches the longest possible string + consisting entirely of characters from the given argument.
The + string may be empty, so this pattern always succeeds. + +function Pos (Count : Natural) return Pattern; +function Pos (Count : access Natural) return Pattern; +function Pos (Count : Natural_Func) return Pattern; + Constructs a pattern that matches the null string if exactly Count + characters have already been matched, and otherwise fails. + +function Rem return Pattern; + Constructs a pattern that always succeeds, matching the remaining + unmatched characters in the string. + +function Rpos (Count : Natural) return Pattern; +function Rpos (Count : access Natural) return Pattern; +function Rpos (Count : Natural_Func) return Pattern; + Constructs a pattern that matches the null string if exactly Count + characters remain to be matched in the string, and otherwise fails. + +function Rtab (Count : Natural) return Pattern; +function Rtab (Count : access Natural) return Pattern; +function Rtab (Count : Natural_Func) return Pattern; + Constructs a pattern that matches from the current location until + exactly Count characters remain to be matched in the string. The + pattern fails if fewer than Count characters remain to be matched. + +function Setcur (Var : access Natural) return Pattern; + Constructs a pattern that matches the null string, and assigns the + current cursor position in the string. This value is the number of + characters matched so far. So it is zero at the start of the match. + +function Span (Str : String) return Pattern; +function Span (Str : VString) return Pattern; +function Span (Str : Character) return Pattern; +function Span (Str : Character_Set) return Pattern; +function Span (Str : access VString) return Pattern; +function Span (Str : VString_Func) return Pattern; + Constructs a pattern that matches the longest possible string + consisting entirely of characters from the given argument. The + string cannot be empty, so the pattern fails if the current + character is not one of the characters in Str.
+ +function Succeed return Pattern; + Constructs a pattern that succeeds matching null, both on the first + attempt, and on any rematch attempt, i.e. it is equivalent to an + infinite alternation of null strings. + +function Tab (Count : Natural) return Pattern; +function Tab (Count : access Natural) return Pattern; +function Tab (Count : Natural_Func) return Pattern; + Constructs a pattern that matches from the current location until Count + characters have been matched. The pattern fails if more than Count + characters have already been matched. + +--------------------------------- +Pattern Matching Operations +--------------------------------- + + The Match function performs an actual pattern matching operation. + The versions with three parameters perform a match without modifying + the subject string and return a Boolean result indicating if the + match is successful or not. The Anchor parameter is set to True to + obtain an anchored match in which the pattern is required to match + the first character of the string. In an unanchored match, which is + + the default, successive attempts are made to match the given pattern + at each character of the subject string until a match succeeds, or + until all possibilities have failed. + + Note that pattern assignment functions in the pattern may generate + side effects, so these functions are not necessarily pure. + +Anchored_Mode : Boolean := False; + This global variable can be set True to cause all subsequent pattern + matches to operate in anchored mode. In anchored mode, no attempt is + made to move the anchor point, so that if the match succeeds it must + succeed starting at the first character. Note that the effect of + anchored mode may be achieved in individual pattern matches by using + Fence or Pos(0) at the start of the pattern. + +Pattern_Stack_Overflow : exception; + Exception raised if internal pattern matching stack overflows. This + is typically the result of runaway pattern recursion.
If there is a + genuine case of stack overflow, then either the match must be broken + down into simpler steps, or the stack limit must be reset. + +Stack_Size : constant Positive := 2000; + Size used for internal pattern matching stack. Increase this size if + complex patterns cause Pattern_Stack_Overflow to be raised. + + Simple match functions. The subject is matched against the pattern. + Any immediate or deferred assignments or writes are executed, and + the returned value indicates whether or not the match succeeded. + +function Match + (Subject : VString; + Pat : Pattern) return Boolean; + +function Match + (Subject : VString; + Pat : PString) return Boolean; + +function Match + (Subject : String; + Pat : Pattern) return Boolean; + +function Match + (Subject : String; + Pat : PString) return Boolean; + + Replacement functions. The subject is matched against the pattern. + Any immediate or deferred assignments or writes are executed, and + the returned value indicates whether or not the match succeeded. + If the match succeeds, then the matched part of the subject string + is replaced by the given Replace string. + +function Match + (Subject : VString_Var; + Pat : Pattern; + Replace : VString) return Boolean; + +function Match + (Subject : VString_Var; + Pat : PString; + Replace : VString) return Boolean; + +function Match + (Subject : VString_Var; + Pat : Pattern; + Replace : String) return Boolean; + +function Match + (Subject : VString_Var; + Pat : PString; + Replace : String) return Boolean; + +Deferred Replacement + +type Match_Result is private; + Type used to record result of pattern match + +subtype Match_Result_Var is Match_Result; + This synonym is used as a formal parameter type to a function where, + if the language allowed, we would use an in out parameter, but we are + not allowed to have in out parameters for functions.
Instead we pass + actuals which must be variables, and with a bit of trickery in the + body, manage to interpret them properly as though they were indeed + in out parameters. + +function Match + (Subject : VString_Var; + Pat : Pattern; + Result : Match_Result_Var) return Boolean; + +procedure Match + (Subject : in out VString; + Pat : Pattern; + Result : out Match_Result); + +procedure Replace + (Result : in out Match_Result; + Replace : VString); + Given a previous call to Match which set Result, performs a pattern + replacement if the match was successful. Has no effect if the match + failed. This call should immediately follow the Match call. + +------------------------ +Debugging Routines +------------------------ + + Debugging pattern matching operations can often be quite complex, + since there is no obvious way to trace the progress of the match. + The declarations in this section provide some debugging assistance. + +Debug_Mode : Boolean := False; + This global variable can be set True to generate debugging on all + subsequent calls to Match. The debugging output is a full trace of + the actions of the pattern matcher, written to Standard_Output. The + level of this information is intended to be comprehensible at the + abstract level of this package declaration. However, note that the + use of this switch often generates large amounts of output. + +function "*" (P : Pattern; Fil : File_Access) return Pattern; +function "*" (P : PString; Fil : File_Access) return Pattern; +function "*" (P : PChar; Fil : File_Access) return Pattern; +function "**" (P : Pattern; Fil : File_Access) return Pattern; +function "**" (P : PString; Fil : File_Access) return Pattern; +function "**" (P : PChar; Fil : File_Access) return Pattern; + These are similar to the corresponding pattern assignment operations + except that instead of setting the value of a variable, the matched + substring is written to the appropriate file.
This can be useful in + following the progress of a match without generating the full amount + of information obtained by setting Debug_Mode to True. + +Terminal : constant File_Access := Standard_Error; +Output : constant File_Access := Standard_Output; + Two handy synonyms for use with the above pattern write operations + + Finally we have some routines that are useful for determining what + patterns are in use, particularly if they are constructed dynamically. + +function Image (P : Pattern) return String; +function Image (P : Pattern) return VString; + These functions yield strings that correspond to the syntax needed + to create the given pattern using the functions in this package. The + form of this string is such that it could actually be compiled and + evaluated to yield the required pattern except for references to + variables and functions, which are output using one of the following + forms: +-- + access Natural NP(16#...#) + access Pattern PP(16#...#) + access VString VP(16#...#) +-- + Natural_Func NF(16#...#) + VString_Func VF(16#...#) +-- + where 16#...# is the hex representation of the integer address that + corresponds to the given access value + +procedure Dump (P : Pattern); + This procedure writes information about the pattern to Standard_Out. + The format of this information is keyed to the internal data structures + used to implement patterns. The information provided by Dump is thus + more precise than that yielded by Image, but is also a bit more obscure + (i.e. it cannot be interpreted solely in terms of this spec, you have + to know something about the data structures).
+ +procedure Finalize (Object : in out Pattern); + Finalization routine used to release storage allocated for a pattern + + + + diff --git a/doc/reference.xml b/doc/reference.xml new file mode 100644 index 0000000..0e4d5be --- /dev/null +++ b/doc/reference.xml @@ -0,0 +1,2005 @@ +<?xml version="1.0"?> + +<!-- + LSPIPAT - LUA SPIPAT WRAPPER + Copyright (C) 2010, Robin Haberkorn + License: LGPL + + DOCUMENTATION AND MODULE REFERENCE +--> + +<book xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink"> + <info> + <title>SNOBOL/SPITBOL Patterns for Lua + libspipat Lua wrapper + lspipat + + + Robin Haberkorn + robin.haberkorn at googlemail.com + + + 2010Robin Haberkorn + + + + + + + + The following document is the lspipat + Lua 5.1 module documentation and reference. + + + + + Thanks To... + + + lspipat would not be possible without: + + + + Phil Budne, for spipat. + lspipat is merely a spipat wrapper. + + Robert Dewar who has created Macro SPITBOL and + the GNAT.Spitbol package. + spipat was derived from GNAT.Spitbol, which is based on Macro SPITBOL. + + + + + + Introduction + + + lspipat is a wrapper to spipat + that brings support for a first-class SNOBOL/SPITBOL-like pattern data type. + Patterns can be constructed and subsequently combined with other patterns, + strings, numbers and functions using binary and unary operators allowing + the construction of grammars describing any Context Free Language. + Patterns can be matched against any Lua string. + A major difference to other pattern matching techniques like regular expressions, besides + the supported language class, is the possibility to construct patterns/grammars in a + readable and intuitive way, somewhat reminiscent of the BNF. + + They can include pattern elements that have side-effects (i.e. Lua code executed during + pattern matching) or produce and influence pattern elements dynamically. 
+ For instance, functions can be specified that are executed during matching to produce + the parameters necessary for the interpretation of a pattern element. + Code can be embedded that generates entire patterns on the fly. + Matching previously matched substrings and implementing recursive patterns + is only one application of the powerful dynamic pattern elements traditionally + offered by SNOBOL pattern matching and thus by lspipat. + + SNOBOL/SPITBOL pattern matching was traditionally used in compiler construction + and prototyping, artificial intelligence research and the humanities. + + + + + Resources + + + These internet resources are more or less directly related to lspipat and + might be useful to you: + + + + http://luaforge.net/projects/lspipat/: + lspipat project page at LuaForge, downloads, bug tracker, etc. + + http://www.snobol4.org/spipat/: + libspipat downloads + + http://pypi.python.org/pypi/spipat/: + libspipat's Python wrapper (included in libspipat + packages). + + http://www.infeig.unige.ch/support/ada/gnatlb/g-spipat.html: + GNAT.Spitbol description. Also installed as pattern.txt by lspipat. + + ftp://ftp.cs.arizona.edu/snobol/gb.pdf: + The SNOBOL4 Programming Language (The famous Green Book) + + ftp://ftp.snobol4.com/spitman.pdf: + Macro SPITBOL Reference Manual + + other interesting resources compiled by Phil Budne... + + + + + + + Comparison with SNOBOL + + + Just as patterns in SNOBOL are combined and constructed dynamically with + binary and unary operators, lspipat also uses operators available in + Lua to construct patterns in a simple and intuitive way. + The operators and pattern-construction functions were chosen, so the pattern construction syntax + is as similar as possible to SNOBOL/SPITBOL. + The following table shows a comparison of operators between + SPITBOL and lspipat: + + + Comparison of SPITBOL and lspipat operators + + + + + + + + Operation + SPITBOL + lspipat + Notes + + + + Alternation + | + + + + Refer to .
+ Cannot be used to combine two strings. + + + Concatenation + (space) + * + + Immediate Assignment/Call + $ + % + + % and / have the + same precedence + as * in Lua. + Also only call versions are supported (see ). + + + Deferred Assignment/Call + . + / + + Cursor Assignment + @ (unary) + # (unary) + + Refer to . + lspipat only supports a call version + (see ). + + + + + Setcur + + Defer Expression + * (unary) + - (unary) or Pred + + Refer to . + In general, expressions can be wrapped in (anonymous) functions to defer them. + + + Interrogation/Predicate + ? (unary) + + + Pattern Match + ? + smatch + + Refer to . + S ? P is roughly equivalent to S:smatch(P) in Lua. + + + + (space) + + + Substring Replacement + = + ssub + + Refer to . + S P = R is roughly equivalent to S:ssub(P, R, 1) in Lua. + + + + +
+
+ + + Installation + + + lspipat uses an autotools buildsystem. The standard + INSTALL file contains instructions on how to use it from + a package builder's perspective. + Nevertheless, there are some quirks that should be mentioned. + + +
+ Dependencies + + + + spipat 0.9.3+: + You are advised to apply the patch spipat-patches/0.9.3+_image.patch first + before building spipat, even though it is not mandatory. + It fixes a header file (so lspipat can make use of customized + render-to-string functionality) and various bugs. + + + Lua 5.1: + You probably have this already. The configure script + should be able to cope with Ubuntu and + Lua Binaries + distributions. The standalone Lua compiler is only required if + compilation of Lua scripts is enabled. + + +
+ +
+ Configuration Options + + + The following special configure script options + are supported: + + + + --enable-lua-libdir=DIR + + Change the installation directory of lspipat. + It defaults to LIBDIR/lua/5.1. You probably want this to + point to some directory in Lua's + + module search path, so the default should be ok. + + + + --disable-lua-precompile + + Disable precompilation of Lua source files. + Naturally, a Lua compiler will not be required when this option + is used. + + + + --disable-lua-strip + + Do not strip (i.e. remove debugging symbols from) compiled + Lua sources. + + + + --disable-html-doc + + Do not generate HTML documentation. The documentation is usually + derived from Docbook using + XSLTProc. + Disabling this may be useful if you have got some problem + with the tool chain but are satisfied with the precompiled + documentation in the distribution. + + + + + Furthermore, you should note that render-to-string results are not + reminiscent of lspipat syntax (used in this document) by default. + For lspipat to be able to customize these renderings, + configure has to find some spipat headers which + are not normally installed. + Therefore it is highly recommended to add spipat's source directory to the C include search path + using the CPPFLAGS variable before running configure. + +
+ + + Thus, supposing that spipat sources are located in your home directory, + the most common way to install lspipat would be: + + + + +
+ + + Usage + + + After lspipat has been installed properly, you will + be able to use it in your Lua program by simply requiring lspipat + (i.e. require "lspipat"). + + The module table will be called spipat, but many functions + (especially pattern constructors) will be registered as globals as well. + Also, some operators will be overloaded. + For details on all that (operators, globals, etc.) refer to + . + + + + + Examples + + + The samples directory in the lspipat source package + contains some small examples that I hope give you some inspiration on how and where to use + lspipat. + + + + samples/exp2bf.lua + + exp2bf.lua expression + + Compiles simple arithmetic expressions to Brainfuck programs that when + executed evaluate the expression and print the result + (8-bit unsigned integer arithmetic). + Prints these programs to stdout. + + Use that for whatever you can imagine ;-) + + + + samples/wave.lua + + wave.lua wavefile + + Validates/parses WAV files + and prints some information about them. + + This is an example of how to use lspipat + to do pattern matching on "binary" data (formats, protocols). Some + primitives were implemented in Lua for that reason - in the future + there might be a separate C-module to do the encoding/decoding of + integers in different byte-orders more efficiently. + + + + samples/regexp.lua + + Small regular expression example/test - uses a comprehensive regular + expression describing IPs. + + + + + + + Variable Deferring Techniques + + + In SNOBOL, arbitrary expressions could be deferred + (i.e. their evaluation could be deferred) by using the unary asterisk operator. + With lspipat however, you will have to pass functions + (which can be constructed anonymously) to the appropriate constructors to achieve + the same goal. + + Deferring expressions which should be combined with other patterns is one + application of the Pred constructor + and - operator respectively.
+ + Deferring variables is just a special case of deferring expressions. + In this chapter, different ways of optimizing variable deferrings will be + explained using a simple example. + + For instance if you would like to assign a + matched quotation character to a local variable and use that to subsequently match + a simple quote/string, you could use function closures to write something like that: + + + Function Closures for Deferring Purposes + + local cquote +string = Any("\"'") / function(c) cquote = c end + * Break(function() return cquote end) + * -function() return cquote end + + + You may find this solution a bit verbose, compared with + SNOBOL's elegant syntax. + To save some typing you could define your own constructors + that take the name of a global variable (as a string) + and construct patterns whose arguments are retrieved by + a function closure accessing the globals table. + + + Custom Constructors for Deferring Purposes + + function _Break(name) + return Break(function() return _G[name] end) +end +function _Pred(name) + return -function() return _G[name] end +end + +string = Any("\"'") / function(c) cquote = c end + * _Break "cquote" + * _Pred "cquote" + + + Of course, if you do not want to pollute the global namespace + your custom functions could just as well access a local table. + Furthermore, you could optimize the code by defining one generic + table access function which is suitable to be used for + lspipat's pattern constructors - + being able to pass so called cookies + to functions comes in handy. + + + Generic Retrievers for Deferring Purposes + + function getGlobal(name) return _G[name] end +function _Break(name) return Break(getGlobal, name) end +function _Pred(name) return Pred(getGlobal, name) end +-- ... + + + Fortunately, lspipat already defines + such constructors (deferring global variables) for you. 
+ Wherever possible, there will be versions of constructors + with leading underscores that work similarly to the ones in + the example above. + You can of course overwrite these constructors, e.g. with + versions accessing a special local table. + + +
+ Recursive Patterns + + + Recursive patterns can be implemented just as described above. + Supposing you want to match the repetition of the predefined pattern + P (greedy) you could write + something like that: + + + Recursive Patterns + + + + + Sometimes however when using global variables is inappropriate, + you might want to do the following trick: + + + Recursive Pattern Trick + + + + + It works because foo is still a function in the scope + of the assignment's right side, but a pattern afterwards so the + function - to which no (direct) reference exists anymore - will return + the pattern foo after the assignment. + +
+
+ + + Module Reference + + + A compilation of all functions in the lspipat + module, global functions registered by the module, methods + and overloaded operators follows. + + + + smatch + + + smatch + Perform pattern match on a subject string + + + + + spipat.smatch + ( subject + , pattern + , flags ) + + subject:smatch + ( pattern + , flags ) + + + + + Description + + + Tries to match pattern against subject + using the given flags. + + + Parameters + + + subject (string): A string against which the pattern match will be performed + pattern (userdata): The pattern used for matching + + flags (number or nil): + Optional spipat flags. + + + + + Spipat Flags + + + Flags are added (e.g. spipat.match_anchored + spipat.match_debug), + due to the lack of a logical/binary or operator in Lua. + + + + spipat.match_anchored: Match in anchored mode + + spipat.match_debug: + Match with progress being printed to stdout. + Useful for pattern debugging as the name suggests. + + + + + Return Values + + + In case of an exception during matching, raises an error. + In case no substring matches, returns a single nil value. + Otherwise returns + + + number: Start of matched substring + number: End of matched substring + + + + + + ssub + + + ssub + Substitute substrings matching a pattern in a subject + + + + + spipat.ssub + ( subject + , pattern + , replacement + , n, flags ) + + subject:ssub + ( pattern + , replacement + , n, flags ) + + + + + Description + + + Substitutes regions in subject matching pattern either with a string + if replacement is a string or if replacement is a function, the result + of calling that function. This may be useful for deferring the evaluation of replacement strings + which depend on (are built from) results of the matching process (e.g. call-on-match or call-immediately function executions). 
+ + + Parameters + + + subject (string): The subject for the first pattern match + pattern (userdata): The pattern used for matching + + replacement (string or function): + Replacement string or a function that's executed after matching to produce the replacement string + + n (number or nil): + Optional maximal number of match/replacement operations. The first match + is performed on subject, subsequent matches on the result of the preceding + replacements. Naturally replacement stops when the pattern does not match anymore. + If n is absent or nil, replacement only stops when pattern + does not match anymore. + + flags (number or nil): + Optional spipat flags, as in . + + + + Return Values + + + In case of an exception during matching, raises an error. + Otherwise returns + + + + string: The result of the last replacement performed or the original + subject if no substring matched at all + + number: The number of match/replacement operations actually performed + + + + Example + + + Replacements with spipat.ssub + + > print(spipat.ssub("abc ccC bab", Span("abc") / function(s) str = s end, function() return "["..str:upper().."]" end, 2)) +[ABC] [CC]C BaB +> + + + + + + siter + + + siter + Return iterator of substrings matching a pattern in a subject + + + + + spipat.siter + ( subject + , pattern + , flags ) + + subject:siter + ( pattern + , flags ) + + + + + Description + + + Returns an iterator function performing a pattern match on subject + and returning the matched substring (start/end positions in subject). + Each time it is called, it begins matching where the last substring ended, but using the same + subject. + + + Parameters + + + subject (string): The subject used for pattern matching + + pattern (userdata): The pattern used for matching. + Naturally, anchoring the pattern using any of the possible methods is nonsense. + + flags (number or nil): + Optional spipat flags, as in . 
+ + + + Return Values + + + In case of an exception during matching, raises an error. + Otherwise returns + + + function: The iterator function. Calling it returns + + number: Start of matched substring + number: End of matched substring + + + + Example + + + Iterating through substrings with spipat.siter + + > str = "abc" +> for s, e in str:siter(Len(1)) do print(str:sub(s, e)) end +a +b +c +> + + + + + + free + + + free + Finalize pattern + + + + + spipat.free( pattern ) + + pattern:free() + + + + + Description + + + Finalizes pattern, i.e. frees memory associated with it and unreferences any + other Lua values (other patterns, functions, etc.) so they can get garbage collected. + + Finalizing an already finalized pattern does nothing. + Using a finalized pattern in any function or operator working with a pattern + will raise an error. + + + free does early what would otherwise be done when the pattern is garbage + collected, so in most cases you will not need it at all. + It may be useful when you would like to free a large pattern you do not need anymore but + removing all references to that pattern and enforcing a full garbage collection cycle + is not feasible. + + + Parameters + + + pattern (userdata): The pattern to be finalized + + + Return Values + + + Returns nothing. + + + Example + + + Finalizing a pattern + + > p = Arb() +> p:free() +> print(p * "foo") +stdin:1: Pattern already freed +> + + + + + + Conversion + + + topattern + Convert a value to a pattern + + + tostring + Render a pattern as a string + + + + + spipat.topattern( value ) + + topattern( value ) + + value:topattern() + + + + tostring( pattern ) + + + + + Description + + + topattern creates a pattern for a string or number, matching that string or number. + If value is already a pattern it returns that pattern without modification. + In case of an unsupported value type or miscellaneous error, topattern always + returns nil. + + + topattern is useful to explicitly create a pattern, e.g.
when an operator requires + at least one operand to be a pattern but both are strings, numbers or functions. + + + Lua's built-in tostring + function called on a pattern renders that pattern as a string reminiscent of + lspipat's pattern construction syntax. + + + + + Example + + + Explicit pattern construction & implicit conversion to strings + + print("2" + 3) +5 +> print(topattern("2") + 3) +("2" + "3") +>]]> + + + + + + dump + + + dump + Dump a pattern to stdout + + + + + spipat.dump( pattern ) + + + + + Description + + + dump prints information about a pattern to + stdout. + The kind of information displayed is similar to + tostring's rendering. + + It is useful for debugging purposes. + + + Parameters + + + pattern (userdata): The pattern to be dumped + + + Return Values + + + Returns nothing. + + + + + + Concatenation and Alternation + + + * + Concatenate patterns + + + + + Alternate patterns + + + + + pattern* + value + + value* + pattern + + pattern* + pattern + + + + pattern+ + value + + value+ + pattern + + pattern+ + pattern + + + + + Description + + + The * operator constructs a concatenation of two values + if at least one of them is a pattern and returns the result as a pattern. + A concatenation matches the left operand immediately followed by the right operand. + + The + operator constructs an alternation between two values + if at least one of them is a pattern and returns the result as a pattern. + An alternation matches the left operand and if unsuccessful the right operand. + + The non-pattern values may be strings or numbers, which are matched + just like a pattern built by + topattern. + + + Even though the patterns participating in the composition will be copied, + references will be kept, so they will not be garbage collected until all patterns + using them are garbage collected. 
+ + + Return Values + + + pattern (userdata): Result of the pattern composition + + + Example + + + Concatenations and Alternations + + > pat = (topattern("ABC") + "AB") * (topattern("DEF") + "CDE") * (topattern("GH") + "IJ") +> assert(spipat.smatch("ABCCDEGH", pat)) +> assert(spipat.smatch("ABCDEFIJ", pat)) +> + + + + + + Assignment Calls + + + % + Call Immediately + + / + Deferred Call + + + + + pattern% + function + + + + pattern/ + function + + + + + Description + + + The % operator constructs a pattern matching operand pattern and + calling a Lua function whenever pattern matches during a pattern + match (i.e. function may be called more than once while matching regardless of whether + the match fails or succeeds). + + On the other hand, the / operator constructs a pattern matching operand + pattern and calling a Lua function at most once - only if + the match succeeds. + + In both cases, function receives the following arguments when called: + + string: The substring matched by pattern + + Its return value is ignored. + + + Unlike assignment operators in SNOBOL, the % and / + operators in Lua have the same precedence + as the concatenation operator *, + so using parentheses is advised. + + + Deferred assignments (assign on match & assign immediately) are not directly possible but can be + easily implemented using function closures as described in . + + + + Even though the pattern operands will be copied, references will be kept, + so they will not be garbage collected until all patterns + using them are garbage collected. + + Furthermore, references to functions will be kept so they will not be + garbage collected until the patterns constructed by the operators are garbage collected. + + + + Return Values + + + pattern (userdata): Pattern built by the operators + + + Example + + See . 
+ + + + + Cursor Assignment Calls + + + Setcur + Cursor Assignment + + + + + spipat.Setcur + ( function, cookie ) + + Setcur + ( function, cookie ) + + #function + + + + spipat._Setcur( string ) + + _Setcur( string ) + + + + + Description + + + Setcur is a pattern constructor returning a pattern matching the null string "" + (i.e. always succeeds when matched) and immediately calling a Lua function when matched. + This function receives the following arguments when called: + + + number: The cursor in the subject string. + In other words, the number of characters matched so far from the beginning of the subject string. + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + Its return value is ignored. + + + The unary # operator is equivalent to the Setcur constructor with no + cookie specified. + + + _Setcur is similar to Setcur but actually assigns the cursor position to + the global variable whose name is specified by a string value. + This means that _Setcur(str) does not assign the cursor position to the global variable str + but rather to the variable with the name str contains, e.g. foo if str == "foo". + So generally _Setcur is equivalent to: + + + + In a similar manner, other kinds of deferred assignments can be implemented + using function closures as described in . + + + References to function and cookie will be kept so they will not be + garbage collected until the pattern constructed by Setcur is garbage collected. + + + Return Values + + + pattern (userdata): Pattern built by the constructor + + + + + + + Predicates + + + Pred + Predicate Constructor + + + + + spipat.Pred + ( function, cookie ) + + Pred + ( function, cookie ) + + -function + + + + spipat._Pred( string ) + + _Pred( string ) + + -string + + + + + Description + + + Pred constructs a pattern which allows you to transparently define its matching behaviour + using a function called when this pattern is attempted to be matched. 
+ It receives the following arguments when invoked: + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + The function's return value defines the behaviour dynamically, as shown in the following table: + + + Dynamic Function Return Values + + + + + + + + + Value + Type + Behaviour + + + + nil + nil + + Match the "" string, i.e. succeed. + + + true + boolean + + false + + + Pattern match fails, like when using the + Fail primitive. + + + any number + + Try to match that number as a string, as if + converted to a pattern. + + + any string + + Try to match that string, as if + converted to a pattern. + + + any pattern + + Try to match that pattern. Returning a pattern assigned to a variable is the way + to implement recursive patterns. + + + + +
+
+ + The unary - operator applied to a function is equivalent + to the Pred constructor with no cookie specified. + + + _Pred is similar to Pred but actually gets the Lua value defining its behaviour from + the global variable whose name is specified by a string value. + This means that _Pred(str) does not get the value from the global variable str + but rather from the variable with the name str contains, e.g. foo if str == "foo". + So generally _Pred is equivalent to: + + + + In a similar manner, other kinds of variable deferring as well as recursive patterns can be implemented + using function closures as described in . + + + The unary - operator applied to a string which is not convertible to + a number is equivalent to the _Pred constructor - naturally this + should be true for all global variable names. + This constraint comes from the way Lua handles operations by default (it checks whether it is an arithmetic operation + before evaluating any metamethod - see metatables). + + + References to function and cookie will be kept so they will not be + garbage collected until the pattern constructed by Pred is garbage collected. + +
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + String Primitives + + + Any + Match any character in a set + + + NotAny + Match any character not in a set + + + Break + Match characters up to a break character + + + BreakX + Match characters up to a break character (extending) + + + NSpan + Match nothing or characters from a set + + + Span + Match characters from a set + + + + + spipat.Any( set ) + + spipat.Any + ( function, cookie ) + + spipat._Any( string ) + + + + spipat.NotAny( set ) + + spipat.NotAny + ( function, cookie ) + + spipat._NotAny( string ) + + + + spipat.Break( set ) + + spipat.Break + ( function, cookie ) + + spipat._Break( string ) + + + + spipat.BreakX( set ) + + spipat.BreakX + ( function, cookie ) + + spipat._BreakX( string ) + + + + spipat.NSpan( set ) + + spipat.NSpan + ( function, cookie ) + + spipat._NSpan( string ) + + + + spipat.Span( set ) + + spipat.Span + ( function, cookie ) + + spipat._Span( string ) + + + + + Description + + + String primitives are pattern constructors that in their first form all take a string or + number (which is converted to a string) as their sole argument + (set). + + In their second form they take a Lua function and an optional cookie + as arguments. When the constructed pattern is about to be matched, the function is called + and is supposed to return a string or number (which is converted to + a string) to supply the primitive's argument dynamically. + It receives the following arguments when invoked: + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + The primitives with a leading underscore (e.g. _Any) are similar but actually get their argument + from a global variable with the name a string argument contains. + This means that for instance _Any(str) does not get its character set from the global variable str + but rather from the variable with the name str contains, e.g. foo if str == "foo". 
+ So generally _Any is equivalent to: + + + + In a similar manner, other kinds of variable deferring can be implemented + using function closures as described in . + + + References to function and cookie will be kept so they will not be + garbage collected until the pattern constructed is garbage collected. + + + The following table describes what these primitives do: + + + + String Primitives + + + + + + + Primitive + Description + + + + Any( S ) + + Where S is a string, matches a single character that is + any one of the characters in S. Fails if the current + character is not one of the given set of characters. + + + NotAny( S ) + + Where S is a string, matches a single character that is + not one of the characters of S. Fails if the current + character is one of the given set of characters. + + + Break( S ) + + Where S is a string, matches a string of zero or more + characters up to but not including a break character + that is one of the characters given in the string S. + Can match the null string, but cannot match the last + character in the string, since a break character is + required to be present. + + + BreakX( S ) + + Where S is a string, behaves exactly like Break(S) when + it first matches, but if a string is successfully matched, + then a subsequent failure causes an attempt to extend the + matched string. + + + NSpan( S ) + + Where S is a string, matches a string of zero or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Always succeeds, since it can match the null string. + + + Span( S ) + + Where S is a string, matches a string of one or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Fails if the current character is not one of the given + set of characters. + + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + Arbno + + + Arbno + Matches a pattern any number of times + + + + + spipat.Arbno( P ) + + Arbno( P ) + + + + + Description + + + Where P is any pattern, matches any number of instances + of the pattern, starting with zero occurrences. It is + thus equivalent to ("" + (P * ("" + (P * ("" ....)))). + The pattern P may contain any number of pattern elements + including the use of alternation and concatenation. + + Arbno is a pattern constructor taking exactly one argument which is + either a pattern or string (which is treated + like it is converted to a pattern first). + + + A reference to P will be kept if it is a pattern + so it will not be garbage collected until the pattern constructed is garbage collected. + + + Return Values + + + pattern (userdata): Pattern built by Arbno + + + + + + + Fence + + + Fence + Abort match when alternations are sought + + + + + spipat.Fence( P ) + + Fence( P ) + + + + + Description + + + Fence is a pattern constructor taking either no argument or exactly one + pattern as an argument. + + + A reference to pattern P will be kept so it will not + be garbage collected until the pattern constructed is garbage collected. + + + The following table describes what the two versions do: + + + + Fence Primitive + + + + + + + Primitive + Description + + + + Fence() + + Matches the null string at first, and then if a failure + causes alternatives to be sought, aborts the match (like + a Cancel). Note that using Fence at the + start of a pattern has the same effect as matching in anchored mode. + + + Fence( P ) + + Where P is a pattern, attempts to match the pattern P + including trying all possible alternatives of P. If none + of these alternatives succeeds, then the Fence pattern + fails. If one alternative succeeds, then the pattern + match proceeds, but on a subsequent failure, no attempt + is made to search for alternative matches of P. The + pattern P may contain any number of pattern elements + including the use of alternation and concatenation.
+ + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by Fence + + + +
+ + + Integer Primitives + + + Len + Match a number of characters + + + Pos + Match null string if number of characters have been matched + + + RPos + Match null string if number of characters remain to be matched + + + Tab + Match characters until number of characters have been matched + + + RTab + Match characters until number of characters remain to be matched + + + + + spipat.Len( n ) + + spipat.Len + ( function, cookie ) + + spipat._Len( string ) + + + + spipat.Pos( n ) + + spipat.Pos + ( function, cookie ) + + spipat._Pos( string ) + + + + spipat.RPos( n ) + + spipat.RPos + ( function, cookie ) + + spipat._RPos( string ) + + + + spipat.Tab( n ) + + spipat.Tab + ( function, cookie ) + + spipat._Tab( string ) + + + + spipat.RTab( n ) + + spipat.RTab + ( function, cookie ) + + spipat._RTab( string ) + + + + + Description + + + Integer primitives are pattern constructors that in their first form all take a number or + string (which is converted to a number) as their sole argument + (n). + This number has to be an unsigned integer - sometimes a natural number depending on the + primitive. + + + If the argument is omitted, zero is assumed. + + + In their second form the primitives take a Lua function and an optional cookie + as arguments. When the constructed pattern is about to be matched, the function is called + and is supposed to return a number or string (which is converted to + a number) to supply the primitive's argument dynamically. + It receives the following arguments when invoked: + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + The primitives with a leading underscore (e.g. _Len) are similar but actually get their argument + from a global variable with the name a string argument contains. + This means that for instance _Len(str) does not get its argument from the global variable str + but rather from the variable with the name str contains, e.g. foo if str == "foo".
+ So generally _Len is equivalent to: + + + + In a similar manner, other kinds of variable deferring can be implemented + using function closures as described in . + + + References to function and cookie will be kept so they will not be + garbage collected until the pattern constructed is garbage collected. + + + The following table describes what these primitives do: + + + + Integer Primitives + + + + + + + Primitive + Description + + + + Len( N ) + + Where N is a natural number, matches the given number of + characters. For example, Len(10) matches any string that + is exactly ten characters long. + + + Pos( N ) + + Where N is a natural number, matches the null string + if exactly N characters have been matched so far, and + otherwise fails. + + + RPos( N ) + + Where N is a natural number, matches the null string + if exactly N characters remain to be matched, and + otherwise fails. + + + Tab( N ) + + Where N is a natural number, matches characters from + the current position until exactly N characters have + been matched in all. Fails if more than N characters + have already been matched. + + + RTab( N ) + + Where N is a natural number, matches characters from + the current position until exactly N characters remain + to be matched in the string. Fails if fewer than N + unmatched characters remain in the string. + + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + Miscellaneous Primitives + + + Arb + Matches any string + + + Bal + Matches parentheses balanced strings + + + Abort + Immediately abort pattern match + + + Fail + Null alternation + + + Rem + Match the entire remaining subject string + + + Succeed + Match the null string in every alternative + + + + + spipat.Arb() + + Arb() + + spipat.Bal() + + Bal() + + spipat.Abort() + + Abort() + + spipat.Fail() + + Fail() + + spipat.Rem() + + Rem() + + spipat.Succeed() + + Succeed() + + + + + Description + + + These are simple pattern constructor + functions. + + The following table describes what these primitives do: + + + + Miscellaneous Primitives + + + + + + + Primitive + Description + + + + Arb() + + Matches any string. First it matches the null string, and + then on a subsequent failure, matches one character, and + then two characters, and so on. It only fails if the + entire remaining string is matched. + + + Bal() + + Matches a non-empty string that is parentheses balanced + with respect to ordinary () characters. + Examples of balanced strings are "ABC", + "A((B)C)", and "A(B)C(D)E". + Bal matches the shortest possible balanced + string on the first attempt, and if there is a subsequent failure, + attempts to extend the string. + + + Abort() + + Immediately aborts the entire pattern match, signalling + failure. This is a specialized pattern element, which is + useful in conjunction with some of the special pattern + elements that have side effects. + + + Fail() + + The null alternation. Matches no possible strings, so it + always signals failure. This is a specialized pattern + element, which is useful in conjunction with some of the + special pattern elements that have side effects. + + + Rem() + + Matches from the current point to the last character in + the string. This is a specialized pattern element, which + is useful in conjunction with some of the special pattern + elements that have side effects.
+ + + Succeed() + + Repeatedly matches the null string (it is equivalent to + the alternation ("" + "" + "" ....). This is a special + pattern element, which is useful in conjunction with some + of the special pattern elements that have side effects. + + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + POSIX Extended Regular Expressions + + + RegExp + Matches a pattern equivalent to a regular expression + + + + + spipat.RegExp + ( expression, captures ) + + RegExp + ( expression, captures ) + + + + + Description + + + RegExp constructs from a + + POSIX Extended Regular Expression, a pattern that is equivalent to that regular + expression and can be combined with other patterns freely. + + It can optionally construct the pattern to save the captures + from a regular expression match in a Lua table. + + + Even though this implementation should support almost all elements of EREs, + it is considered experimental. + You are advised to use the usual pattern construction primitives. + + + Parameters + + + + expression (string): The POSIX ERE which is compiled + to a pattern. + + captures (table): Optional table, or more precisely + array, to hold subexpression captures. + Naturally, it has to exist when RegExp is called. + When a subexpression is captured (i.e. the pattern equivalent to what is + enclosed in parentheses), the matching string is added to the + end of the table. + Thus, assuming captures is initially empty, if + RegExp("(a(b))", captures) matches, captures + will be {"b", "ab"}. + + + + Return Values + + + pattern (userdata): Pattern built by RegExp + + + Example + + + Regular Expressions + + print(RegExp "^[[:digit:]]*?(abc\\.|de?)") +Pos(0) * Arbno(Any()) * ("abc." + "d" * ("" + "e")) +>]]> + + + +
+ diff --git a/samples/exp2bf.lua b/samples/exp2bf.lua new file mode 100755 index 0000000..dac59d4 --- /dev/null +++ b/samples/exp2bf.lua @@ -0,0 +1,48 @@ +#!/usr/bin/lua + +require "lspipat" + +function EXIT(...) + io.stderr:write(string.format(...)) + os.exit() +end + +stack = {} +function push(val) table.insert(stack, val) end +function binop() + table.insert(stack, { + l = table.remove(stack), + type = table.remove(stack), + r = table.remove(stack) + }) +end + +function compile(node) + if type(node) ~= "table" then return string.rep("+", tonumber(node)) end + + local ret = compile(node.l)..">"..compile(node.r) + node.type:smatch( Any("+-") % function(o) ret = ret.."[<"..o..">-]<" end + + "*" * -function() ret = ">>"..ret.."[<[<+<+>>-]<[>+<-]>>-]<[-]<<" end + + "/" * -function() ret = ">"..ret.."<[->->+>>+<<<[>>+>[-]<<<-]>>".. + "[<<+>>-]>[-<<[<+>-]<<<+>>>>>]<<<<]>[-]>[-]<<<" end, + spipat.match_anchored ) + + return ret +end + +if #arg ~= 1 then EXIT("Invalid number of parameters\n") end + +space = NSpan(" ") +pre = space * ("(" * -"exp" * space * ")" + Span("0123456789") % push) +post = space * ( Any("+-") % push * -"exp" * -binop + + Any("*/") % push * pre * -binop * -"post" ) + "" +exp = pre * post + +if not arg[1]:smatch(exp * RPos(0), spipat.match_anchored) then EXIT("Invalid expression!\n") end + +src = compile(stack[1]).. + "[>++++++++++<[->->+>>+<<<[>>+>[-]<<<-]>>[<<+>>-]".. + ">[-<<[<+>-]>>>+<]<<<<]>>>>>[<<<<<+>>>>>-]>[>]"..string.rep("+", string.byte("0")).. 
+ "[<]<<<[>>>>[>]<+[<]<<<-]<[-]<]>>>>>>[>]<[.<]" + +print(src) diff --git a/samples/regexp.lua b/samples/regexp.lua new file mode 100644 index 0000000..b9b1da2 --- /dev/null +++ b/samples/regexp.lua @@ -0,0 +1,26 @@ +-- Parse IP address using regular expression compiler + +require "lspipat" + + +exp = [=[^([[:digit:]]{1,3})(\.([[:digit:]]{1,3})){3,3}$]=] + +ip1 = RegExp(exp) +print(ip1) + +local captures = {} +ip2 = RegExp(exp, captures) +print(ip2) + +print(spipat.smatch("192.168.0.1", ip1)) +print(spipat.smatch("192.168.000.001", ip1)) +print(spipat.smatch("192.168.0.XXX", ip1)) + +print(spipat.smatch("192.168.0.1", ip2)) + +-- remove captures due to grouping around "." +table.remove(captures, 3) +table.remove(captures, 5) +table.remove(captures, 7) + +print(table.concat(captures, ".")) diff --git a/samples/wave.lua b/samples/wave.lua new file mode 100755 index 0000000..9fd5adb --- /dev/null +++ b/samples/wave.lua @@ -0,0 +1,81 @@ +#!/usr/bin/lua + +require "lspipat" + +function uint(bytes, val) -- binary integer decoding + return Len(bytes) % function(bin) + bin = littleEndian and bin or bin:reverse() + + local n = 0 + local base = 1 + + for _, c in ipairs{bin:byte(1, bytes)} do + n = n + base * c + base = base * 256 + end + + val(n) + end +end + +function _uint(bytes, name) return uint(bytes, function(n) _G[name] = n end) end +function _uint16(name) return _uint(2, name) end +function _uint32(name) return _uint(4, name) end + +hnd = assert(io.open(arg[1])) + +file = hnd:read("*a") + +hnd:close() + +-- WAVE file "grammar" + +format = "fmt " + * _uint32 "FmtChunkSize" + * _Setcur "FmtStartPos" + * _uint16 "AudioFormat" + * _uint16 "NumChannels" + * _uint32 "SampleRate" + * _uint32 "ByteRate" + * _uint16 "BlockAlign" + * _uint16 "BitsPerSample" + * ( -function() return AudioFormat == 1 end + + _uint16 "ExtraParamSize" + * _Len "ExtraParamSize" ) + * Pos(function() return FmtStartPos + FmtChunkSize end) + * -function() return BitsPerSample % 8 == 0 and + BlockAlign 
== NumChannels * BitsPerSample/8 and + ByteRate == SampleRate * BlockAlign end + +data = "data" + * _uint32 "DataChunkSize" + * _Len "DataChunkSize" + +misc = Len(4) + * _uint32 "MiscChunkSize" + * _Len "MiscChunkSize" + +wave = (topattern("RIFF") + "RIFX") + % function(id) littleEndian = id == "RIFF" end + * _uint32 "ChunkSize" + * _Setcur "StartPos" + * "WAVE" + * Arbno(format + data + misc) + * Pos(function() return StartPos + ChunkSize end) + * -function() return DataChunkSize % BlockAlign == 0 end + * RPos(0) + +assert(file:smatch(wave, spipat.match_anchored), + arg[1].." is not a valid WAVE file!") + +print(string.format( +"%s\ +Format: %u\ +Channels: %u\ +Samplerate: %u Hz\ +Byterate: %u Hz\ +Bits/Sample: %u\ +Samples: %u", +arg[1], +AudioFormat, NumChannels, SampleRate, ByteRate, BitsPerSample, DataChunkSize / BlockAlign)) +print(os.date("Length:\t\t%T", DataChunkSize / ByteRate + 60*60*23)) diff --git a/spipat-patches/0.9.3+_image.patch b/spipat-patches/0.9.3+_image.patch new file mode 100644 index 0000000..6dee608 --- /dev/null +++ b/spipat-patches/0.9.3+_image.patch @@ -0,0 +1,94 @@ +--- image.c.orig 2010-05-15 02:03:24.000000000 +0200 ++++ image.c 2010-05-18 06:16:02.347573592 +0200 +@@ -346,16 +346,16 @@ + break; + + case PC_Arbno_S: +- Append(sp, sp->strings[E->Pcode]); ++ Append(sp, sp->strings[PC_Arbno_S]); + AppendC(sp, '('); + spipat_image_seq(sp, E->val.Alt, E, false); + AppendC(sp, ')'); + break; + + case PC_Arbno_X: +- Append(sp, sp->strings[E->Pcode]); ++ Append(sp, sp->strings[PC_Arbno_X]); + AppendC(sp, '('); +- spipat_image_seq(sp, E->val.Alt->Pthen, sp->Refs[E->Index - 2], false); ++ spipat_image_seq(sp, E->val.Alt->Pthen, sp->Refs[E->Index - 3], false); + AppendC(sp, ')'); + break; + +@@ -378,10 +378,10 @@ + break; + + case PC_Fence_X: +- Append(sp, sp->strings[E->Pcode]); +- spipat_image_seq (sp, E->Pthen, sp->Refs[E->Index - 1], false); ++ Append(sp, sp->strings[PC_Fence_X]); ++ AppendC(sp, '('); ++ spipat_image_seq(sp, 
sp->Refs[E->Index]->Pthen, E, false); // PC_R_Enter at Refs[E->Index] + AppendC(sp, ')'); +- ER = sp->Refs[E->Index - 1]->Pthen; + break; + + case PC_Len_Nat: +@@ -410,11 +410,13 @@ + break; + + case PC_Null: +- Append(sp, "\"\""); ++ Append(sp, sp->quote); ++ Append(sp, sp->quote); + break; + + case PC_R_Enter: + sp->Kill_Concat = true; ++ ER = sp->Refs[E->Index - 2]; // allows correct processing of PC_Fence_X & PC_Call_* + break; + + case PC_Rpat: +@@ -486,13 +488,11 @@ + + case PC_Call_Imm: + case PC_Call_OnM: +- // XXX fix me!! + AppendC(sp, '('); +- spipat_image_seq(sp, E, sp->Refs[E->Index - 1], true); ++ spipat_image_seq(sp, sp->Refs[E->Index]->Pthen, E, true); // PC_R_Enter at Refs[E->Index] + Append(sp, sp->strings[E->Pcode]); +- AppendMF(sp, sp->Refs[E->Index - 1]); ++ AppendMF(sp, E); + AppendC(sp, ')'); +- ER = sp->Refs[E->Index - 1]->Pthen; + break; + + case PC_Arb_Y: +--- spipat_image.h.orig 2010-05-15 01:24:44.000000000 +0200 ++++ spipat_image.h 2010-05-15 19:37:32.039626005 +0200 +@@ -78,7 +78,7 @@ + void (*fdf)(struct state *, struct pe *); + }; + +-extern const char *image_strs[PC_NUM_CODES]; ++extern const char *image_strs[]; + + void spipat_image_seq(struct state *sp, + struct pe *E, struct pe *Succ, bool Paren); +--- image_strs.c.orig 2010-05-12 01:50:05.000000000 +0200 ++++ image_strs.c 2010-05-16 06:27:43.228365501 +0200 +@@ -14,6 +14,7 @@ + [PC_Any_VP] = "Any", + [PC_Arb_X] = "Arb", + [PC_Arbno_S] = "Arbno", ++ [PC_Arbno_X] = "Arbno", + [PC_Assign_Imm] = " . 
", + [PC_Assign_OnM] = " $ ", + [PC_Bal] = "Bal", +@@ -64,4 +65,5 @@ + [PC_Tab_NF] = "Tab", + [PC_Tab_NP] = "Tab", + [PC_Tab_Nat] = "Tab", ++ [PC_Dynamic_Func] = "Dynamic" + }; diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..de6160b --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,28 @@ +# Main lspipat Automake file +# processed automatically + +AM_CFLAGS = -std=c99 -Wall + +lualib_lspipat_LTLIBRARIES = core.la +core_la_SOURCES = lspipat.c lspipat.h \ + call.c compose.c unary.c render.c misc.c \ + simple.c string.c uint.c +core_la_LDFLAGS = -module + +if LUA_PRECOMPILE + +lualib_DATA = lspipat.out +CLEANFILES = $(lualib_DATA) +EXTRA_DIST = lspipat.lua + +lspipat.out : lspipat.lua + @LUAC@ @LUAC_FLAGS@ -o $@ $< + +install-data-hook : + mv -f $(DESTDIR)$(lualibdir)/lspipat.out $(DESTDIR)$(lualibdir)/lspipat.lua + +else + +dist_lualib_DATA = lspipat.lua + +endif diff --git a/src/call.c b/src/call.c new file mode 100644 index 0000000..818ebdb --- /dev/null +++ b/src/call.c @@ -0,0 +1,86 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: CALL OPERATIONS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +static void +callFncHandler(VString matched, void *global __attribute__((unused)), void *local) +{ + struct callRefs *call = local; + lua_State *L = call->cb.L; + + lua_rawgeti(L, LUA_REGISTRYINDEX, call->cb.function); + lua_pushlstring(L, matched.ptr, matched.len); + lua_rawgeti(L, LUA_REGISTRYINDEX, call->cb.cookie); +#if 0 + lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global); +#endif + + lua_call(L, 2, 0); +} + +struct callOperator { + struct pat *(*call)(struct pat *, void (*)(VString, void *, void *), void *); +}; + + /* TODO: local cookie support, this would also allow helper functions for assignment to global variables */ + /* at least one parameter is a pattern, the lvalue has to be it */ +static int 
+genericCallOperator(lua_State *L, struct callOperator spipat) +{ + PATTERN_WRAPPER *new; + struct callRefs *call; + + PATTERN_WRAPPER *lvalue = luaL_checkudata(L, 1, PATTERN_MT); + if (!lvalue->pattern) + L_ERROR(L_FREED); + if (!lua_isfunction(L, 2)) + L_ERROR(L_TYPE); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + lua_insert(L, 1); /* move wrapper below lvalue */ + + new->type = PATTERN_CALL; + + call = &new->u.call; + call->cb.L = L; + call->cb.cookie = LUA_REFNIL; + call->cb.function = luaL_ref(L, LUA_REGISTRYINDEX); + call->pattern = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat.call(lvalue->pattern, callFncHandler, call); + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} + +#define STDCALLOP(LFNC, SPIFNC) \ + LUA_SIG(LFNC) \ + { \ + return genericCallOperator(L, (struct callOperator) { \ + .call = SPIFNC \ + }); \ + } + +STDCALLOP(l_op_call_immed, spipat_call_immed) +STDCALLOP(l_op_call_onmatch, spipat_call_onmatch) + +#undef STDCALLOP diff --git a/src/compose.c b/src/compose.c new file mode 100644 index 0000000..b8be248 --- /dev/null +++ b/src/compose.c @@ -0,0 +1,106 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: COMPOSITION OPERATIONS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +struct composeOperator { + struct pat *(*str_pat)(VString, struct pat *); + struct pat *(*pat_str)(struct pat *, VString); + struct pat *(*chr_pat)(Character, struct pat *); + struct pat *(*pat_chr)(struct pat *, Character); + struct pat *(*pat_pat)(struct pat *, struct pat *); +}; + + /* at least one parameter must be a pattern, both are only allowed to be numbers, strings or patterns */ + +static int +genericComposeOperator(lua_State *L, struct composeOperator spipat) +{ + 
VString str = VSTRING_INITIALIZER; + PATTERN_WRAPPER *new; + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + lua_insert(L, 1); + + if (lua_isstring(L, 2)) { /* lvalue number/string, rvalue is pattern */ + PATTERN_WRAPPER *rvalue = lua_touserdata(L, 3); + + if (!rvalue->pattern) + L_ERROR(L_FREED); + str.ptr = lua_tolstring(L, 2, (size_t *)&str.len); + + new->type = PATTERN_ONESUBPAT; + new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX); + + new->pattern = str.len == 1 ? spipat.chr_pat(*str.ptr, rvalue->pattern) + : spipat.str_pat(str, rvalue->pattern); + + lua_pop(L, 1); /* `new' at stack top */ + } else { /* lvalue must be pattern */ + PATTERN_WRAPPER *lvalue = luaL_checkudata(L, 2, PATTERN_MT); + + if (!lvalue->pattern) + L_ERROR(L_FREED); + + if (lua_isstring(L, 3)) { /* rvalue number/string */ + str.ptr = lua_tolstring(L, 3, (size_t *)&str.len); + + new->pattern = str.len == 1 ? spipat.pat_chr(lvalue->pattern, *str.ptr) + : spipat.pat_str(lvalue->pattern, str); + + lua_pop(L, 1); + + new->type = PATTERN_ONESUBPAT; + new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX); + } else { /* rvalue must be pattern */ + PATTERN_WRAPPER *rvalue = luaL_checkudata(L, 3, PATTERN_MT); + + if (!rvalue->pattern) + L_ERROR(L_FREED); + + new->type = PATTERN_TWOSUBPAT; + new->u.twosubpat.pattern2 = luaL_ref(L, LUA_REGISTRYINDEX); + new->u.twosubpat.pattern1 = luaL_ref(L, LUA_REGISTRYINDEX); + + new->pattern = spipat.pat_pat(lvalue->pattern, rvalue->pattern); + } + } + + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} + +#define STDCOMPOSEOP(LFNC, SPIFNC) \ + LUA_SIG(LFNC) \ + { \ + return genericComposeOperator(L, (struct composeOperator) { \ + .str_pat = SPIFNC##_str_pat, \ + .pat_str = SPIFNC##_pat_str, \ + .chr_pat = SPIFNC##_chr_pat, \ + .pat_chr = SPIFNC##_pat_chr, \ + .pat_pat = SPIFNC##_pat_pat \ + }); \ + } + 
+STDCOMPOSEOP(l_op_and, spipat_and) +STDCOMPOSEOP(l_op_or, spipat_or) + +#undef STDCOMPOSEOP diff --git a/src/lspipat.c b/src/lspipat.c new file mode 100644 index 0000000..5075961 --- /dev/null +++ b/src/lspipat.c @@ -0,0 +1,336 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: LIBSPIPAT <-> LUA INTERACTION + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +/* + * Module and Pattern methods + */ + +/* TODO: support global cookies */ + +LUA_SIG(l_smatch) +{ + int top = lua_gettop(L); + + struct spipat_match match; + enum spipat_match_ret ret; + + luaL_argcheck(L, top == 2 || top == 3, top, L_NUMBER); + + memset(&match, 0, sizeof(match)); + match.subject.ptr = luaL_checklstring(L, 1, (size_t *)&match.subject.len); + match.flags = luaL_optint(L, 3, 0); + + if (lua_isstring(L, 2)) { + VString str = VSTRING_INITIALIZER; + str.ptr = lua_tolstring(L, 2, (size_t *)&str.len); + + match.pattern = str.len == 1 ? spipat_char(*str.ptr) + : spipat_string(str); + if (!match.pattern) + L_ERROR(L_ALLOC); + } else { + PATTERN_WRAPPER *wrapper = luaL_checkudata(L, 2, PATTERN_MT); + luaL_argcheck(L, wrapper->pattern, 2, L_FREED); + + match.pattern = wrapper->pattern; + spipat_hold(match.pattern); + } + + ret = spipat_match2(&match); + spipat_free(match.pattern); /* only frees the temporary pattern for string params */ + if (ret == SPIPAT_MATCH_EXCEPTION) + L_ERROR("%s", match.exception); + + if (ret == SPIPAT_MATCH_FAILURE) { + lua_pushnil(L); + return 1; + } + + /* SPIPAT_MATCH_SUCCESS */ + lua_pushinteger(L, match.start); + lua_pushinteger(L, match.stop); + return 2; +} + + /* should we check __topattern operations in types metatables just like tostring does? 
*/ +LUA_SIG(l_topattern) +{ + int top = lua_gettop(L); + + luaL_argcheck(L, top == 1, top, L_NUMBER); + + switch (lua_type(L, 1)) { + case LUA_TNUMBER: + case LUA_TSTRING: { + PATTERN_WRAPPER *wrapper; + VString str = VSTRING_INITIALIZER; + + if (!(wrapper = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(wrapper, 0, sizeof(PATTERN_WRAPPER)); + + str.ptr = lua_tolstring(L, 1, (size_t *)&str.len); + + wrapper->pattern = str.len == 1 ? spipat_char(*str.ptr) + : spipat_string(str); + if (!wrapper->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; + } + case LUA_TUSERDATA: + /* FIXME: check whether it's a PATTERN_MT (without raising an error) */ + return 1; + + default: + return 0; + } + + /* not reached */ +} + +LUA_SIG(l_dump) +{ + PATTERN_WRAPPER *wrapper; + int top = lua_gettop(L); + + luaL_argcheck(L, top == 1, top, L_NUMBER); + wrapper = luaL_checkudata(L, 1, PATTERN_MT); + luaL_argcheck(L, wrapper->pattern, 1, L_FREED); + + spipat_dump(wrapper->pattern); + return 0; +} + +/* + * Finalizer + */ + +static inline void +unrefCallback(struct cbRefs *cb) +{ + luaL_unref(cb->L, LUA_REGISTRYINDEX, cb->function); + luaL_unref(cb->L, LUA_REGISTRYINDEX, cb->cookie); +} + +LUA_SIG(l_finalize_pattern) +{ + int top = lua_gettop(L); + PATTERN_WRAPPER *wrapper; + + luaL_argcheck(L, top == 1, top, L_NUMBER); + wrapper = luaL_checkudata(L, 1, PATTERN_MT); + + if (!wrapper->pattern) + return 0; /* already freed */ + + spipat_free(wrapper->pattern); /* should also release any strings/patterns */ + wrapper->pattern = NULL; /* (remove from registry using release functions) returned by some callback */ + + switch (wrapper->type) { + case PATTERN_OTHER: + break; + case PATTERN_ONESUBPAT: + luaL_unref(L, LUA_REGISTRYINDEX, wrapper->u.onesubpat.pattern); + break; + case PATTERN_TWOSUBPAT: + luaL_unref(L, LUA_REGISTRYINDEX, wrapper->u.twosubpat.pattern1); + luaL_unref(L, LUA_REGISTRYINDEX, 
wrapper->u.twosubpat.pattern2); + break; + case PATTERN_CALL: + luaL_unref(L, LUA_REGISTRYINDEX, wrapper->u.call.pattern); + unrefCallback(&wrapper->u.call.cb); + break; + case PATTERN_RETFNC: + unrefCallback(&wrapper->u.retfnc.cb); + break; + case PATTERN_SIMPLEFNC: + unrefCallback(&wrapper->u.simplefnc.cb); + break; + default: + L_ERROR(L_MISC); + } + + return 0; +} + +/* + * Cookie release function for function return values + */ + +void +retfncUnrefRet(void *arg) +{ + struct retfncRefs *retfnc = arg; + + luaL_unref(retfnc->cb.L, LUA_REGISTRYINDEX, retfnc->ret); +} + +/* + * Loader + */ + +int +luaopen_lspipat_core(lua_State *L) +{ + static const luaL_Reg spipat[] = { + {"smatch", l_smatch}, + + {"topattern", l_topattern}, + {"dump", l_dump}, + + {"free", l_finalize_pattern}, + {NULL, NULL} + }; + + static const luaL_Reg primitives[] = { + /* string primitives */ + {"Any", l_primitive_any}, + {"Break", l_primitive_break}, + {"BreakX", l_primitive_breakx}, + {"NotAny", l_primitive_notany}, + {"NSpan", l_primitive_nspan}, + {"Span", l_primitive_span}, + + /* unsigned integer primitives */ + {"Len", l_primitive_len}, + {"Pos", l_primitive_pos}, + {"RPos", l_primitive_rpos}, + {"RTab", l_primitive_rtab}, + {"Tab", l_primitive_tab}, + + /* simple primitives */ + {"Abort", l_primitive_abort}, + {"Arb", l_primitive_arb}, + {"Bal", l_primitive_bal}, + {"Fail", l_primitive_fail}, + {"Rem", l_primitive_rem}, + {"Succeed", l_primitive_succeed}, + + /* misc. 
primitives */ + {"Arbno", l_primitive_arbno}, + {"Fence", l_primitive_fence}, + + /* primitives for unary operators */ + {"Setcur", l_setcur}, + {"Pred", l_pred}, + {NULL, NULL} + }; + + static const luaL_Reg methods[] = { + {"free", l_finalize_pattern}, + {NULL, NULL} + }; + + static const luaL_Reg operations[] = { + {"__mul", l_op_and}, + {"__add", l_op_or}, + {"__mod", l_op_call_immed}, + {"__div", l_op_call_onmatch}, + + {"__tostring", l_tostring}, + + {"__gc", l_finalize_pattern}, + {NULL, NULL} + }; + + static const LUA_CONSTANT mapping[] = { + {"match_debug", SPIPAT_DEBUG}, + {"match_anchored", SPIPAT_ANCHORED}, + {NULL, 0} + }; + + /* module methods, primitives & constants */ + + luaL_register(L, "spipat", spipat); + luaL_register(L, NULL, primitives); + + for (const LUA_CONSTANT *m = mapping; m->lua; m++) { + lua_pushinteger(L, m->c); + lua_setfield(L, -2, m->lua); + } + /* module table should be at stack index 2 */ + + /* global methods & primitives */ + /* FIXME: make it optional (function or submodule) */ + + for (const luaL_Reg *p = primitives; p->name; p++) { + lua_pushcfunction(L, p->func); + lua_setglobal(L, p->name); + } + lua_pushcfunction(L, l_topattern); + lua_setglobal(L, "topattern"); + + /* "patch" string meta table with some methods */ + + lua_pushstring(L, "foo"); /* FIXME: use luaL_getmetatable */ + lua_getmetatable(L, -1); + lua_getfield(L, 2, "_Pred"); /* ok, this is hairy: will only be available if string cannot be converted to a number */ + lua_setfield(L, -2, "__unm"); + lua_getfield(L, -1, "__index"); + lua_pushcfunction(L, l_smatch); /* maybe split "spipat" and use luaL_register */ + lua_setfield(L, -2, "smatch"); + lua_getfield(L, 2, "ssub"); /* maybe write aux function to register Lua functions */ + lua_setfield(L, -2, "ssub"); + lua_getfield(L, 2, "siter"); + lua_setfield(L, -2, "siter"); + lua_pushcfunction(L, l_topattern); + lua_setfield(L, -2, "topattern"); + lua_pop(L, 3); + /* TODO: maybe also set the pattern-specific 
operations - adapt l_op_or/and to cope with two strings + however, arithmetic ops are already defined for strings if they can be converted to numbers */ + + /* "patch" number meta table with some methods */ + + lua_pushinteger(L, 23); /* FIXME: use luaL_getmetatable */ + if (!lua_getmetatable(L, -1)) { + lua_newtable(L); + lua_newtable(L); + } else + lua_getfield(L, -1, "__index"); + lua_pushcfunction(L, l_topattern); + lua_setfield(L, -2, "topattern"); + lua_setfield(L, -2, "__index"); + lua_setmetatable(L, -2); + lua_pop(L, 1); + + /* "patch" function meta table with operators */ + + lua_pushcfunction(L, l_smatch); /* FIXME: use luaL_getmetatable */ + if (!lua_getmetatable(L, -1)) + lua_newtable(L); + lua_pushcfunction(L, l_setcur); + lua_setfield(L, -2, "__len"); + lua_pushcfunction(L, l_pred); + lua_setfield(L, -2, "__unm"); + lua_setmetatable(L, -2); + lua_pop(L, 1); + + /* pattern metatable: methods & operations/events */ + + luaL_newmetatable(L, PATTERN_MT); + luaL_register(L, NULL, operations); + lua_newtable(L); + luaL_register(L, NULL, methods); + lua_setfield(L, -2, "__index"); + lua_pop(L, 1); + + /* module table should be on top of the stack again */ + return 1; +} \ No newline at end of file diff --git a/src/lspipat.h b/src/lspipat.h new file mode 100644 index 0000000..dbae4b2 --- /dev/null +++ b/src/lspipat.h @@ -0,0 +1,149 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + */ + +#ifndef _LSPIPAT_H +#define _LSPIPAT_H + +#ifdef HAVE_LUA5_1_LUA_H +#include +#include +#include +#else +#include +#include +#include +#endif + +#include +#include +#include + +#if defined(HAVE_SPIPAT_IMPL_H) && defined(HAVE_SPIPAT_IMAGE_H) +#define USE_SPIPAT_IMAGE_CUSTOM + +#include +#include + +#endif + +#define VSTRING_INITIALIZER {NULL, 0, NULL, NULL} + + /* Lua error raising */ + +#define L_ALLOC "Allocation error" +#define L_MISC "Miscellaneous error" +#define L_TYPE "Invalid type" +#define L_NUMBER "Invalid number of 
parameters" +#define L_VALUE "Invalid value for this parameter" +#define L_FREED "Pattern already freed" +#define L_RETURN "Invalid return value" + +#define L_ERROR(MSG, ...) do { \ + luaL_error(L, MSG "\n", ##__VA_ARGS__); \ +} while (0) /* return omitted, so it works for all functions */ + + /* metatables */ + +#define PATTERN_MT "SPIPAT.PATTERN_MT" + + /* structures */ + +typedef struct { + const char *lua; + int c; +} LUA_CONSTANT; + +struct cbRefs { /* wraps references necessary for callbacks */ + lua_State *L; + + int function; + int cookie; /* local cookie */ +}; + +typedef struct { + struct pat *pattern; + + enum { /* Lua reference classes of patterns */ + PATTERN_OTHER = 0, + PATTERN_ONESUBPAT, + PATTERN_TWOSUBPAT, + PATTERN_CALL, + PATTERN_RETFNC, + PATTERN_SIMPLEFNC + } type; + + union { /* references to control garbage collection */ + struct onesubpatRefs { + int pattern; + } onesubpat; + + struct twosubpatRefs { + int pattern1; + int pattern2; + } twosubpat; + + struct callRefs { + int pattern; + struct cbRefs cb; + } call; + + struct retfncRefs { + struct cbRefs cb; + int ret; + } retfnc; + + struct simplefncRefs { + struct cbRefs cb; + } simplefnc; + } u; +} PATTERN_WRAPPER; + + /* Lua functions */ + +#define LUA_SIG(FNC) \ + int FNC(lua_State *L) + +LUA_SIG(l_smatch); +LUA_SIG(l_topattern); +LUA_SIG(l_dump); + +LUA_SIG(l_tostring); + +LUA_SIG(l_op_and); +LUA_SIG(l_op_or); + +LUA_SIG(l_op_call_immed); +LUA_SIG(l_op_call_onmatch); + +LUA_SIG(l_setcur); +LUA_SIG(l_pred); + +LUA_SIG(l_primitive_any); +LUA_SIG(l_primitive_break); +LUA_SIG(l_primitive_breakx); +LUA_SIG(l_primitive_notany); +LUA_SIG(l_primitive_nspan); +LUA_SIG(l_primitive_span); + +LUA_SIG(l_primitive_len); +LUA_SIG(l_primitive_pos); +LUA_SIG(l_primitive_rpos); +LUA_SIG(l_primitive_rtab); +LUA_SIG(l_primitive_tab); + +LUA_SIG(l_primitive_abort); +LUA_SIG(l_primitive_arb); +LUA_SIG(l_primitive_bal); +LUA_SIG(l_primitive_fail); +LUA_SIG(l_primitive_rem); +LUA_SIG(l_primitive_succeed); + 
+LUA_SIG(l_primitive_arbno); +LUA_SIG(l_primitive_fence); + +void retfncUnrefRet(void *); + +#endif \ No newline at end of file diff --git a/src/lspipat.lua b/src/lspipat.lua new file mode 100644 index 0000000..9db2082 --- /dev/null +++ b/src/lspipat.lua @@ -0,0 +1,155 @@ +-- +-- LSPIPAT - LUA SPIPAT WRAPPER +-- Copyright (C) 2010, Robin Haberkorn +-- License: LGPL +-- +-- ADDITIONAL METHODS IMPLEMENTED IN LUA +-- + +module("spipat", package.seeall) + +-- +-- Module and Pattern methods +-- + +function ssub(str, pattern, repl, n, flags) + assert(type(repl) == "string" or type(repl) == "function", + "Invalid replacement specified!") + assert(type(n) == "nil" or type(n) == "number", + "Invalid repeat value specified!") + + local cMatches = 0 + repeat + -- cares about the remaining checks + local s, e = smatch(str, pattern, flags) + if not s then break end + + local res = type(repl) == "string" and repl or repl(s, e) + assert(type(res) == "nil" or type(res) == "string", + "Replacement function returned invalid value!") + + if res then str = str:sub(1, s - 1)..res..str:sub(e + 1) end + + if type(n) == "number" then n = n - 1 end + cMatches = cMatches + 1 + until n == 0 + + return str, cMatches +end +-- siter: returns an iterator function; each call runs smatch() and returns the start/end positions recorded by the two cursor callbacks, or nothing once smatch() fails +function siter(str, pattern, flags) + local startPos, endPos = nil, 0 -- both local: an undeclared startPos would live in the module environment and be shared by all iterators + pattern = Pos(function() return endPos end) * Arb() * + #function(p) startPos = p + 1 end * pattern * #function(p) endPos = p end + + return function() + if not smatch(str, pattern, flags) then return end + return startPos, endPos + end +end + +-- +-- Primitives (shortcuts for deferring global variables) +-- + +local function genericSetGlobal(val, name) _G[name] = val end + +function _Setcur(name) return Setcur(genericSetGlobal, name) end +_G._Setcur = _Setcur +-- unfortunately, we can't register this as __len to strings...
+ + -- NOTE: if global `name' is of an invalid type, + -- lspipat will raise an error automatically +local function genericGetGlobal(name) return _G[name] end + +for _, prim in ipairs{ + "Pred", -- _Pred will be registered as __unm to strings + "Any", "Break", "BreakX", "NotAny", "NSpan", "Span", -- string primitives + "Len", "Pos", "RPos", "RTab", "Tab" -- number primitives +} do + local _prim = "_"..prim + + spipat[_prim] = function(name) return spipat[prim](genericGetGlobal, name) end + _G[_prim] = spipat[_prim] +end + +-- FIXME: local cookie support for assignments -> shortcuts for assignment of global variables + +-- +-- POSIX Extended Regular Expressions To SPITBOL Pattern Compiler +-- + +function RegExp(str, captures) + assert(type(captures) == "nil" or type(captures) == "table", + "Invalid captures table given!") + + local stack = {} + local function push(v) table.insert(stack, v) end + local function pop() return table.remove(stack) end + local r2p = {["."] = Len(1), ["^"] = Pos(0), ["$"] = RPos(0)} + + local set + local function add(c) table.insert(set, c) return c end + + local classes = { + blank = " \t", + punct = [[-!"#$%&'()*+,./:;<=>?@[\]^_`{|}~]], + lower = "abcdefghijklmnopqrstuvwxyz", + digit = "0123456789" + } + classes.upper = classes.lower:upper() + classes.alpha = classes.upper..classes.lower + classes.alnum = classes.alpha..classes.digit + classes.word = classes.alnum.."_" + classes.xdigit = classes.upper:sub(1, 6)..classes.lower:sub(1, 6)..classes.digit + classes.space = classes.blank.."\r\n\v\f" + -- TODO: some character classes are still missing... 
+ + local function exp() return exp end + local function seq() return seq end + local atom = ( "\\" * (Len(1) % push) + + NotAny(".[]^$()*+?|{}") % push + + Any(".^$") % function(r) push(r2p[r]) end + + "[" * ( "^" * -function() push(NotAny) set = {} end + + -function() push(Any) set = {} end ) + * (topattern("]") % add + "") + * Arbno( "[:" * (Break(":") % push) * ":]" * -function() return add(classes[pop()]) ~= nil end + + Len(1) * "-" * Len(1) + % function(range) for c = range:byte(), range:byte(3) do add(string.char(c)) end end + + Len(1) % add ) + * "]" * -function() push(pop()(table.concat(set))) end + + "(" * -exp * ")" + * -function() if captures then + push(topattern(pop()) / function(cap) table.insert(captures, cap) end) end end ) + * ( "*" * ( "?" * -function() push(Arbno(pop())) end + + -function() local r; r = pop() * -function() return r end + "" + push(r) end ) + + "+" * -function() local r; r = pop() * (-function() return r end + "") + push(r) end + + "?" * -function() push(topattern("") + pop()) end + + "{" * ( Span(classes.digit) % push ) * "," + * ( Span(classes.digit) + % function(max) local min, c = pop() + local r; r = pop() * -function() c = c + 1 + return c >= tonumber(max) or r end + "" + push(-function() c = 0 end * r * -function() return c >= tonumber(min) end) end ) + * "}" + + "" ) + seq = ( atom * -function() local rvalue, lvalue = pop(), pop() + push(type(lvalue) == "string" and type(rvalue) == "string" and + lvalue..rvalue or lvalue * rvalue) end + * (-seq + "") + "" ) + * ( "|" * -exp * -function() local pat = pop() push(pop() + topattern(pat)) end + + "" ) + exp = atom * seq + + assert(smatch(str, exp * RPos(0), match_anchored), + "Invalid regular expression!") + + return stack[1] +end +_G.RegExp = RegExp + + -- load C core, also registers Lua functions into metatables we cannot + -- access from Lua +require "lspipat.core" \ No newline at end of file diff --git a/src/misc.c b/src/misc.c new file mode 100644 index 0000000..2bea4c8 
--- /dev/null +++ b/src/misc.c @@ -0,0 +1,89 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: MISCELLANEOUS PRIMITIVES/CONSTRUCTORS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +LUA_SIG(l_primitive_arbno) +{ + int top = lua_gettop(L); + + VString str = VSTRING_INITIALIZER; + PATTERN_WRAPPER *new; + + luaL_argcheck(L, top == 1, top, L_NUMBER); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + + if (lua_isstring(L, 1)) { + str.ptr = lua_tolstring(L, 1, (size_t *)&str.len); + + new->pattern = str.len == 1 ? spipat_arbno_chr(*str.ptr) + : spipat_arbno_str(str); + } else { + PATTERN_WRAPPER *wrapper = luaL_checkudata(L, 1, PATTERN_MT); + luaL_argcheck(L, wrapper->pattern, 1, L_FREED); + + lua_insert(L, 1); /* move wrapper to bottom */ + new->type = PATTERN_ONESUBPAT; + new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat_arbno(wrapper->pattern); + } + + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; + +} + +LUA_SIG(l_primitive_fence) +{ + int top = lua_gettop(L); + PATTERN_WRAPPER *new; + + luaL_argcheck(L, top < 2, top, L_NUMBER); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + + if (!top) { + new->pattern = spipat_fence_simple(); + } else { + PATTERN_WRAPPER *wrapper = luaL_checkudata(L, 1, PATTERN_MT); + luaL_argcheck(L, wrapper->pattern, 1, L_FREED); + + lua_insert(L, 1); /* move wrapper to bottom */ + new->type = PATTERN_ONESUBPAT; + new->u.onesubpat.pattern = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat_fence_function(wrapper->pattern); + } + + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + 
lua_setmetatable(L, -2); + + return 1; +} diff --git a/src/render.c b/src/render.c new file mode 100644 index 0000000..28c96ce --- /dev/null +++ b/src/render.c @@ -0,0 +1,138 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: RENDER-TO-STRING OPERATION + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +#ifdef USE_SPIPAT_IMAGE_CUSTOM + +static const char *lspipat_strs[] = { /* left out elements that can't be constructed with lspipat */ + [PC_Abort] = "Abort", + [PC_Alt] = " + ", + [PC_Any_CH] = "Any", + [PC_Any_CS] = "Any", + [PC_Any_VF] = "Any", + [PC_Arb_X] = "Arb", + [PC_Arbno_S] = "Arbno", + [PC_Arbno_X] = "Arbno", + [PC_Bal] = "Bal", + [PC_BreakX_CH] = "BreakX", + [PC_BreakX_CS] = "BreakX", + [PC_BreakX_VF] = "BreakX", + [PC_Break_CH] = "Break", + [PC_Break_CS] = "Break", + [PC_Break_VF] = "Break", + [PC_Call_Imm] = " % ", + [PC_Call_OnM] = " / ", + [PC_Fail] = "Fail", + [PC_Fence] = "Fence", + [PC_Fence_X] = "Fence", + [PC_Len_NF] = "Len", + [PC_Len_Nat] = "Len", + [PC_NSpan_CH] = "NSpan", + [PC_NSpan_CS] = "NSpan", + [PC_NSpan_VF] = "NSpan", + [PC_NotAny_CH] = "NotAny", + [PC_NotAny_CS] = "NotAny", + [PC_NotAny_VF] = "NotAny", + [PC_Null] = "\"\"", + [PC_Pos_NF] = "Pos", + [PC_Pos_Nat] = "Pos", + [PC_RPos_NF] = "RPos", + [PC_RPos_Nat] = "RPos", + [PC_RTab_NF] = "RTab", + [PC_RTab_Nat] = "RTab", + [PC_Rem] = "Rem", + [PC_Setcur_Func] = "#", /* also: Setcur */ + [PC_Span_CH] = "Span", + [PC_Span_CS] = "Span", + [PC_Span_VF] = "Span", + [PC_Succeed] = "Succeed", + [PC_Tab_NF] = "Tab", + [PC_Tab_Nat] = "Tab", + [PC_Dynamic_Func] = "-" /* also: Pred */ +}; + +/* TODO: Define some custom Append functions */ + +LUA_SIG(l_tostring) +{ + char buf[1024], *bigbuf; + unsigned len; + + struct state state = { + .ptr = buf, + .size = sizeof(buf) + }; + + PATTERN_WRAPPER *wrapper = lua_touserdata(L, 1); /* parameter is definitely a pattern */ + + luaL_argcheck(L, 
wrapper->pattern, 1, L_FREED); + + spipat_image_init_state(&state); + state.cquote = "\""; + state.concat = " * "; + state.strings = lspipat_strs; + + len = spipat_image_custom(&state, wrapper->pattern); + if (len < sizeof(buf)) { + lua_pushlstring(L, buf, len); + return 1; + } + + /* sizeof(buf) was too small */ + + state.size = len + 1; + if (!(bigbuf = malloc(state.size))) + L_ERROR(L_ALLOC); + state.ptr = bigbuf; + + spipat_image_custom(&state, wrapper->pattern); + lua_pushlstring(L, bigbuf, len); + + free(bigbuf); + + return 1; +} + +#else + +LUA_SIG(l_tostring) +{ + char buf[1024], *bigbuf; + unsigned len; + + PATTERN_WRAPPER *wrapper = lua_touserdata(L, 1); /* parameter is definitely a pattern */ + + luaL_argcheck(L, wrapper->pattern, 1, L_FREED); + + len = spipat_image(wrapper->pattern, buf, sizeof(buf)); + if (len < sizeof(buf)) { + lua_pushlstring(L, buf, len); + return 1; + } + + /* sizeof(buf) was too small */ + + if (!(bigbuf = malloc(len + 1))) + L_ERROR(L_ALLOC); + + spipat_image(wrapper->pattern, bigbuf, len + 1); + lua_pushlstring(L, bigbuf, len); + + free(bigbuf); + + return 1; +} + +#endif diff --git a/src/simple.c b/src/simple.c new file mode 100644 index 0000000..d407129 --- /dev/null +++ b/src/simple.c @@ -0,0 +1,57 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: SIMPLE PRIMITIVES/CONSTRUCTORS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +struct simplePrimitive { + struct pat *(*simple)(void); +}; + +static int +genericSimplePrimitive(lua_State *L, struct simplePrimitive spipat) +{ + int top = lua_gettop(L); + PATTERN_WRAPPER *new; + + luaL_argcheck(L, !top, top, L_NUMBER); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + + if (!(new->pattern = spipat.simple())) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + 
return 1; +} + +#define STDSIMPLEPRIM(LFNC, SPIFNC) \ + LUA_SIG(LFNC) \ + { \ + return genericSimplePrimitive(L, (struct simplePrimitive) { \ + .simple = SPIFNC \ + }); \ + } + +STDSIMPLEPRIM(l_primitive_abort, spipat_abort) +STDSIMPLEPRIM(l_primitive_arb, spipat_arb) +STDSIMPLEPRIM(l_primitive_bal, spipat_bal) +STDSIMPLEPRIM(l_primitive_fail, spipat_fail) +STDSIMPLEPRIM(l_primitive_rem, spipat_rem) +STDSIMPLEPRIM(l_primitive_succeed, spipat_succeed) + +#undef STDSIMPLEPRIM diff --git a/src/string.c b/src/string.c new file mode 100644 index 0000000..43f266c --- /dev/null +++ b/src/string.c @@ -0,0 +1,131 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: STRING PRIMITIVES/CONSTRUCTORS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +static VString +stringFncHandler(void *global __attribute__((unused)), void *local) +{ + struct retfncRefs *retfnc = local; + lua_State *L = retfnc->cb.L; + + VString ret; + + lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.function); + lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.cookie); +#if 0 + lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global); +#endif + + lua_call(L, 1, 1); + + if (!lua_isstring(L, -1)) { + lua_pop(L, 1); + L_ERROR(L_RETURN); /* FIXME: is it safe to raise errors? 
*/ + } + + ret.ptr = lua_tolstring(L, -1, (size_t *)&ret.len); + ret.release = retfncUnrefRet; + ret.cookie = retfnc; + + /* + * Register value so Lua doesn't free it until spipat + * doesn't need it anymore (value has to be popped now) + */ + retfnc->ret = luaL_ref(L, LUA_REGISTRYINDEX); + return ret; +} + +struct stringPrimitive { + struct pat *(*chr)(Character); + struct pat *(*str)(VString); + struct pat *(*fnc)(VString (*)(void *, void*), void *); +}; + +static int +genericStringPrimitive(lua_State *L, struct stringPrimitive spipat) +{ + int top = lua_gettop(L); + + VString str = VSTRING_INITIALIZER; + PATTERN_WRAPPER *new; + + luaL_argcheck(L, top, top, L_NUMBER); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + + switch (lua_type(L, 1)) { + case LUA_TNUMBER: + case LUA_TSTRING: + luaL_argcheck(L, top == 1, top, L_NUMBER); + + str.ptr = lua_tolstring(L, 1, (size_t *)&str.len); + + new->pattern = str.len == 1 ? 
spipat.chr(*str.ptr) + : spipat.str(str); + break; + + case LUA_TFUNCTION: { + struct retfncRefs *retfnc; + + luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER); + + lua_insert(L, 1); /* move wrapper to bottom */ + if (top == 1) + lua_pushnil(L); /* cookie will be LUA_REFNIL */ + + new->type = PATTERN_RETFNC; + + retfnc = &new->u.retfnc; + retfnc->cb.L = L; + retfnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX); + retfnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat.fnc(stringFncHandler, retfnc); + break; + } + default: + return luaL_argerror(L, 1, L_TYPE); + } + + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} + +#define STDSTRPRIM(LFNC, SPIFNC) \ + LUA_SIG(LFNC) \ + { \ + return genericStringPrimitive(L, (struct stringPrimitive) { \ + .chr = SPIFNC##_chr, \ + .str = SPIFNC##_str, \ + .fnc = SPIFNC##_fnc \ + }); \ + } + +STDSTRPRIM(l_primitive_any, spipat_any) +STDSTRPRIM(l_primitive_break, spipat_break) +STDSTRPRIM(l_primitive_breakx, spipat_breakx) +STDSTRPRIM(l_primitive_notany, spipat_notany) +STDSTRPRIM(l_primitive_nspan, spipat_nspan) +STDSTRPRIM(l_primitive_span, spipat_span) + +#undef STDSTRPRIM diff --git a/src/uint.c b/src/uint.c new file mode 100644 index 0000000..1a0530b --- /dev/null +++ b/src/uint.c @@ -0,0 +1,128 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: UNSIGNED INTEGER PRIMITIVES/CONSTRUCTORS + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include + +#include "lspipat.h" + +static unsigned +uintFncHandler(void *global __attribute__((unused)), void *local) +{ + struct simplefncRefs *simplefnc = local; + lua_State *L = simplefnc->cb.L; + + int val; + + lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.function); + lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.cookie); +#if 0 + lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global); +#endif + + 
lua_call(L, 1, 1); + + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + return 0; /* default value */ + } + + if (!lua_isnumber(L, -1)) { + lua_pop(L, 1); + L_ERROR(L_RETURN); /* FIXME: is it safe to raise errors? */ + } + + val = lua_tointeger(L, -1); + lua_pop(L, 1); + if (val < 0) + L_ERROR(L_RETURN); + + return (unsigned)val; +} + +struct uintPrimitive { + struct pat *(*uint)(unsigned); + struct pat *(*fnc)(unsigned (*)(void *, void *), void *); +}; + +static int +genericUIntPrimitive(lua_State *L, struct uintPrimitive spipat) +{ + int top = lua_gettop(L); + PATTERN_WRAPPER *new; + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + + switch (lua_type(L, 1)) { + case LUA_TNONE: + case LUA_TNIL: + case LUA_TNUMBER: + case LUA_TSTRING: { + int val; + + luaL_argcheck(L, top < 2, top, L_NUMBER); + val = luaL_optint(L, 1, 0); + luaL_argcheck(L, val >= 0, 1, L_VALUE); + + new->pattern = spipat.uint((unsigned)val); + break; + } + case LUA_TFUNCTION: { + struct simplefncRefs *simplefnc; + + luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER); + + lua_insert(L, 1); /* move wrapper to bottom */ + if (top == 1) + lua_pushnil(L); /* cookie will be LUA_REFNIL */ + + new->type = PATTERN_SIMPLEFNC; + + simplefnc = &new->u.simplefnc; + simplefnc->cb.L = L; + simplefnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX); + simplefnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat.fnc(uintFncHandler, simplefnc); + break; + } + default: + return luaL_argerror(L, 1, L_TYPE); + } + + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} + +#define STDUINTPRIM(LFNC, SPIFNC) \ + LUA_SIG(LFNC) \ + { \ + return genericUIntPrimitive(L, (struct uintPrimitive) { \ + .uint = SPIFNC, \ + .fnc = SPIFNC##_fnc \ + }); \ + } + +STDUINTPRIM(l_primitive_len, spipat_len) +STDUINTPRIM(l_primitive_pos, spipat_pos) 
+STDUINTPRIM(l_primitive_rpos, spipat_rpos) +STDUINTPRIM(l_primitive_rtab, spipat_rtab) +STDUINTPRIM(l_primitive_tab, spipat_tab) + +#undef STDUINTPRIM diff --git a/src/unary.c b/src/unary.c new file mode 100644 index 0000000..b3f40ef --- /dev/null +++ b/src/unary.c @@ -0,0 +1,182 @@ +/* + * LSPIPAT - LUA SPIPAT WRAPPER + * Copyright (C) 2010, Robin Haberkorn + * License: LGPL + * + * CORE: UNARY OPERATORS (ALSO USED AS PRIMITIVES) + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "lspipat.h" + +static void +setcurFncHandler(unsigned pos, void *global __attribute__((unused)), void *local) +{ + struct simplefncRefs *simplefnc = local; + lua_State *L = simplefnc->cb.L; + + lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.function); + lua_pushinteger(L, pos); + lua_rawgeti(L, LUA_REGISTRYINDEX, simplefnc->cb.cookie); +#if 0 + lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global); +#endif + + lua_call(L, 2, 0); +} + + /* + * if called as an operator, there will be a nil on top of the stack + */ +LUA_SIG(l_setcur) +{ + int top = lua_gettop(L); + + PATTERN_WRAPPER *new; + struct simplefncRefs *simplefnc; + + luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER); + luaL_argcheck(L, lua_isfunction(L, 1), 1, L_TYPE); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + + lua_insert(L, 1); /* move wrapper to bottom */ + if (top == 1) + lua_pushnil(L); /* cookie will be LUA_REFNIL */ + + new->type = PATTERN_SIMPLEFNC; + + simplefnc = &new->u.simplefnc; + simplefnc->cb.L = L; + simplefnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX); + simplefnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat_setcur_fnc(setcurFncHandler, simplefnc); + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} + +static void +predFncHandler(void *global 
__attribute__((unused)), void *local, struct dynamic *ret) +{ + struct retfncRefs *retfnc = local; + lua_State *L = retfnc->cb.L; + + lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.function); + lua_rawgeti(L, LUA_REGISTRYINDEX, retfnc->cb.cookie); +#if 0 + lua_rawgeti(L, LUA_REGISTRYINDEX, *(int *)global); +#endif + + lua_call(L, 1, 1); + + switch (lua_type(L, -1)) { + case LUA_TNUMBER: + case LUA_TSTRING: { + VString *str = &ret->val.str; + + ret->type = DY_VSTR; + + str->ptr = lua_tolstring(L, -1, (size_t *)&str->len); + str->release = retfncUnrefRet; + str->cookie = retfnc; + + /* + * Register value so Lua doesn't free it until spipat + * doesn't need it anymore (value has to be popped now) + */ + retfnc->ret = luaL_ref(L, LUA_REGISTRYINDEX); + return; + } + case LUA_TNIL: /* default behaviour: continue matching (Succeed) */ + ret->type = DY_BOOL; + + ret->val.pred = true; + + lua_pop(L, 1); + return; + + case LUA_TBOOLEAN: + ret->type = DY_BOOL; + + ret->val.pred = lua_toboolean(L, -1); + + lua_pop(L, 1); + return; + + case LUA_TUSERDATA: { /* FIXME: check whether it's really a Pattern */ + PATTERN_WRAPPER *wrapper = lua_touserdata(L, -1); + if (!wrapper->pattern) { + lua_pop(L, 1); + L_ERROR(L_RETURN); + } + + ret->type = DY_PAT; + + ret->val.pat.p = wrapper->pattern; + ret->val.pat.release = retfncUnrefRet; + ret->val.pat.cookie = retfnc; + + /* + * Register value so Lua doesn't free it until spipat + * doesn't need it anymore (value has to be popped now) + */ + retfnc->ret = luaL_ref(L, LUA_REGISTRYINDEX); + return; + } + default: + lua_pop(L, 1); + L_ERROR(L_RETURN); + } + + /* not reached */ +} + +LUA_SIG(l_pred) +{ + int top = lua_gettop(L); + + PATTERN_WRAPPER *new; + struct retfncRefs *retfnc; + + luaL_argcheck(L, top == 1 || top == 2, top, L_NUMBER); + luaL_argcheck(L, lua_isfunction(L, 1), 1, L_TYPE); + + if (!(new = lua_newuserdata(L, sizeof(PATTERN_WRAPPER)))) + L_ERROR(L_ALLOC); + memset(new, 0, sizeof(PATTERN_WRAPPER)); + + lua_insert(L, 1); /* move 
wrapper to bottom */ + if (top == 1) + lua_pushnil(L); /* cookie will be LUA_REFNIL */ + + new->type = PATTERN_RETFNC; + retfnc = &new->u.retfnc; + retfnc->cb.L = L; + retfnc->cb.cookie = luaL_ref(L, LUA_REGISTRYINDEX); + retfnc->cb.function = luaL_ref(L, LUA_REGISTRYINDEX); + /* wrapper at top again */ + + new->pattern = spipat_dynamic_fnc(predFncHandler, retfnc); + if (!new->pattern) + L_ERROR(L_ALLOC); + + luaL_getmetatable(L, PATTERN_MT); + lua_setmetatable(L, -2); + + return 1; +} -- cgit v1.2.3