From d3148268857e01116d5d3c99ac0a43bc6a54b13c Mon Sep 17 00:00:00 2001 From: Robin Haberkorn Date: Wed, 29 Dec 2010 16:26:25 +0100 Subject: initial checkin (v0.1 release) --- doc/Makefile.am | 15 + doc/html_custom.xsl | 6 + doc/html_titlepage.spec.xml | 688 +++++++++++++++ doc/lspipat.png | Bin 0 -> 4266 bytes doc/pattern.txt | 1017 ++++++++++++++++++++++ doc/reference.xml | 2005 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 3731 insertions(+) create mode 100644 doc/Makefile.am create mode 100644 doc/html_custom.xsl create mode 100644 doc/html_titlepage.spec.xml create mode 100644 doc/lspipat.png create mode 100644 doc/pattern.txt create mode 100644 doc/reference.xml (limited to 'doc') diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..f8ff134 --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,15 @@ +## Docbook processing - very simplistic at the moment + +DB_URI = http://docbook.sourceforge.net/release/xsl/current + +dist_doc_DATA = pattern.txt + +dist_html_DATA = reference.html lspipat.png +CLEANFILES = reference.html html_titlepage.xsl +EXTRA_DIST = reference.xml html_custom.xsl html_titlepage.spec.xml + +reference.html : reference.xml html_custom.xsl html_titlepage.xsl + @XSLTPROC@ @XSLT_FLAGS@ -o $@ html_custom.xsl $< + +html_titlepage.xsl : html_titlepage.spec.xml + @XSLTPROC@ @XSLT_FLAGS@ -o $@ $(DB_URI)/template/titlepage.xsl $< diff --git a/doc/html_custom.xsl b/doc/html_custom.xsl new file mode 100644 index 0000000..af94064 --- /dev/null +++ b/doc/html_custom.xsl @@ -0,0 +1,6 @@ + + + + + + diff --git a/doc/html_titlepage.spec.xml b/doc/html_titlepage.spec.xml new file mode 100644 index 0000000..ea44036 --- /dev/null +++ b/doc/html_titlepage.spec.xml @@ -0,0 +1,688 @@ + + + + + + + + + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content 
t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="set" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="book" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <mediaobject/> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="part" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="division.title" + param:node="ancestor-or-self::part[1]"/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + 
<releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="partintro" t:wrapper="div"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="reference" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <hr/> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="refentry" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> +<!-- uncomment this if you want refentry titlepages + <title t:force="1" + t:named-template="refentry.title" + 
param:node="ancestor-or-self::refentry[1]"/> +--> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator/> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + + <t:titlepage t:element="dedication" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::dedication[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="acknowledgements" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::acknowledgements[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="preface" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + 
</t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="chapter" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="appendix" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="section" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + 
<t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect1" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect2" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect3" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + 
<t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect4" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="sect5" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<t:titlepage t:element="simplesect" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title/> + <subtitle/> + <corpauthor/> + <authorgroup/> + <author/> + <othercredit/> + <releaseinfo/> + <copyright/> + <legalnotice/> + <pubdate/> + <revision/> + <revhistory/> + <abstract/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + <xsl:if 
test="count(parent::*)='0'"><hr/></xsl:if> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="bibliography" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::bibliography[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="glossary" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::glossary[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="index" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::index[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + 
</t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +<t:titlepage t:element="setindex" t:wrapper="div" class="titlepage"> + <t:titlepage-content t:side="recto"> + <title + t:force="1" + t:named-template="component.title" + param:node="ancestor-or-self::setindex[1]"/> + <subtitle/> + </t:titlepage-content> + + <t:titlepage-content t:side="verso"> + </t:titlepage-content> + + <t:titlepage-separator> + </t:titlepage-separator> + + <t:titlepage-before t:side="recto"> + </t:titlepage-before> + + <t:titlepage-before t:side="verso"> + </t:titlepage-before> +</t:titlepage> + +<!-- ==================================================================== --> + +</t:templates> diff --git a/doc/lspipat.png b/doc/lspipat.png new file mode 100644 index 0000000..317751d Binary files /dev/null and b/doc/lspipat.png differ diff --git a/doc/pattern.txt b/doc/pattern.txt new file mode 100644 index 0000000..e0d1719 --- /dev/null +++ b/doc/pattern.txt @@ -0,0 +1,1017 @@ +Copyright (C) 2007,2008, Philip L. Budne +Copyright (C) 1998-2005, AdaCore + +This documentation (and the underlying software) developed from the +GNAT.SPITBOL.PATTERNS package of GNU Ada. GNAT was originally +developed by the GNAT team at New York University. Extensive +contributions were provided by Ada Core Technologies Inc. + +SPITBOL-like pattern construction and matching + +This child package of GNAT.SPITBOL provides a complete implementation +of the SPITBOL-like pattern construction and matching operations. This +package is based on Macro-SPITBOL created by Robert Dewar. + +This is a completely general pattern matching package based on the +pattern language of SNOBOL4, as implemented in SPITBOL. The pattern +language is modeled on context free grammars, with context sensitive +extensions that provide full (type 0) computational capabilities. 
+ +------------------------------- +Pattern Matching Tutorial +------------------------------- + +A pattern matching operation (a call to one of the Match subprograms) +takes a subject string and a pattern, and optionally a replacement +string. The replacement string option is only allowed if the subject +is a variable. + +The pattern is matched against the subject string, and either the +match fails, or it succeeds matching a contiguous substring. If a +replacement string is specified, then the subject string is modified +by replacing the matched substring with the given replacement. + +Concatenation and Alternation +============================= + +A pattern consists of a series of pattern elements. The pattern is +built up using either the concatenation operator: + + A & B + + which means match A followed immediately by matching B, or the + alternation operator: + + A | B + + which means first attempt to match A, and then if that does not + succeed, match B. + + There is full backtracking, which means that if a given pattern + element fails to match, then previous alternatives are matched. + For example if we have the pattern: + + (A | B) & (C | D) & (E | F) + + First we attempt to match A, if that succeeds, then we go on to try + to match C, and if that succeeds, we go on to try to match E. If E + fails, then we try F. If F fails, then we go back and try matching + D instead of C. Let's make this explicit using a specific example, + and introducing the simplest kind of pattern element, which is a + literal string. The meaning of this pattern element is simply to + match the characters that correspond to the string characters. Now + let's rewrite the above pattern form with specific string literals + as the pattern elements: + + ("ABC" | "AB") & ("DEF" | "CDE") & ("GH" | "IJ") + + The following strings will be attempted in sequence: + + ABC . DEF . GH + ABC . DEF . IJ + ABC . CDE . GH + ABC . CDE . IJ + AB . DEF . GH + AB . DEF . IJ + AB . CDE . GH + AB . CDE . 
IJ + + Here we use the dot simply to separate the pieces of the string + matched by the three separate elements. + + Moving the Start Point + ====================== + + A pattern is not required to match starting at the first character + of the string, and is not required to match to the end of the string. + The first attempt does indeed attempt to match starting at the first + character of the string, trying all the possible alternatives. But + if all alternatives fail, then the starting point of the match is + moved one character, and all possible alternatives are attempted at + the new anchor point. + + The entire match fails only when every possible starting point has + been attempted. As an example, suppose that we had the subject + string + + "ABABCDEIJKL" + + matched using the pattern in the previous example: + + ("ABC" | "AB") & ("DEF" | "CDE") & ("GH" | "IJ") + + would succeed, after two anchor point moves: + + "ABABCDEIJKL" + ^^^^^^^ + matched + section + + This mode of pattern matching is called the unanchored mode. It is + also possible to put the pattern matcher into anchored mode by + setting the global variable Anchored_Mode to True. This will cause + all subsequent matches to be performed in anchored mode, where the + match is required to start at the first character. + + We will also see later how the effect of an anchored match can be + obtained for a single specified anchor point if this is desired. + + Other Pattern Elements + ====================== + + In addition to strings (or single characters), there are many special + pattern elements that correspond to special predefined alternations: + + Arb Matches any string. First it matches the null string, and + then on a subsequent failure, matches one character, and + then two characters, and so on. It only fails if the + entire remaining string is matched. + + Bal Matches a non-empty string that is parentheses balanced + with respect to ordinary () characters. 
Examples of + balanced strings are "ABC", "A((B)C)", and "A(B)C(D)E". + Bal matches the shortest possible balanced string on the + first attempt, and if there is a subsequent failure, + attempts to extend the string. + + Abort Immediately aborts the entire pattern match, signalling + failure. This is a specialized pattern element, which is + useful in conjunction with some of the special pattern + elements that have side effects. + + Fail The null alternation. Matches no possible strings, so it + always signals failure. This is a specialized pattern + element, which is useful in conjunction with some of the + special pattern elements that have side effects. + + Fence Matches the null string at first, and then if a failure + causes alternatives to be sought, aborts the match (like + an Abort). Note that using Fence at the start of a pattern + has the same effect as matching in anchored mode. + + Rem Matches from the current point to the last character in + the string. This is a specialized pattern element, which + is useful in conjunction with some of the special pattern + elements that have side effects. + + Succeed Repeatedly matches the null string (it is equivalent to + the alternation ("" | "" | "" ....)). This is a special + pattern element, which is useful in conjunction with some + of the special pattern elements that have side effects. + + Pattern Construction Functions + ============================== + + The following functions construct additional pattern elements: + + Any(S) Where S is a string, matches a single character that is + any one of the characters in S. Fails if the current + character is not one of the given set of characters. + + Arbno(P) Where P is any pattern, matches any number of instances + of the pattern, starting with zero occurrences. It is + thus equivalent to ("" | (P & ("" | (P & ("" ....)))). + The pattern P may contain any number of pattern elements + including the use of alternation and concatenation. 
+ + Break(S) Where S is a string, matches a string of zero or more + characters up to but not including a break character + that is one of the characters given in the string S. + Can match the null string, but cannot match the last + character in the string, since a break character is + required to be present. + + BreakX(S) Where S is a string, behaves exactly like Break(S) when + it first matches, but if a string is successfully matched, + then a subsequent failure causes an attempt to extend the + matched string. + + Fence(P) Where P is a pattern, attempts to match the pattern P + including trying all possible alternatives of P. If none + of these alternatives succeeds, then the Fence pattern + fails. If one alternative succeeds, then the pattern + match proceeds, but on a subsequent failure, no attempt + is made to search for alternative matches of P. The + pattern P may contain any number of pattern elements + including the use of alternation and concatenation. + + Len(N) Where N is a natural number, matches the given number of + characters. For example, Len(10) matches any string that + is exactly ten characters long. + + NotAny(S) Where S is a string, matches a single character that is + not one of the characters of S. Fails if the current + character is one of the given set of characters. + + NSpan(S) Where S is a string, matches a string of zero or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Always succeeds, since it can match the null string. + + Pos(N) Where N is a natural number, matches the null string + if exactly N characters have been matched so far, and + otherwise fails. + + Rpos(N) Where N is a natural number, matches the null string + if exactly N characters remain to be matched, and + otherwise fails. + + Rtab(N) Where N is a natural number, matches characters from + the current position until exactly N characters remain + to be matched in the string. 
Fails if fewer than N + unmatched characters remain in the string. + + Tab(N) Where N is a natural number, matches characters from + the current position until exactly N characters have + been matched in all. Fails if more than N characters + have already been matched. + + Span(S) Where S is a string, matches a string of one or more + characters that is among the characters given in the + string. Always matches the longest possible such string. + Fails if the current character is not one of the given + set of characters. + + Recursive Pattern Matching + ========================== + + The plus operator (+P) where P is a pattern variable, creates + a recursive pattern that will, at pattern matching time, follow + the pointer to obtain the referenced pattern, and then match this + pattern. This may be used to construct recursive patterns. Consider + for example: + + P := ("A" | ("B" & (+P))) + + On the first attempt, this pattern attempts to match the string "A". + If this fails, then the alternative matches a "B", followed by an + attempt to match P again. This second attempt first attempts to + match "A", and so on. The result is a pattern that will match a + string of B's followed by a single A. + + This particular example could simply be written as NSpan('B') & 'A', + but the use of recursive patterns in the general case can construct + complex patterns which could not otherwise be built. + + Pattern Assignment Operations + ============================= + + In addition to the overall result of a pattern match, which indicates + success or failure, it is often useful to be able to keep track of + the pieces of the subject string that are matched by individual + pattern elements, or subsections of the pattern. + + The pattern assignment operators allow this capability. The first + form is the immediate assignment: + + P * S + + Here P is an arbitrary pattern, and S is a variable of type VString + that will be set to the substring matched by P. 
This assignment + happens during pattern matching, so if P matches more than once, + then the assignment happens more than once. + + The deferred assignment operation: + + P ** S + + avoids these multiple assignments by deferring the assignment to the + end of the match. If the entire match is successful, and if the + pattern P was part of the successful match, then at the end of the + matching operation the assignment to S of the string matching P is + performed. + + The cursor assignment operation: + + Setcur(N) + + assigns the current cursor position to the natural variable N. The + cursor position is defined as the count of characters that have been + matched so far (including any start point moves). + + Finally the operations * and ** may be used with values of type + Text_IO.File_Access. The effect is to do a Put_Line operation of + the matched substring. These are particularly useful in debugging + pattern matches. + + Deferred Matching + ================= + + The pattern construction functions (such as Len and Any) all permit + the use of pointers to natural or string values, or functions that + return natural or string values. These forms cause the actual value + to be obtained at pattern matching time. This allows interesting + possibilities for constructing dynamic patterns as illustrated in + the examples section. + + In addition the (+S) operator may be used where S is a pointer to + string or function returning string, with a similar deferred effect. + + A special use of deferred matching is the construction of predicate + functions. The element (+P) where P is an access to a function that + returns a Boolean value, causes the function to be called at the + time the element is matched. If the function returns True, then the + null string is matched, if the function returns False, then failure + is signalled and previous alternatives are sought. 
+ + Deferred Replacement + ==================== + + The simple model given for pattern replacement (where the matched + substring is replaced by the string given as the third argument to + Match) works fine in simple cases, but this approach does not work + in the case where the expression used as the replacement string is + dependent on values set by the match. + + For example, suppose we want to find an instance of a parenthesized + character, and replace the parentheses with square brackets. At first + glance it would seem that: + + Match (Subject, '(' & Len (1) * Char & ')', '[' & Char & ']'); + + would do the trick, but that does not work, because the third + argument to Match gets evaluated too early, before the call to + Match, and before the pattern match has had a chance to set Char. + + To solve this problem we provide the deferred replacement capability. + This approach, which of course is only needed if the pattern + involved has side effects, is to do the match in two stages. The + call to Match sets a pattern result in a variable of the private + type Match_Result, and then a subsequent Replace operation uses + this Match_Result object to perform the required replacement. + + Using this approach, we can now write the above operation properly + in a manner that will work: + + M : Match_Result; + ... + Match (Subject, '(' & Len (1) * Char & ')', M); + Replace (M, '[' & Char & ']'); + + As with other Match cases, there is a function and procedure form + of this match call. A call to Replace after a failed match has no + effect. Note that Subject should not be modified between the calls. + + Examples of Pattern Matching + ============================ + + First a simple example of the use of pattern replacement to remove + a line number from the start of a string. We assume that the line + number has the form of a string of decimal digits followed by a + period, followed by one or more spaces. 
+ + Digs : constant Pattern := Span("0123456789"); + + Lnum : constant Pattern := Pos(0) & Digs & '.' & Span(' '); + + Now to use this pattern we simply do a match with a replacement: + + Match (Line, Lnum, ""); + + which replaces the line number by the null string. Note that it is + also possible to use an Ada.Strings.Maps.Character_Set value as an + argument to Span and similar functions, and in particular all the + useful constants in Ada.Strings.Maps.Constants are available. This + means that we could define Digs as: + + Digs : constant Pattern := Span(Decimal_Digit_Set); + + The style we use here, of defining constant patterns and then using + them, is typical. It is possible to build up patterns dynamically, + but it is usually more efficient to build them in pieces in advance + using constant declarations. Note in particular that although it is + possible to construct a pattern directly as an argument for the + Match routine, it is much more efficient to preconstruct the pattern + as we did in this example. + + Now let's look at the use of pattern assignment to break a + string into sections. Suppose that the input string has two + unsigned decimal integers, separated by spaces or a comma, + with spaces allowed anywhere. Then we can isolate the two + numbers with the following pattern: + + Num1, Num2 : aliased VString; + + B : constant Pattern := NSpan(' '); + + N : constant Pattern := Span("0123456789"); + + T : constant Pattern := + NSpan(' ') & N * Num1 & Span(" ,") & N * Num2; + + The match operation Match (" 124, 257 ", T) would assign the + string 124 to Num1 and the string 257 to Num2. + + Now let's see how more complex elements can be built from the + set of primitive elements. 
The following pattern matches strings + that have the syntax of Ada 95 based literals: + + Digs : constant Pattern := Span(Decimal_Digit_Set); + UDigs : constant Pattern := Digs & Arbno('_' & Digs); + + Edig : constant Pattern := Span(Hexadecimal_Digit_Set); + UEdig : constant Pattern := Edig & Arbno('_' & Edig); + + Bnum : constant Pattern := Udigs & '#' & UEdig & '#'; + + A match against Bnum will now match the desired strings, e.g. + it will match 16#123_abc#, but not a#b#. However, this pattern + is not quite complete, since it does not allow colons to replace + the pound signs. The following is more complete: + + Bchar : constant Pattern := Any("#:"); + Bnum : constant Pattern := Udigs & Bchar & UEdig & Bchar; + + but that is still not quite right, since it allows # and : to be + mixed, and they are supposed to be used consistently. We solve + this by using a deferred match. + + Temp : aliased VString; + + Bnum : constant Pattern := + Udigs & Bchar * Temp & UEdig & (+Temp); + + Here the first instance of the base character is stored in Temp, and + then later in the pattern we rematch the value that was assigned. + + For an example of a recursive pattern, let's define a pattern + that is like the built in Bal, but the string matched is balanced + with respect to square brackets or curly brackets. + + The language for such strings might be defined in extended BNF as + + ELEMENT ::= <any character other than [] or {}> + | '[' BALANCED_STRING ']' + | '{' BALANCED_STRING '}' + + BALANCED_STRING ::= ELEMENT {ELEMENT} + + Here we use {} to indicate zero or more occurrences of a term, as + is common practice in extended BNF. Now we can translate the above + BNF into recursive patterns as follows: + + Element, Balanced_String : aliased Pattern; + . + . + . 
+ Element := NotAny ("[]{}") + | + ('[' & (+Balanced_String) & ']') + | + ('{' & (+Balanced_String) & '}'); + + Balanced_String := Element & Arbno (Element); + + Note the important use of + here to refer to a pattern not yet + defined. Note also that we use assignments precisely because we + cannot refer to as yet undeclared variables in initializations. + + Now that this pattern is constructed, we can use it as though it + were a new primitive pattern element, and for example, the match: + + Match ("xy[ab{cd}]", Balanced_String * Current_Output & Fail); + + will generate the output: + + x + xy + xy[ab{cd}] + y + y[ab{cd}] + [ab{cd}] + a + ab + ab{cd} + b + b{cd} + {cd} + c + cd + d + + Note that the function of the fail here is simply to force the + pattern Balanced_String to match all possible alternatives. Studying + the operation of this pattern in detail is highly instructive. + + Finally we give a rather elaborate example of the use of deferred + matching. The following declarations build up a pattern which will + find the longest string of decimal digits in the subject string. + + Max, Cur : VString; + Loc : Natural; + + function GtS return Boolean is + begin + return Length (Cur) > Length (Max); + end GtS; + + Digit : constant Character_Set := Decimal_Digit_Set; + + Digs : constant Pattern := Span(Digit); + + Find : constant Pattern := + "" * Max & Fence & -- initialize Max to null + BreakX (Digit) & -- scan looking for digits + ((Span(Digit) * Cur & -- assign next string to Cur + (+GtS) & -- check size(Cur) > Size(Max) + Setcur(Loc)) -- if so, save location + * Max) & -- and assign to Max + Fail; -- seek all alternatives + + As we see from the comments here, complex patterns like this take + on aspects of sequential programs. In fact they are sequential + programs with general backtracking. In this pattern, we first use + a pattern assignment that matches null and assigns it to Max, so + that it is initialized for the new match. 
Now BreakX scans to the + next digit. Arb would do here, but BreakX will be more efficient. + Once we have found a digit, we scan out the longest string of + digits with Span, and assign it to Cur. The deferred call to GtS + tests if the string we assigned to Cur is the longest so far. If + not, then failure is signalled, and we seek alternatives (this + means that BreakX will extend and look for the next digit string). + If the call to GtS succeeds then the matched string is assigned + as the largest string so far into Max and its location is saved + in Loc. Finally Fail forces the match to fail and seek alternatives, + so that the entire string is searched. + + If the pattern Find is matched against a string, the variable Max + at the end of the pattern will have the longest string of digits, + and Loc will be the location of the last character of that string. For + example, Match("ab123cd4657ef23", Find) will assign "4657" to Max + and 11 to Loc (indicating that the string ends with the eleventh + character of the string). + + Correspondence with Pattern Matching in SPITBOL + =============================================== + + Generally the Ada syntax and names correspond closely to SPITBOL + syntax for pattern matching construction. + + The basic pattern construction operators are renamed as follows: + + Spitbol Ada + + (space) & + | or + $ * + . ** + + The Ada operators were chosen so that the relative precedences of + these operators correspond to those of the Spitbol operators, but + as always, the use of parentheses is advisable to clarify. + + The pattern construction operators all have similar names. + + The actual pattern matching syntax is modified in Ada as follows: + + Spitbol Ada + + X Y Match (X, Y); + X Y = Z Match (X, Y, Z); + + and pattern failure is indicated by returning a Boolean result from + the Match function (True for success, False for failure). 
+ +----------------------- +Type Declarations +----------------------- + +type Pattern is private; + Type representing a pattern. This package provides a complete set of + operations for constructing patterns that can be used in the pattern + matching operations provided. + +type Boolean_Func is access function return Boolean; + General Boolean function type. When this type is used as a formal + parameter type in this package, it indicates a deferred predicate + pattern. The function will be called when the pattern element is + matched and failure signalled if False is returned. + +type Natural_Func is access function return Natural; + General Natural function type. When this type is used as a formal + parameter type in this package, it indicates a deferred pattern. + The function will be called when the pattern element is matched + to obtain the currently referenced Natural value. + +type VString_Func is access function return VString; + General VString function type. When this type is used as a formal + parameter type in this package, it indicates a deferred pattern. + The function will be called when the pattern element is matched + to obtain the currently referenced string value. + +subtype PString is String; + This subtype is used in the remainder of the package to indicate a + formal parameter that is converted to its corresponding pattern, + i.e. a pattern that matches the characters of the string. + +subtype PChar is Character; + Similarly, this subtype is used in the remainder of the package to + indicate a formal parameter that is converted to its corresponding + pattern, i.e. a pattern that matches this one character. + +subtype VString_Var is VString; +subtype Pattern_Var is Pattern; + These synonyms are used as formal parameter types to a function where, + if the language allowed, we would use in out parameters, but we are + not allowed to have in out parameters for functions. 
Instead we pass + actuals which must be variables, and with a bit of trickery in the + body, manage to interpret them properly as though they were indeed + in out parameters. + +-------------------------------- +Basic Pattern Construction +-------------------------------- + +function "&" (L : Pattern; R : Pattern) return Pattern; +function "&" (L : PString; R : Pattern) return Pattern; +function "&" (L : Pattern; R : PString) return Pattern; +function "&" (L : PChar; R : Pattern) return Pattern; +function "&" (L : Pattern; R : PChar) return Pattern; + + Pattern concatenation. Matches L followed by R + +function "or" (L : Pattern; R : Pattern) return Pattern; +function "or" (L : PString; R : Pattern) return Pattern; +function "or" (L : Pattern; R : PString) return Pattern; +function "or" (L : PString; R : PString) return Pattern; +function "or" (L : PChar; R : Pattern) return Pattern; +function "or" (L : Pattern; R : PChar) return Pattern; +function "or" (L : PChar; R : PChar) return Pattern; +function "or" (L : PString; R : PChar) return Pattern; +function "or" (L : PChar; R : PString) return Pattern; + Pattern alternation. Creates a pattern that will first try to match + L and then on a subsequent failure, attempts to match R instead. + +---------------------------------- +Pattern Assignment Functions +---------------------------------- + +function "*" (P : Pattern; Var : VString_Var) return Pattern; +function "*" (P : PString; Var : VString_Var) return Pattern; +function "*" (P : PChar; Var : VString_Var) return Pattern; + Matches P, and if the match succeeds, assigns the matched substring + to the given VString variable Var. This assignment happens as soon as + the substring is matched, and if the pattern P is matched more than + once during the course of the match, then the assignment will occur + more than once. 
+ +function "**" (P : Pattern; Var : VString_Var) return Pattern; +function "**" (P : PString; Var : VString_Var) return Pattern; +function "**" (P : PChar; Var : VString_Var) return Pattern; + Like "*" above, except that the assignment happens at most once + after the entire match is completed successfully. If the match + fails, then no assignment takes place. + +---------------------------------- +Deferred Matching Operations +---------------------------------- + +function "+" (Str : VString_Var) return Pattern; + Here Str must be a VString variable. This function constructs a + pattern which at pattern matching time will access the current + value of this variable, and match against these characters. + +function "+" (Str : VString_Func) return Pattern; + Constructs a pattern which at pattern matching time calls the given + function, and then matches against the string or character value + that is returned by the call. + +function "+" (P : Pattern_Var) return Pattern; + Here P must be a Pattern variable. This function constructs a + pattern which at pattern matching time will access the current + value of this variable, and match against the pattern value. + +function "+" (P : Boolean_Func) return Pattern; + Constructs a predicate pattern function that at pattern matching time + calls the given function. If True is returned, then the pattern matches. + If False is returned, then failure is signalled. + +-------------------------------- +Pattern Building Functions +-------------------------------- + +function Arb return Pattern; + Constructs a pattern that will match any string. On the first attempt, + the pattern matches a null string, then on each successive failure, it + matches one more character, and only fails if matching the entire rest + of the string. + +function Arbno (P : Pattern) return Pattern; +function Arbno (P : PString) return Pattern; +function Arbno (P : PChar) return Pattern; + Pattern repetition. 
First matches null, then on a subsequent failure + attempts to match an additional instance of the given pattern. + Equivalent to (but more efficient than) P & ("" | (P & ("" | ... + +function Any (Str : String) return Pattern; +function Any (Str : VString) return Pattern; +function Any (Str : Character) return Pattern; +function Any (Str : Character_Set) return Pattern; +function Any (Str : access VString) return Pattern; +function Any (Str : VString_Func) return Pattern; + Constructs a pattern that matches a single character that is one of + the characters in the given argument. The pattern fails if the current + character is not in Str. + +function Bal return Pattern; + Constructs a pattern that will match any non-empty string that is + parentheses balanced with respect to the normal parentheses characters. + Attempts to extend the string if a subsequent failure occurs. + +function Break (Str : String) return Pattern; +function Break (Str : VString) return Pattern; +function Break (Str : Character) return Pattern; +function Break (Str : Character_Set) return Pattern; +function Break (Str : access VString) return Pattern; +function Break (Str : VString_Func) return Pattern; + Constructs a pattern that matches a (possibly null) string which + is immediately followed by a character in the given argument. This + character is not part of the matched string. The pattern fails if + the remaining characters to be matched do not include any of the + characters in Str. + +function BreakX (Str : String) return Pattern; +function BreakX (Str : VString) return Pattern; +function BreakX (Str : Character) return Pattern; +function BreakX (Str : Character_Set) return Pattern; +function BreakX (Str : access VString) return Pattern; +function BreakX (Str : VString_Func) return Pattern; + Like Break, but the pattern attempts to extend on a failure to find + the next occurrence of a character in Str, and only fails when the + last such instance causes a failure. 
+ +function Cancel return Pattern; + Constructs a pattern that immediately aborts the entire match + +function Fail return Pattern; + Constructs a pattern that always fails + +function Fence return Pattern; + Constructs a pattern that matches null on the first attempt, and then + causes the entire match to be aborted if a subsequent failure occurs. + +function Fence (P : Pattern) return Pattern; + Constructs a pattern that first matches P. If P fails, then the + constructed pattern fails. If P succeeds, then the match proceeds, + but if subsequent failure occurs, alternatives in P are not sought. + The idea of Fence is that each time the pattern is matched, just + one attempt is made to match P, without trying alternatives. + +function Len (Count : Natural) return Pattern; +function Len (Count : access Natural) return Pattern; +function Len (Count : Natural_Func) return Pattern; + Constructs a pattern that matches exactly the given number of + characters. The pattern fails if fewer than this number of characters + remain to be matched in the string. + +function NotAny (Str : String) return Pattern; +function NotAny (Str : VString) return Pattern; +function NotAny (Str : Character) return Pattern; +function NotAny (Str : Character_Set) return Pattern; +function NotAny (Str : access VString) return Pattern; +function NotAny (Str : VString_Func) return Pattern; + Constructs a pattern that matches a single character that is not + one of the characters in the given argument. The pattern fails if + the current character is in Str. + +function NSpan (Str : String) return Pattern; +function NSpan (Str : VString) return Pattern; +function NSpan (Str : Character) return Pattern; +function NSpan (Str : Character_Set) return Pattern; +function NSpan (Str : access VString) return Pattern; +function NSpan (Str : VString_Func) return Pattern; + Constructs a pattern that matches the longest possible string + consisting entirely of characters from the given argument. 
The + string may be empty, so this pattern always succeeds. + +function Pos (Count : Natural) return Pattern; +function Pos (Count : access Natural) return Pattern; +function Pos (Count : Natural_Func) return Pattern; + Constructs a pattern that matches the null string if exactly Count + characters have already been matched, and otherwise fails. + +function Rem return Pattern; + Constructs a pattern that always succeeds, matching the remaining + unmatched characters in the string. + +function Rpos (Count : Natural) return Pattern; +function Rpos (Count : access Natural) return Pattern; +function Rpos (Count : Natural_Func) return Pattern; + Constructs a pattern that matches the null string if exactly Count + characters remain to be matched in the string, and otherwise fails. + +function Rtab (Count : Natural) return Pattern; +function Rtab (Count : access Natural) return Pattern; +function Rtab (Count : Natural_Func) return Pattern; + Constructs a pattern that matches from the current location until + exactly Count characters remain to be matched in the string. The + pattern fails if fewer than Count characters remain to be matched. + +function Setcur (Var : access Natural) return Pattern; + Constructs a pattern that matches the null string, and assigns the + current cursor position in the string. This value is the number of + characters matched so far. So it is zero at the start of the match. + +function Span (Str : String) return Pattern; +function Span (Str : VString) return Pattern; +function Span (Str : Character) return Pattern; +function Span (Str : Character_Set) return Pattern; +function Span (Str : access VString) return Pattern; +function Span (Str : VString_Func) return Pattern; + Constructs a pattern that matches the longest possible string + consisting entirely of characters from the given argument. The + string cannot be empty, so the pattern fails if the current + character is not one of the characters in Str. 
+ +function Succeed return Pattern; + Constructs a pattern that succeeds matching null, both on the first + attempt, and on any rematch attempt, i.e. it is equivalent to an + infinite alternation of null strings. + +function Tab (Count : Natural) return Pattern; +function Tab (Count : access Natural) return Pattern; +function Tab (Count : Natural_Func) return Pattern; + Constructs a pattern that matches from the current location until Count + characters have been matched. The pattern fails if more than Count + characters have already been matched. + +--------------------------------- +Pattern Matching Operations +--------------------------------- + + The Match function performs an actual pattern matching operation. + The versions with three parameters perform a match without modifying + the subject string and return a Boolean result indicating if the + match is successful or not. The Anchor parameter is set to True to + obtain an anchored match in which the pattern is required to match + the first character of the string. In an unanchored match, which is + + the default, successive attempts are made to match the given pattern + at each character of the subject string until a match succeeds, or + until all possibilities have failed. + + Note that pattern assignment functions in the pattern may generate + side effects, so these functions are not necessarily pure. + +Anchored_Mode : Boolean := False; + This global variable can be set True to cause all subsequent pattern + matches to operate in anchored mode. In anchored mode, no attempt is + made to move the anchor point, so that if the match succeeds it must + succeed starting at the first character. Note that the effect of + anchored mode may be achieved in individual pattern matches by using + Fence or Pos(0) at the start of the pattern. + +Pattern_Stack_Overflow : exception; + Exception raised if internal pattern matching stack overflows. This + is typically the result of runaway pattern recursion. 
If there is a + genuine case of stack overflow, then either the match must be broken + down into simpler steps, or the stack limit must be reset. + +Stack_Size : constant Positive := 2000; + Size used for internal pattern matching stack. Increase this size if + complex patterns cause Pattern_Stack_Overflow to be raised. + + Simple match functions. The subject is matched against the pattern. + Any immediate or deferred assignments or writes are executed, and + the returned value indicates whether or not the match succeeded. + +function Match + (Subject : VString; + Pat : Pattern) return Boolean; + +function Match + (Subject : VString; + Pat : PString) return Boolean; + +function Match + (Subject : String; + Pat : Pattern) return Boolean; + +function Match + (Subject : String; + Pat : PString) return Boolean; + + Replacement functions. The subject is matched against the pattern. + Any immediate or deferred assignments or writes are executed, and + the returned value indicates whether or not the match succeeded. + If the match succeeds, then the matched part of the subject string + is replaced by the given Replace string. + +function Match + (Subject : VString_Var; + Pat : Pattern; + Replace : VString) return Boolean; + +function Match + (Subject : VString_Var; + Pat : PString; + Replace : VString) return Boolean; + +function Match + (Subject : VString_Var; + Pat : Pattern; + Replace : String) return Boolean; + +function Match + (Subject : VString_Var; + Pat : PString; + Replace : String) return Boolean; + +Deferred Replacement + +type Match_Result is private; + Type used to record result of pattern match + +subtype Match_Result_Var is Match_Result; + This synonym is used as a formal parameter type to a function where, + if the language allowed, we would use an in out parameter, but we are + not allowed to have in out parameters for functions. 
Instead we pass + actuals which must be variables, and with a bit of trickery in the + body, manage to interpret them properly as though they were indeed + in out parameters. + +function Match + (Subject : VString_Var; + Pat : Pattern; + Result : Match_Result_Var) return Boolean; + +procedure Match + (Subject : in out VString; + Pat : Pattern; + Result : out Match_Result); + +procedure Replace + (Result : in out Match_Result; + Replace : VString); + Given a previous call to Match which set Result, performs a pattern + replacement if the match was successful. Has no effect if the match + failed. This call should immediately follow the Match call. + +------------------------ +Debugging Routines +------------------------ + + Debugging pattern matching operations can often be quite complex, + since there is no obvious way to trace the progress of the match. + The declarations in this section provide some debugging assistance. + +Debug_Mode : Boolean := False; + This global variable can be set True to generate debugging on all + subsequent calls to Match. The debugging output is a full trace of + the actions of the pattern matcher, written to Standard_Output. The + level of this information is intended to be comprehensible at the + abstract level of this package declaration. However, note that the + use of this switch often generates large amounts of output. + +function "*" (P : Pattern; Fil : File_Access) return Pattern; +function "*" (P : PString; Fil : File_Access) return Pattern; +function "*" (P : PChar; Fil : File_Access) return Pattern; +function "**" (P : Pattern; Fil : File_Access) return Pattern; +function "**" (P : PString; Fil : File_Access) return Pattern; +function "**" (P : PChar; Fil : File_Access) return Pattern; + These are similar to the corresponding pattern assignment operations + except that instead of setting the value of a variable, the matched + substring is written to the appropriate file. 
This can be useful in + following the progress of a match without generating the full amount + of information obtained by setting Debug_Mode to True. + +Terminal : constant File_Access := Standard_Error; +Output : constant File_Access := Standard_Output; + Two handy synonyms for use with the above pattern write operations + + Finally we have some routines that are useful for determining what + patterns are in use, particularly if they are constructed dynamically. + +function Image (P : Pattern) return String; +function Image (P : Pattern) return VString; + These functions yield strings that correspond to the syntax needed + to create the given pattern using the functions in this package. The + form of this string is such that it could actually be compiled and + evaluated to yield the required pattern except for references to + variables and functions, which are output using one of the following + forms: +-- + access Natural NP(16#...#) + access Pattern PP(16#...#) + access VString VP(16#...#) +-- + Natural_Func NF(16#...#) + VString_Func VF(16#...#) +-- + where 16#...# is the hex representation of the integer address that + corresponds to the given access value + +procedure Dump (P : Pattern); + This procedure writes information about the pattern to Standard_Output. + The format of this information is keyed to the internal data structures + used to implement patterns. The information provided by Dump is thus + more precise than that yielded by Image, but is also a bit more obscure + (i.e. it cannot be interpreted solely in terms of this spec, you have + to know something about the data structures). 
+ +procedure Finalize (Object : in out Pattern); + Finalization routine used to release storage allocated for a pattern + + + + diff --git a/doc/reference.xml b/doc/reference.xml new file mode 100644 index 0000000..0e4d5be --- /dev/null +++ b/doc/reference.xml @@ -0,0 +1,2005 @@ +<?xml version="1.0"?> + +<!-- + LSPIPAT - LUA SPIPAT WRAPPER + Copyright (C) 2010, Robin Haberkorn + License: LGPL + + DOCUMENTATION AND MODULE REFERENCE +--> + +<book xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink"> + <info> + <title>SNOBOL/SPITBOL Patterns for Lua + libspipat Lua wrapper + lspipat + + + Robin Haberkorn + robin.haberkorn at googlemail.com + + + 2010Robin Haberkorn + + + + + + + + The following document is the lspipat + Lua 5.1 module documentation and reference. + + + + + Thanks To... + + + lspipat would not be possible without: + + + + Phil Budne, for spipat. + lspipat is merely a spipat wrapper. + + Robert Dewar who has created Macro SPITBOL and + the GNAT.Spitbol package. + spipat was derived from GNAT.Spitbol, which is based on Macro SPITBOL. + + + + + + Introduction + + + lspipat is a wrapper to spipat + that brings support for a first-class SNOBOL/SPITBOL-like pattern data type. + Patterns can be constructed and subsequently combined with other patterns, + strings, numbers and functions using binary and unary operators allowing + the construction of grammars describing any Context Free Language. + Patterns can be matched against any Lua string. + A major difference to other pattern matching techniques like regular expressions, besides + the supported language class, is the possibility to construct patterns/grammars in a + readable and intuitive way, somewhat reminiscent of the BNF. + + They can include pattern elements that have side-effects (i.e. Lua code executed during + pattern matching) or produce and influence pattern elements dynamically. 
+ For instance, functions can be specified that are executed during matching to produce + the parameters necessary for the interpretation of a pattern element. + Code can be embedded that generates entire patterns on the fly. + Matching previously matched substrings and implementing recursive patterns + is only one application of the powerful dynamic pattern elements traditionally + offered by SNOBOL pattern matching and thus by lspipat. + + SNOBOL/SPITBOL pattern matching was traditionally used in compiler construction + and prototyping, artificial intelligence research and the humanities. + + + + + Resources + + + These internet resources are more or less directly related to lspipat and + might be useful to you: + + + + http://luaforge.net/projects/lspipat/: + lspipat project page at LuaForge, downloads, bug tracker, etc. + + http://www.snobol4.org/spipat/: + libspipat downloads + + http://pypi.python.org/pypi/spipat/: + libspipat's Python wrapper (included in libspipat + packages). + + http://www.infeig.unige.ch/support/ada/gnatlb/g-spipat.html: + GNAT.Spitbol description. Also installed as pattern.txt by lspipat. + + ftp://ftp.cs.arizona.edu/snobol/gb.pdf: + The SNOBOL4 Programming Language (The famous Green Book) + + ftp://ftp.snobol4.com/spitman.pdf: + Macro SPITBOL Reference Manual + + other interesting resources compiled by Phil Budne... + + + + + + + Comparison with SNOBOL + + + Just as patterns in SNOBOL are combined and constructed dynamically with + binary and unary operators, lspipat also uses operators available in + Lua to construct patterns in a simple and intuitive way. + The operators and pattern-construction functions were chosen so that the pattern construction syntax + is as similar as possible to SNOBOL/SPITBOL. + The following table shows a comparison of operators between + SPITBOL and lspipat: + + + Comparison of SPITBOL and lspipat operators + + + + + + + + Operation + SPITBOL + lspipat + Notes + + + + Alternation + | + + + + Refer to . 
+ Cannot be used to combine two strings. + + + Concatenation + (space) + * + + Immediate Assignment/Call + $ + % + + % and / have the + same precedence + as * in Lua. + Also only call versions are supported (see ). + + + Deferred Assignment/Call + . + / + + Cursor Assignment + @ (unary) + # (unary) + + Refer to . + lspipat only supports a call version + (see ). + + + + + Setcur + + Defer Expression + * (unary) + - (unary) or Pred + + Refer to . + In general, expressions can be wrapped in (anonymous) functions to defer them. + + + Interrogation/Predicate + ? (unary) + + + Pattern Match + ? + smatch + + Refer to . + S ? P is roughly equivalent to S:smatch(P) in Lua. + + + + (space) + + + Substring Replacement + = + ssub + + Refer to . + S P = R is roughly equivalent to S:ssub(P, R, 1) in Lua. + + + + +
+
+ + + Installation + + + lspipat uses an autotools buildsystem. The standard + INSTALL file contains instructions on how to use it from + a package builder's perspective. + Nevertheless, there are some quirks that should be mentioned. + + +
+ Dependencies + + + + spipat 0.9.3+: + You are advised to apply the patch spipat-patches/0.9.3+_image.patch first + before building spipat, even though it is not mandatory. + It fixes a header file (so lspipat can make use of customized + render-to-string functionality) and various bugs. + + + Lua 5.1: + You probably have this already. The configure script + should be able to cope with Ubuntu and + Lua Binaries + distributions. The standalone Lua compiler is only required if + compilation of Lua scripts is enabled. + + +
+ +
+ Configuration Options + + + The following special configure script options + are supported: + + + + --enable-lua-libdir=DIR + + Change the installation directory of lspipat. + It defaults to LIBDIR/lua/5.1. You probably want this to + point to some directory in Lua's + + module search path, so the default should be ok. + + + + --disable-lua-precompile + + Disable precompilation of Lua source files. + Naturally, a Lua compiler will not be required when this option + is used. + + + + --disable-lua-strip + + Do not strip (i.e. remove debugging symbols from) compiled + Lua sources. + + + + --disable-html-doc + + Do not generate HTML documentation. The documentation is usually + derived from Docbook using + XSLTProc. + Disabling this may be useful if you have got some problem + with the tool chain but are satisfied with the precompiled + documentation in the distribution. + + + + + Furthermore, you should note that render-to-string results are not + reminiscent of lspipat syntax (used in this document) by default. + For lspipat to be able to customize these renderings, + configure has to find some spipat headers which + are not normally installed. + Therefore it is highly recommended to add spipat's source directory to the C include search path + using the CPPFLAGS variable before running configure. + +
+ + + Thus, supposing that spipat sources are located in your home directory, + the most common way to install lspipat would be: + + + + +
+ + + Usage + + + After lspipat has been installed properly, you will + be able to use it in your Lua program by simply requiring lspipat + (i.e. require "lspipat"). + + The module table will be called spipat, but many functions + (especially pattern constructors) will be registered as globals as well. + Also, some operators will be overloaded. + For details on all that (operators, globals, etc.) refer to + . + + + + + Examples + + + The samples directory in the lspipat source package + contains some small examples that I hope give you some inspiration on how and where to use + lspipat. + + + + samples/exp2bf.lua + + exp2bf.lua expression + + Compiles simple arithmetic expressions to Brainfuck programs that when + executed evaluate the expression and print the result + (8-bit unsigned integer arithmetic). + Prints these programs to stdout. + + Use that for whatever you can imagine ;-) + + + + samples/wave.lua + + wave.lua wavefile + + Validates/parses WAV files + and prints some information about them. + + This is an example of how to use lspipat + to do pattern matching on "binary" data (formats, protocols). Some + primitives were implemented in Lua for that reason - in the future + there might be a separate C-module to do the encoding/decoding of + integers in different byte-orders more efficiently. + + + + samples/regexp.lua + + Small regular expression example/test - uses a comprehensive regular + expression describing IPs. + + + + + + + Variable Deferring Techniques + + + In SNOBOL, arbitrary expressions could be deferred + (i.e. their evaluation could be deferred) by using the unary asterisk operator. + With lspipat however, you will have to pass functions + (which can be constructed anonymously) to the appropriate constructors to achieve + the same goal. + + Deferring expressions which should be combined with other patterns is one + application of the Pred constructor + and - operator respectively. 
+ + Deferring variables is just a special case of deferring expressions. + In this chapter, different ways of optimizing variable deferrings will be + explained using a simple example. + + For instance if you would like to assign a + matched quotation character to a local variable and use that to subsequently match + a simple quote/string, you could use function closures to write something like that: + + + Function Closures for Deferring Purposes + + local cquote +string = Any("\"'") / function(c) cquote = c end + * Break(function() return cquote end) + * -function() return cquote end + + + You may find this solution a bit verbose, compared with + SNOBOL's elegant syntax. + To save some typing you could define your own constructors + that take the name of a global variable (as a string) + and construct patterns whose arguments are retrieved by + a function closure accessing the globals table. + + + Custom Constructors for Deferring Purposes + + function _Break(name) + return Break(function() return _G[name] end) +end +function _Pred(name) + return -function() return _G[name] end +end + +string = Any("\"'") / function(c) cquote = c end + * _Break "cquote" + * _Pred "cquote" + + + Of course, if you do not want to pollute the global namespace + your custom functions could just as well access a local table. + Furthermore, you could optimize the code by defining one generic + table access function which is suitable to be used for + lspipat's pattern constructors - + being able to pass so called cookies + to functions comes in handy. + + + Generic Retrievers for Deferring Purposes + + function getGlobal(name) return _G[name] end +function _Break(name) return Break(getGlobal, name) end +function _Pred(name) return Pred(getGlobal, name) end +-- ... + + + Fortunately, lspipat already defines + such constructors (deferring global variables) for you. 
+ Wherever possible, there will be versions of constructors + with leading underscores that work similarly to the ones in + the example above. + You can of course overwrite these constructors, e.g. with + versions accessing a special local table. + + +
+ Recursive Patterns + + + Recursive patterns can be implemented just as described above. + Supposing you want to match the repetition of the predefined pattern + P (greedy) you could write + something like this: + + + Recursive Patterns + + + + + Sometimes, however, when using global variables is inappropriate, + you might want to use the following trick: + + + Recursive Pattern Trick + + + + + It works because foo is still a function in the scope + of the assignment's right side, but a pattern afterwards so the + function - to which no (direct) reference exists anymore - will return + the pattern foo after the assignment. + +
+
+ + + Module Reference + + + A compilation of all functions in the lspipat + module, global functions registered by the module, methods + and overloaded operators follows. + + + + smatch + + + smatch + Perform pattern match on a subject string + + + + + spipat.smatch + ( subject + , pattern + , flags ) + + subject:smatch + ( pattern + , flags ) + + + + + Description + + + Tries to match pattern against subject + using the given flags. + + + Parameters + + + subject (string): A string against which the pattern match will be performed + pattern (userdata): The pattern used for matching + + flags (number or nil): + Optional spipat flags. + + + + + Spipat Flags + + + Flags are added (e.g. spipat.match_anchored + spipat.match_debug), + due to the lack of a logical/binary or operator in Lua. + + + + spipat.match_anchored: Match in anchored mode + + spipat.match_debug: + Match with progress being printed to stdout. + Useful for pattern debugging as the name suggests. + + + + + Return Values + + + In case of an exception during matching, raises an error. + In case no substring matches, returns a single nil value. + Otherwise returns + + + number: Start of matched substring + number: End of matched substring + + + + + + ssub + + + ssub + Substitute substrings matching a pattern in a subject + + + + + spipat.ssub + ( subject + , pattern + , replacement + , n, flags ) + + subject:ssub + ( pattern + , replacement + , n, flags ) + + + + + Description + + + Substitutes regions in subject matching pattern either with a string + if replacement is a string or if replacement is a function, the result + of calling that function. This may be useful for deferring the evaluation of replacement strings + which depend on (are built from) results of the matching process (e.g. call-on-match or call-immediately function executions). 
+ + + Parameters + + + subject (string): The subject for the first pattern match + pattern (userdata): The pattern used for matching + + replacement (string or function): + Replacement string or a function that's executed after matching to produce the replacement string + + n (number or nil): + Optional maximal number of match/replacement operations. The first match + is performed on subject, subsequent matches on the result of the preceding + replacements. Naturally replacement stops when the pattern does not match anymore. + If n is absent or nil, replacement only stops when pattern + does not match anymore. + + flags (number or nil): + Optional spipat flags, as in . + + + + Return Values + + + In case of an exception during matching, raises an error. + Otherwise returns + + + + string: The result of the last replacement performed or the original + subject if no substring matched at all + + number: The number of match/replacement operations actually performed + + + + Example + + + Replacements with spipat.ssub + + > print(spipat.ssub("abc ccC bab", Span("abc") / function(s) str = s end, function() return "["..str:upper().."]" end, 2)) +[ABC] [CC]C BaB +> + + + + + + siter + + + siter + Return iterator of substrings matching a pattern in a subject + + + + + spipat.siter + ( subject + , pattern + , flags ) + + subject:siter + ( pattern + , flags ) + + + + + Description + + + Returns an iterator function performing a pattern match on subject + and returning the matched substring (start/end positions in subject). + Each time it is called, it begins matching where the last substring ended, but using the same + subject. + + + Parameters + + + subject (string): The subject used for pattern matching + + pattern (userdata): The pattern used for matching. + Naturally, anchoring the pattern using any of the possible methods is nonsense. + + flags (number or nil): + Optional spipat flags, as in . 
+ + + + Return Values + + + In case of an exception during matching, raises an error. + Otherwise returns + + + function: The iterator function. Calling it returns + + number: Start of matched substring + number: End of matched substring + + + + Example + + + Iterating through substrings with spipat.siter + + > str = "abc" +> for s, e in str:siter(Len(1)) do print(str:sub(s, e)) end +a +b +c +> + + + + + + free + + + free + Finalize pattern + + + + + spipat.free( pattern ) + + pattern:free() + + + + + Description + + + Finalizes pattern, i.e. frees memory associated with it and unreferences any + other Lua values (other patterns, functions, etc.) so they can get garbage collected. + + Finalizing an already finalized pattern does nothing. + Using a finalized pattern in any function or operator working with a pattern + will raise an error. + + + free does early what would otherwise be done when the pattern is garbage + collected, so in most cases you will not need it at all. + It may be useful when you would like to free a large pattern you do not need anymore but + removing all references to that pattern and enforcing a full garbage collection cycle + is not feasible. + + + Parameters + + + pattern (userdata): The pattern to be finalized + + + Return Values + + + Returns nothing. + + + Example + + + Finalizing a pattern + + > p = Arb() +> p:free() +> print(p * "foo") +stdin:1: Pattern already freed +> + + + + + + Conversion + + + topattern + Convert a value to a pattern + + + tostring + Render a pattern as a string + + + + + spipat.topattern( value ) + + topattern( value ) + + value:topattern() + + + + tostring( pattern ) + + + + + Description + + + topattern creates a pattern for a string or number, matching that string or number. + If value is already a pattern it returns that pattern without modification. + In case of an unsupported value type or miscellaneous error, topattern always + returns nil. + + + topattern is useful to explicitly create a pattern, e.g.
when an operator requires + at least one operand to be a pattern but both are strings, numbers or functions. + + + Lua's built-in tostring + function called on a pattern renders that pattern as a string reminiscent of + lspipat's pattern construction syntax. + + + + + Example + + + Explicit pattern construction & implicit conversion to strings + + print("2" + 3) +5 +> print(topattern("2") + 3) +("2" + "3") +>]]> + + + + + + dump + + + dump + Dump a pattern to stdout + + + + + spipat.dump( pattern ) + + + + + Description + + + dump prints information about a pattern to + stdout. + The kind of information displayed is similar to + tostring's rendering. + + It is useful for debugging purposes. + + + Parameters + + + pattern (userdata): The pattern to be dumped + + + Return Values + + + Returns nothing. + + + + + + Concatenation and Alternation + + + * + Concatenate patterns + + + + + Alternate patterns + + + + + pattern* + value + + value* + pattern + + pattern* + pattern + + + + pattern+ + value + + value+ + pattern + + pattern+ + pattern + + + + + Description + + + The * operator constructs a concatenation of two values + if at least one of them is a pattern and returns the result as a pattern. + A concatenation matches the left operand immediately followed by the right operand. + + The + operator constructs an alternation between two values + if at least one of them is a pattern and returns the result as a pattern. + An alternation matches the left operand and if unsuccessful the right operand. + + The non-pattern values may be strings or numbers, which are matched + just like a pattern built by + topattern. + + + Even though the patterns participating in the composition will be copied, + references will be kept, so they will not be garbage collected until all patterns + using them are garbage collected. 
+ + + Return Values + + + pattern (userdata): Result of the pattern composition + + + Example + + + Concatenations and Alternations + + > pat = (topattern("ABC") + "AB") * (topattern("DEF") + "CDE") * (topattern("GH") + "IJ") +> assert(spipat.smatch("ABCCDEGH", pat)) +> assert(spipat.smatch("ABCDEFIJ", pat)) +> + + + + + + Assignment Calls + + + % + Call Immediately + + / + Deferred Call + + + + + pattern% + function + + + + pattern/ + function + + + + + Description + + + The % operator constructs a pattern matching operand pattern and + calling a Lua function whenever pattern matches during a pattern + match (i.e. function may be called more than once while matching regardless of whether + the match fails or succeeds). + + On the other hand, the / operator constructs a pattern matching operand + pattern and calling a Lua function at most once - only if + the match succeeds. + + In both cases, function receives the following arguments when called: + + string: The substring matched by pattern + + Its return value is ignored. + + + Unlike assignment operators in SNOBOL, the % and / + operators in Lua have the same precedence + as the concatenation operator *, + so using parentheses is advised. + + + Deferred assignments (assign on match & assign immediately) are not directly possible but can be + easily implemented using function closures as described in . + + + + Even though the pattern operands will be copied, references will be kept, + so they will not be garbage collected until all patterns + using them are garbage collected. + + Furthermore, references to functions will be kept so they will not be + garbage collected until the patterns constructed by the operators are garbage collected. + + + + Return Values + + + pattern (userdata): Pattern built by the operators + + + Example + + See . 
+ + + + + Cursor Assignment Calls + + + Setcur + Cursor Assignment + + + + + spipat.Setcur + ( function, cookie ) + + Setcur + ( function, cookie ) + + #function + + + + spipat._Setcur( string ) + + _Setcur( string ) + + + + + Description + + + Setcur is a pattern constructor returning a pattern matching the null string "" + (i.e. always succeeds when matched) and immediately calling a Lua function when matched. + This function receives the following arguments when called: + + + number: The cursor in the subject string. + In other words, the number of characters matched so far from the beginning of the subject string. + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + Its return value is ignored. + + + The unary # operator is equivalent to the Setcur constructor with no + cookie specified. + + + _Setcur is similar to Setcur but actually assigns the cursor position to + the global variable whose name is specified by a string value. + This means that _Setcur(str) does not assign the cursor position to the global variable str + but rather to the variable with the name str contains, e.g. foo if str == "foo". + So generally _Setcur is equivalent to: + + + + In a similar manner, other kinds of deferred assignments can be implemented + using function closures as described in . + + + References to function and cookie will be kept so they will not be + garbage collected until the pattern constructed by Setcur is garbage collected. + + + Return Values + + + pattern (userdata): Pattern built by the constructor + + + + + + + Predicates + + + Pred + Predicate Constructor + + + + + spipat.Pred + ( function, cookie ) + + Pred + ( function, cookie ) + + -function + + + + spipat._Pred( string ) + + _Pred( string ) + + -string + + + + + Description + + + Pred constructs a pattern which allows you to transparently define its matching behaviour + using a function called when this pattern is attempted to be matched. 
+ It receives the following arguments when invoked: + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + The function's return value defines the behaviour dynamically, as shown in the following table: + + + Dynamic Function Return Values + + + + + + + + + Value + Type + Behaviour + + + + nil + nil + + Match the "" string, i.e. succeed. + + + true + boolean + + false + + + Pattern match fails, like when using the + Fail primitive. + + + any number + + Try to match that number as a string, as if + converted to a pattern. + + + any string + + Try to match that string, as if + converted to a pattern. + + + any pattern + + Try to match that pattern. Returning a pattern assigned to a variable is the way + to implement recursive patterns. + + + + +
+
+ + The unary - operator applied to a function is equivalent + to the Pred constructor with no cookie specified. + + + _Pred is similar to Pred but actually gets the Lua value defining its behaviour from + the global variable whose name is specified by a string value. + This means that _Pred(str) does not get the value from the global variable str + but rather from the variable whose name str contains, e.g. foo if str == "foo". + So generally _Pred is equivalent to: + + + + In a similar manner, other kinds of variable deferring as well as recursive patterns can be implemented + using function closures as described in . + + + The unary - operator applied to a string which is not convertible to + a number is equivalent to the _Pred constructor - naturally this + should be true for all global variable names. + This constraint comes from the way Lua handles operations by default (it checks whether it is an arithmetic operation + before evaluating any metamethod - see metatables). + +
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + String Primitives + + + Any + Match any character in a set + + + NotAny + Match any character not in a set + + + Break + Match characters up to a break character + + + BreakX + Match characters up to a break character (extending) + + + NSpan + Match nothing or characters from a set + + + Span + Match characters from a set + + + + + spipat.Any( set ) + + spipat.Any + ( function, cookie ) + + spipat._Any( string ) + + + + spipat.NotAny( set ) + + spipat.NotAny + ( function, cookie ) + + spipat._NotAny( string ) + + + + spipat.Break( set ) + + spipat.Break + ( function, cookie ) + + spipat._Break( string ) + + + + spipat.BreakX( set ) + + spipat.BreakX + ( function, cookie ) + + spipat._BreakX( string ) + + + + spipat.NSpan( set ) + + spipat.NSpan + ( function, cookie ) + + spipat._NSpan( string ) + + + + spipat.Span( set ) + + spipat.Span + ( function, cookie ) + + spipat._Span( string ) + + + + + Description + + + String primitives are pattern constructors that in their first form all take a string or + number (which is converted to a string) as their sole argument + (set). + + In their second form they take a Lua function and an optional cookie + as arguments. When the constructed pattern is about to be matched, the function is called + and is supposed to return a string or number (which is converted to + a string) to supply the primitive's argument dynamically. + It receives the following arguments when invoked: + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + The primitives with a leading underscore (e.g. _Any) are similar but actually get their argument + from a global variable with the name a string argument contains. + This means that for instance _Any(str) does not get its character set from the global variable str + but rather from the variable with the name str contains, e.g. foo if str == "foo". 
+ So generally _Any is equivalent to: + + + + In a similar manner, other kinds of variable deferring can be implemented + using function closures as described in . + + + References to function and cookie will be kept so they will not be + garbage collected until the pattern constructed is garbage collected. + + + The following table describes what these primitives do: + + + + String Primitives + + + + + + + Primitive + Description + + + + Any( S ) + + Where S is a string, matches a single character that is + any one of the characters in S. Fails if the current + character is not one of the given set of characters. + + + NotAny( S ) + + Where S is a string, matches a single character that is + not one of the characters of S. Fails if the current + character is one of the given set of characters. + + + Break( S ) + + Where S is a string, matches a string of zero or more + characters up to but not including a break character + that is one of the characters given in the string S. + Can match the null string, but cannot match the last + character in the string, since a break character is + required to be present. + + + BreakX( S ) + + Where S is a string, behaves exactly like Break(S) when + it first matches, but if a string is successfully matched, + then a subsequent failure causes an attempt to extend the + matched string. + + + NSpan( S ) + + Where S is a string, matches a string of zero or more + characters that are among the characters given in the + string. Always matches the longest possible such string. + Always succeeds, since it can match the null string. + + + Span( S ) + + Where S is a string, matches a string of one or more + characters that are among the characters given in the + string. Always matches the longest possible such string. + Fails if the current character is not one of the given + set of characters. + + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + Arbno + + + Arbno + Matches a pattern any number of times + + + + + spipat.Arbno( P ) + + Arbno( P ) + + + + + Description + + + Where P is any pattern, matches any number of instances + of the pattern, starting with zero occurrences. It is + thus equivalent to ("" + (P * ("" + (P * ("" ....)))). + The pattern P may contain any number of pattern elements + including the use of alternation and concatenation. + + Arbno is a pattern constructor taking exactly one argument which is + either a pattern or string (which is treated + like it is converted to a pattern first). + + + A reference to P will be kept if it is a pattern + so it will not be garbage collected until the pattern constructed is garbage collected. + + + Return Values + + + pattern (userdata): Pattern built by Arbno + + + + + + + Fence + + + Fence + Abort match when alternations are sought + + + + + spipat.Fence( P ) + + Fence( P ) + + + + + Description + + + Fence is a pattern constructor taking no or exactly one + pattern as an argument. + + + A reference to pattern P will be kept so it will not + be garbage collected until the pattern constructed is garbage collected. + + + The following table describes what the two versions do: + + + + Fence Primitive + + + + + + + Primitive + Description + + + + Fence() + + Matches the null string at first, and then if a failure + causes alternatives to be sought, aborts the match (like + a Cancel). Note that using Fence at the + start of a pattern has the same effect as matching in anchored mode. + + + Fence( P ) + + Where P is a pattern, attempts to match the pattern P + including trying all possible alternatives of P. If none + of these alternatives succeeds, then the Fence pattern + fails. If one alternative succeeds, then the pattern + match proceeds, but on a subsequent failure, no attempt + is made to search for alternative matches of P. The + pattern P may contain any number of pattern elements + including the use of alternation and concatenation.
+ + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by Fence + + + +
+ + + Integer Primitives + + + Len + Match a number of characters + + + Pos + Match null string if number of characters have been matched + + + RPos + Match null string if number of characters remain to be matched + + + Tab + Match characters until number of characters have been matched + + + RTab + Match characters until number of characters remain to be matched + + + + + spipat.Len( n ) + + spipat.Len + ( function, cookie ) + + spipat._Len( string ) + + + + spipat.Pos( n ) + + spipat.Pos + ( function, cookie ) + + spipat._Pos( string ) + + + + spipat.RPos( n ) + + spipat.RPos + ( function, cookie ) + + spipat._RPos( string ) + + + + spipat.Tab( n ) + + spipat.Tab + ( function, cookie ) + + spipat._Tab( string ) + + + + spipat.RTab( n ) + + spipat.RTab + ( function, cookie ) + + spipat._RTab( string ) + + + + + Description + + + Integer primitives are pattern constructors that in their first form all take a number or + string (which is converted to a number) as their sole argument + (n). + This number has to be an unsigned integer - sometimes a natural number depending on the + primitive. + + + If the argument is omitted, zero is assumed. + + + In their second form the primitives take a Lua function and an optional cookie + as arguments. When the constructed pattern is about to be matched, the function is called + and is supposed to return a number or string (which is converted to + a number) to supply the primitive's argument dynamically. + It receives the following arguments when invoked: + + cookie: Any Lua value specified as a cookie in the pattern constructor or + nil if no cookie was specified. + + + The primitives with a leading underscore (e.g. _Len) are similar but actually get their argument + from a global variable with the name a string argument contains. + This means that for instance _Len(str) does not get its argument from the global variable str + but rather from the variable whose name str contains, e.g. foo if str == "foo".
+ So generally _Len is equivalent to: + + + + In a similar manner, other kinds of variable deferring can be implemented + using function closures as described in . + + + References to function and cookie will be kept so they will not be + garbage collected until the pattern constructed is garbage collected. + + + The following table describes what these primitives do: + + + + Integer Primitives + + + + + + + Primitive + Description + + + + Len( N ) + + Where N is a natural number, matches the given number of + characters. For example, Len(10) matches any string that + is exactly ten characters long. + + + Pos( N ) + + Where N is a natural number, matches the null string + if exactly N characters have been matched so far, and + otherwise fails. + + + RPos( N ) + + Where N is a natural number, matches the null string + if exactly N characters remain to be matched, and + otherwise fails. + + + Tab( N ) + + Where N is a natural number, matches characters from + the current position until exactly N characters have + been matched in all. Fails if more than N characters + have already been matched. + + + RTab( N ) + + Where N is a natural number, matches characters from + the current position until exactly N characters remain + to be matched in the string. Fails if fewer than N + unmatched characters remain in the string. + + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + Miscellaneous Primitives + + + Arb + Matches any string + + + Bal + Matches parentheses balanced strings + + + Abort + Immediately abort pattern match + + + Fail + Null alternation + + + Rem + Match the entire remaining subject string + + + Succeed + Match the null string in every alternative + + + + + spipat.Arb() + + Arb() + + spipat.Bal() + + Bal() + + spipat.Abort() + + Abort() + + spipat.Fail() + + Fail() + + spipat.Rem() + + Rem() + + spipat.Succeed() + + Succeed() + + + + + Description + + + These are simple pattern constructor + functions. + + The following table describes what these primitives do: + + + + Miscellaneous Primitives + + + + + + + Primitive + Description + + + + Arb() + + Matches any string. First it matches the null string, and + then on a subsequent failure, matches one character, and + then two characters, and so on. It only fails if the + entire remaining string is matched. + + + Bal() + + Matches a non-empty string that is parentheses balanced + with respect to ordinary () characters. + Examples of balanced strings are "ABC", + "A((B)C)", and "A(B)C(D)E". + Bal matches the shortest possible balanced + string on the first attempt, and if there is a subsequent failure, + attempts to extend the string. + + + Abort() + + Immediately aborts the entire pattern match, signalling + failure. This is a specialized pattern element, which is + useful in conjunction with some of the special pattern + elements that have side effects. + + + Fail() + + The null alternation. Matches no possible strings, so it + always signals failure. This is a specialized pattern + element, which is useful in conjunction with some of the + special pattern elements that have side effects. + + + Rem() + + Matches from the current point to the last character in + the string. This is a specialized pattern element, which + is useful in conjunction with some of the special pattern + elements that have side effects.
+ + + Succeed() + + Repeatedly matches the null string (it is equivalent to + the alternation ("" + "" + "" ....). This is a special + pattern element, which is useful in conjunction with some + of the special pattern elements that have side effects. + + + + +
+
+
+ Return Values + + + pattern (userdata): Pattern built by the constructor + + + +
+ + + POSIX Extended Regular Expressions + + + RegExp + Matches a pattern equivalent to a regular expression + + + + + spipat.RegExp + ( expression, captures ) + + RegExp + ( expression, captures ) + + + + + Description + + + RegExp constructs from a + + POSIX Extended Regular Expression, a pattern that is equivalent to that regular + expression and can be combined with other patterns freely. + + It can optionally construct the pattern to save the captures + from a regular expression match in a Lua table. + + + Even though this implementation should support almost all elements of EREs, + it is considered experimental. + You are advised to use the usual pattern construction primitives. + + + Parameters + + + + expression (string): The POSIX ERE which is compiled + to a pattern. + + captures (table): Optional table, or more precisely + array, to hold subexpression captures. + Naturally, it has to exist when RegExp is called. + When a subexpression is captured (i.e. the pattern equivalent to what is + enclosed in parentheses), the matching string is added to the + end of the table. + Thus, assuming that captures is initially empty, if + RegExp("(a(b))", captures) matches, captures + will be {"b", "ab"}. + + + + Return Values + + + pattern (userdata): Pattern built by RegExp + + + Example + + + Regular Expressions + + print(RegExp "^[[:digit:]]*?(abc\\.|de?)") +Pos(0) * Arbno(Any()) * ("abc." + "d" * ("" + "e")) +>]]> + + + +
+ -- cgit v1.2.3