aboutsummaryrefslogtreecommitdiffhomepage
path: root/libslang/modules/pcre-module.c
diff options
context:
space:
mode:
authorRobin Haberkorn <robin.haberkorn@googlemail.com>2011-10-14 04:55:05 +0200
committerRobin Haberkorn <robin.haberkorn@googlemail.com>2011-10-14 04:55:05 +0200
commit6aa0e0017d7d0cddc006da885946934b06949a91 (patch)
tree66b688ec32e2f91266db760b1762f2a50cc52036 /libslang/modules/pcre-module.c
parenta966db5b71328f6adf9dd767e64b322a3bd7ed9c (diff)
downloaderlang-slang-fork-6aa0e0017d7d0cddc006da885946934b06949a91.tar.gz
include libslang-1.4.9 and automatically build it and link erlang-slang against it
few (erlang) people will still have libslang-1.4.9 installed or spend time to get it to link against the driver
Diffstat (limited to 'libslang/modules/pcre-module.c')
-rw-r--r--libslang/modules/pcre-module.c618
1 files changed, 618 insertions, 0 deletions
diff --git a/libslang/modules/pcre-module.c b/libslang/modules/pcre-module.c
new file mode 100644
index 0000000..926e477
--- /dev/null
+++ b/libslang/modules/pcre-module.c
@@ -0,0 +1,618 @@
+#include <stdio.h>
+#include <slang.h>
+#include <pcre.h>
+
+SLANG_MODULE(pcre);
+
+static int PCRE_Type_Id;
+
+typedef struct
+{
+ pcre *p;
+ pcre_extra *extra;
+ int *ovector;
+ unsigned int ovector_len; /* must be a multiple of 3 */
+ unsigned int num_matches; /* return value of pcre_exec (>= 1)*/
+}
+PCRE_Type;
+
+static void free_pcre_type (PCRE_Type *pt)
+{
+ if (pt->ovector != NULL)
+ SLfree ((char *) pt->ovector);
+
+ SLfree ((char *) pt);
+}
+
+static SLang_MMT_Type *allocate_pcre_type (pcre *p, pcre_extra *extra)
+{
+ PCRE_Type *pt;
+ SLang_MMT_Type *mmt;
+ int ovector_len;
+
+ pt = (PCRE_Type *) SLmalloc (sizeof (PCRE_Type));
+ if (pt == NULL)
+ return NULL;
+ memset ((char *) pt, 0, sizeof (PCRE_Type));
+
+ pt->p = p;
+ pt->extra = extra;
+
+ if (0 != pcre_fullinfo (p, extra, PCRE_INFO_CAPTURECOUNT, &ovector_len))
+ {
+ free_pcre_type (pt);
+ SLang_verror (SL_INTRINSIC_ERROR, "pcre_fullinfo failed");
+ return NULL;
+ }
+
+ ovector_len += 1; /* allow for pattern matched */
+ ovector_len *= 3; /* required to be multiple of 3 */
+ if (NULL == (pt->ovector = (int *)SLmalloc (ovector_len * sizeof (int))))
+ {
+ free_pcre_type (pt);
+ return NULL;
+ }
+ pt->ovector_len = ovector_len;
+
+ if (NULL == (mmt = SLang_create_mmt (PCRE_Type_Id, (VOID_STAR) pt)))
+ {
+ free_pcre_type (pt);
+ return NULL;
+ }
+ return mmt;
+}
+
+static int _pcre_compile_1 (char *pattern, int options)
+{
+ pcre *p;
+ pcre_extra *extra;
+ SLCONST char *err;
+ int erroffset;
+ unsigned char *table;
+ SLang_MMT_Type *mmt;
+
+ table = NULL;
+ p = pcre_compile (pattern, options, &err, &erroffset, table);
+ if (NULL == p)
+ {
+ SLang_verror (SL_INTRINSIC_ERROR, "Error compiling pattern '%s' at offset %d: %s",
+ pattern, erroffset, err);
+ return -1;
+ }
+
+ extra = pcre_study (p, 0, &err);
+ /* apparantly, a NULL return is ok */
+ if (err != NULL)
+ {
+ SLang_verror (SL_INTRINSIC_ERROR, "pcre_study failed: %s", err);
+ pcre_free (p);
+ return -1;
+ }
+
+ if (NULL == (mmt = allocate_pcre_type (p, extra)))
+ {
+ pcre_free ((char *) p);
+ pcre_free ((char *) extra);
+ return -1;
+ }
+
+ if (-1 == SLang_push_mmt (mmt))
+ {
+ SLang_free_mmt (mmt);
+ return -1;
+ }
+ return 0;
+}
+
+static void _pcre_compile (void)
+{
+ char *pattern;
+ int options = 0;
+
+ switch (SLang_Num_Function_Args)
+ {
+ case 2:
+ if (-1 == SLang_pop_integer (&options))
+ return;
+ /* drop */
+ case 1:
+ default:
+ if (-1 == SLang_pop_slstring (&pattern))
+ return;
+ }
+ (void) _pcre_compile_1 (pattern, options);
+ SLang_free_slstring (pattern);
+}
+
+
+
+/* returns number of matches */
+static int _pcre_exec_1 (PCRE_Type *pt, char *str, int pos, int options)
+{
+ int rc;
+ unsigned int len;
+
+ pt->num_matches = 0;
+ len = strlen (str);
+ if ((unsigned int) pos > len)
+ return 0;
+
+ rc = pcre_exec (pt->p, pt->extra, str, len, pos,
+ options, pt->ovector, pt->ovector_len);
+
+ if (rc == PCRE_ERROR_NOMATCH)
+ return 0;
+
+ if (rc <= 0)
+ {
+ SLang_verror (SL_INTRINSIC_ERROR, "pcre_exec returned %d", rc);
+ return -1;
+ }
+ pt->num_matches = (unsigned int) rc;
+ return rc;
+}
+
+static int _pcre_exec (void)
+{
+ PCRE_Type *p;
+ SLang_MMT_Type *mmt;
+ char *str;
+ int pos = 0;
+ int options = 0;
+ int ret = -1;
+
+ switch (SLang_Num_Function_Args)
+ {
+ case 4:
+ if (-1 == SLang_pop_integer (&options))
+ return -1;
+ case 3:
+ if (-1 == SLang_pop_integer (&pos))
+ return -1;
+ default:
+ if (-1 == SLang_pop_slstring (&str))
+ return -1;
+
+ if (NULL == (mmt = SLang_pop_mmt (PCRE_Type_Id)))
+ goto free_and_return;
+ p = SLang_object_from_mmt (mmt);
+ }
+ ret = _pcre_exec_1 (p, str, pos, options);
+
+ free_and_return:
+ SLang_free_slstring (str);
+ SLang_free_mmt (mmt);
+ return ret;
+}
+
+
+static int get_nth_start_stop (PCRE_Type *pt, unsigned int n,
+ unsigned int *a, unsigned int *b)
+{
+ int start, stop;
+
+ if (n >= pt->num_matches)
+ return -1;
+
+ start = pt->ovector[2*n];
+ stop = pt->ovector[2*n+1];
+ if ((start < 0) || (stop < start))
+ return -1;
+
+ *a = (unsigned int) start;
+ *b = (unsigned int) stop;
+ return 0;
+}
+
+static void _pcre_nth_match (PCRE_Type *pt, int *np)
+{
+ unsigned int start, stop;
+ SLang_Array_Type *at;
+ int two = 2;
+ int *data;
+
+ if (-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
+ {
+ SLang_push_null ();
+ return;
+ }
+
+ if (NULL == (at = SLang_create_array (SLANG_INT_TYPE, 0, NULL, &two, 1)))
+ return;
+
+ data = (int *)at->data;
+ data[0] = (int)start;
+ data[1] = (int)stop;
+ (void) SLang_push_array (at, 1);
+}
+
+static void _pcre_nth_substr (PCRE_Type *pt, char *str, int *np)
+{
+ unsigned int start, stop;
+ unsigned int len;
+
+ len = strlen (str);
+
+ if ((-1 == get_nth_start_stop (pt, (unsigned int) *np, &start, &stop))
+ || (start > len) || (stop > len))
+ {
+ SLang_push_null ();
+ return;
+ }
+
+ str = SLang_create_nslstring (str + start, stop - start);
+ (void) SLang_push_string (str);
+ SLang_free_slstring (str);
+}
+
+/* This function converts a slang RE to a pcre expression. It performs the
+ * following transformations:
+ * ( --> \(
+ * ) --> \)
+ * # --> \#
+ * | --> \|
+ * { --> \{
+ * } --> \}
+ * \< --> \b
+ * \> --> \b
+ * \C --> (?i)
+ * \c --> (?-i)
+ * \( --> (
+ * \) --> )
+ * \{ --> {
+ * \} --> }
+ * Anything else?
+ */
+static char *_slang_to_pcre (char *slpattern)
+{
+ char *pattern, *p, *s;
+ unsigned int len;
+ int in_bracket;
+ char ch;
+
+ len = strlen (slpattern);
+ pattern = SLmalloc (3*len + 1);
+ if (pattern == NULL)
+ return NULL;
+
+ p = pattern;
+ s = slpattern;
+ in_bracket = 0;
+ while ((ch = *s++) != 0)
+ {
+ switch (ch)
+ {
+ case '{':
+ case '}':
+ case '(':
+ case ')':
+ case '#':
+ case '|':
+ if (0 == in_bracket) *p++ = '\\';
+ *p++ = ch;
+ break;
+
+ case '[':
+ in_bracket = 1;
+ *p++ = ch;
+ break;
+
+ case ']':
+ in_bracket = 0;
+ *p++ = ch;
+ break;
+
+ case '\\':
+ ch = *s++;
+ switch (ch)
+ {
+ case 0:
+ s--;
+ break;
+
+ case '<':
+ case '>':
+ *p++ = '\\'; *p++ = 'b';
+ break;
+
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ *p++ = ch;
+ break;
+
+ case 'C':
+ *p++ = '('; *p++ = '?'; *p++ = 'i'; *p++ = ')';
+ break;
+ case 'c':
+ *p++ = '('; *p++ = '?'; *p++ = '-'; *p++ = 'i'; *p++ = ')';
+ break;
+
+ default:
+ *p++ = '\\';
+ *p++ = ch;
+ }
+ break;
+
+ default:
+ *p++ = ch;
+ break;
+ }
+ }
+ *p = 0;
+
+ s = SLang_create_slstring (pattern);
+ SLfree (pattern);
+ return s;
+}
+
+static void slang_to_pcre (char *pattern)
+{
+ /* NULL ok in code below */
+ pattern = _slang_to_pcre (pattern);
+ (void) SLang_push_string (pattern);
+ SLang_free_slstring (pattern);
+}
+
+static void destroy_pcre (SLtype type, VOID_STAR f)
+{
+ PCRE_Type *pt;
+ (void) type;
+
+ pt = (PCRE_Type *) f;
+ if (pt->extra != NULL)
+ pcre_free ((char *) pt->extra);
+ if (pt->p != NULL)
+ pcre_free ((char *) pt->p);
+ free_pcre_type (pt);
+}
+
+#define DUMMY_PCRE_TYPE 255
+#define P DUMMY_PCRE_TYPE
+#define I SLANG_INT_TYPE
+#define V SLANG_VOID_TYPE
+#define S SLANG_STRING_TYPE
+static SLang_Intrin_Fun_Type PCRE_Intrinsics [] =
+{
+ MAKE_INTRINSIC_0("pcre_compile", _pcre_compile, V),
+/*%+
+ *\function{pcre_compile}
+ *\synopsis{Compile a regular expression}
+ *\usage{PCRE_Type pcre_compile (String_Type pattern [, Int_Type options])}
+ *\description
+ * The \var{pcre_compile} function compiles a PCRE style regular expression
+ * and returns the result. The optional \var{options} argument may be used
+ * to provide addition information affecting the compilation of the pattern.
+ * Specifically, it is a bit-mapped value formed from the logical-or of zero
+ * or more of the following symbolic constants:
+ *#v+
+ * PCRE_ANCHORED Force the match to be at the start of a string
+ * PCRE_CASELESS Matches are to be case-insensitive
+ * PCRE_DOLLAR_ENDONLY (See PCRE docs for more information)
+ * PCRE_DOTALL The dot pattern matches all characters
+ * PCRE_EXTENDED Ignore whitespace in the pattern
+ * PCRE_EXTRA (See PCRE docs for features this activates)
+ * PCRE_MULTILINE Treat the subject string as multi-lines
+ * PCRE_UNGREEDY Make the matches greedy
+ * PCRE_UTF8 Regard the pattern and subject strings as UTF-8
+ *#v-
+ * Many of these flags may be set within the pattern itself. See the PCRE
+ * library documentation for more information about the precise details
+ * of these flags and the supported regular expressions.
+ *
+ * Upon success, this function returns a \var{PCRE_Type} object representing
+ * the compiled patterned. If compilation fails, an error will be thrown.
+ *\seealso{pcre_exec, pcre_nth_match, pcre_nth_substr}
+ *%-
+ */
+ MAKE_INTRINSIC_0("pcre_exec", _pcre_exec, I),
+/*%+
+ *\function{pcre_exec}
+ *\synopsis{Match a string against a compiled PCRE pattern}
+ *\usage{Int_Type pcre_exec(p, str [,pos [,options]]);
+ *#v+
+ * PCRE_Type p;
+ * String_Type str;
+ * Int_Type pos, options;
+ *#v-
+ *\description
+ * The \var{pcre_exec} function applies a pre-compiled pattern \var{p} to a
+ * string \var{str} and returns the result of the match. The optional third
+ * argument \var{pos} may be used to specify the point, as an offset from the
+ * start of the string, where matching is to start. The fourth argument, if
+ * present, may be used to provide additional information about how matching
+ * is to take place. Its value may be specified as a logical-or of zero or
+ * more of the following flags:
+ *#v+
+ * PCRE_NOTBOL
+ * The first character in the string is not at the beginning of a line.
+ * PCRE_NOTEOL
+ * The last character in the string is not at the end of a line.
+ * PCRE_NOTEMPTY
+ * An empty string is not a valid match.
+ *#v-
+ * See the PCRE library documentation for more information about the meaning
+ * of these flags.
+ *
+ * Upon success, this function returns a positive integer equal to 1 plus the
+ * number of so-called captured substrings. It returns 0 if the pattern
+ * fails to match the string.
+ *\seealso{pcre_compile, pcre_nth_match, pcre_nth_substr}
+ *%-
+ */
+ MAKE_INTRINSIC_2("pcre_nth_match", _pcre_nth_match, V, P, I),
+/*%+
+ *\function{pcre_nth_match}
+ *\synopsis{Return the location of the nth match of a PCRE}
+ *\usage{Int_Type[2] pcre_nth_match (PCRE_Type p, Int_Type nth)}
+ *\description
+ * The \var{pcre_nth_match} function returns an integer array whose values
+ * specify the locations of the beginning and end of the \var{nth} captured
+ * substrings of the most recent call to \var{pcre_exec} with the compiled
+ * pattern. A value of \var{nth} equal to 0 represents the substring
+ * representing the entire match of the pattern.
+ *
+ * If the \var{nth} match did not take place, the function returns \var{NULL}.
+ *\example
+ * After the execution of:
+ *#v+
+ * str = "Error in file foo.c, line 127, column 10";
+ * pattern = "file ([^,]+), line (\\d+)";
+ * p = pcre_compile (pattern);
+ * if (pcre_exec (p, str))
+ * {
+ * match_pos = pcre_nth_match (p, 0);
+ * file_pos = pcre_nth_match (p, 1);
+ * line_pos = pcre_nth_match (p, 2);
+ * }
+ *#v-
+ * \exmp{match_pos} will be set to \exmp{[9,29]}, \exmp{file_pos} to \exmp{[14,19,]}
+ * and \exmp{line_pos} to \exmp{[26,29]}. These integer arrays may be used to
+ * extract the substrings matched by the pattern, e.g.,
+ *#v+
+ * file = substr (str, file_pos[0]+1, file_pos[1]-file_pos[0]);
+ *#v-
+ * Alternatively, the function \var{pcre_nth_substr} may be used to get the
+ * matched substrings:
+ *#v+
+ * file = pcre_nth_substr (p, str, 0);
+ *#v-
+ *\seealso{pcre_compile, pcre_exec, pcre_nth_substr}
+ *%-
+ */
+ MAKE_INTRINSIC_3("pcre_nth_substr", _pcre_nth_substr, V, P, S, I),
+/*%+
+ *\function{pcre_nth_substr}
+ *\synopsis{Extract the nth substring from a PCRE match}
+ *\usage{String_Type pcre_nth_substr (PCRE_Type p, String_Type str, Int_Type nth)}
+ *\description
+ * This function may be used to extract the \var{nth} captured substring
+ * resulting from the most recent use of the compiled pattern \var{p} by the
+ * \var{pcre_exec} function. Unlike \var{pcre_nth_match}, this function returns
+ * the specified captured substring itself and not the position of the substring.
+ * For this reason, the subject string of the pattern is a required argument.
+ *\seealso{pcre_compile, pcre_exec, pcre_nth_match}
+ *%-
+ */
+ MAKE_INTRINSIC_1("slang_to_pcre", slang_to_pcre, V, S),
+/*%+
+ *\function{slang_to_pcre}
+ *\synopsis{Convert a S-Lang regular expression to a PCRE one}
+ *\usage{String_Type slang_to_pcre (String_Type pattern)}
+ *\description
+ * This function may be used to convert a slang regular expression to a PCRE
+ * compatible one. The converted is returned.
+ *\seealso{pcre_compile, string_match}
+ *%-
+ */
+ SLANG_END_INTRIN_FUN_TABLE
+};
+
+static SLang_IConstant_Type PCRE_Consts [] =
+{
+ /* compile options */
+ MAKE_ICONSTANT("PCRE_ANCHORED", PCRE_ANCHORED),
+ MAKE_ICONSTANT("PCRE_CASELESS", PCRE_CASELESS),
+ MAKE_ICONSTANT("PCRE_DOLLAR_ENDONLY", PCRE_DOLLAR_ENDONLY),
+ MAKE_ICONSTANT("PCRE_DOTALL", PCRE_DOTALL),
+ MAKE_ICONSTANT("PCRE_EXTENDED", PCRE_EXTENDED),
+ MAKE_ICONSTANT("PCRE_EXTRA", PCRE_EXTRA),
+ MAKE_ICONSTANT("PCRE_MULTILINE", PCRE_MULTILINE),
+ MAKE_ICONSTANT("PCRE_UNGREEDY", PCRE_UNGREEDY),
+ MAKE_ICONSTANT("PCRE_UTF8", PCRE_UTF8),
+
+ /* exec options */
+ MAKE_ICONSTANT("PCRE_NOTBOL", PCRE_NOTBOL),
+ MAKE_ICONSTANT("PCRE_NOTEOL", PCRE_NOTEOL),
+ MAKE_ICONSTANT("PCRE_NOTEMPTY", PCRE_NOTEMPTY),
+ SLANG_END_ICONST_TABLE
+};
+
+#undef P
+#undef I
+#undef V
+#undef S
+
+static void patchup_intrinsic_table (SLang_Intrin_Fun_Type *table,
+ unsigned char dummy, unsigned char type)
+{
+ while (table->name != NULL)
+ {
+ unsigned int i, nargs;
+ unsigned char *args;
+
+ nargs = table->num_args;
+ args = table->arg_types;
+ for (i = 0; i < nargs; i++)
+ {
+ if (args[i] == dummy)
+ args[i] = type;
+ }
+
+ /* For completeness */
+ if (table->return_type == dummy)
+ table->return_type = type;
+
+ table++;
+ }
+}
+
+
+static int register_pcre_type (void)
+{
+ SLang_Class_Type *cl;
+
+ if (NULL == (cl = SLclass_allocate_class ("PCRE_Type")))
+ return -1;
+
+ if (-1 == SLclass_set_destroy_function (cl, destroy_pcre))
+ return -1;
+
+ /* By registering as SLANG_VOID_TYPE, slang will dynamically allocate a
+ * type.
+ */
+ if (-1 == SLclass_register_class (cl, SLANG_VOID_TYPE, sizeof (PCRE_Type), SLANG_CLASS_TYPE_MMT))
+ return -1;
+
+ PCRE_Type_Id = SLclass_get_class_id (cl);
+ patchup_intrinsic_table (PCRE_Intrinsics, DUMMY_PCRE_TYPE, PCRE_Type_Id);
+
+ return 0;
+}
+
+static void *do_malloc (size_t n)
+{
+ return (void *) SLmalloc (n);
+}
+
+static void do_free (void *x)
+{
+ SLfree ((char *) x);
+}
+
+int init_pcre_module_ns (char *ns_name)
+{
+ SLang_NameSpace_Type *ns = SLns_create_namespace (ns_name);
+ if (ns == NULL)
+ return -1;
+
+ if (-1 == register_pcre_type ())
+ return -1;
+
+ pcre_malloc = do_malloc;
+ pcre_free = do_free;
+
+ if ((-1 == SLns_add_intrin_fun_table (ns, PCRE_Intrinsics, "__PCRE__"))
+ || (-1 == SLns_add_iconstant_table (ns, PCRE_Consts, NULL)))
+ return -1;
+
+ return 0;
+}
+
+
+/* This function is optional */
+void deinit_pcre_module (void)
+{
+}
+