aboutsummaryrefslogtreecommitdiffhomepage
path: root/libslang/src/util
diff options
context:
space:
mode:
Diffstat (limited to 'libslang/src/util')
-rw-r--r--libslang/src/util/bcdump.c457
-rw-r--r--libslang/src/util/chkproto.c225
-rw-r--r--libslang/src/util/keywords.lis73
-rw-r--r--libslang/src/util/perfhash.c600
4 files changed, 1355 insertions, 0 deletions
diff --git a/libslang/src/util/bcdump.c b/libslang/src/util/bcdump.c
new file mode 100644
index 0000000..04c64e4
--- /dev/null
+++ b/libslang/src/util/bcdump.c
@@ -0,0 +1,457 @@
+#include <stdio.h>
+#include <stdio.h>
+#include "sl-feat.h"
+#include "slang.h"
+#include "_slang.h"
+
+static void dump_token (_SLang_Token_Type *t)
+{
+ char buf [256], *b;
+
+ b = buf;
+
+ if (SLang_Error)
+ return;
+
+ switch (t->type)
+ {
+ case IDENT_TOKEN:
+ b = t->v.s_val;
+ break;
+
+ case CHAR_TOKEN:
+ case INT_TOKEN:
+ sprintf (buf, "%ld", t->v.long_val);
+ break;
+
+ case DOUBLE_TOKEN:
+ b = t->v.s_val;
+ break;
+
+ case STRING_TOKEN:
+ sprintf (buf, "\"%s\"", t->v.s_val);
+ break;
+
+ case PLUSPLUS_TOKEN:
+ sprintf (buf, "++%s", t->v.s_val);
+ break;
+
+ case POST_PLUSPLUS_TOKEN:
+ sprintf (buf, "%s++", t->v.s_val);
+ break;
+
+ case MINUSMINUS_TOKEN:
+ sprintf (buf, "--%s", t->v.s_val);
+ break;
+
+ case POST_MINUSMINUS_TOKEN:
+ sprintf (buf, "%s--", t->v.s_val);
+ break;
+
+ case MINUSEQS_TOKEN:
+ sprintf (buf, "-=%s", t->v.s_val);
+ break;
+
+ case PLUSEQS_TOKEN:
+ sprintf (buf, "+=%s", t->v.s_val);
+ break;
+
+ case ASSIGN_TOKEN:
+ sprintf (buf, "=%s", t->v.s_val);
+ break;
+
+ case EOF_TOKEN:
+ b = "EOF_TOKEN";
+ break;
+
+ case NOP_TOKEN:
+ b = "NOP_TOKEN";
+ break;
+
+ case FOREVER_TOKEN:
+ b = "forever";
+ break;
+
+ case ARG_TOKEN:
+ b = "__args";
+ break;
+
+ case EARG_TOKEN:
+ b = "__eargs";
+ break;
+
+ case FARG_TOKEN:
+ b = "__farg";
+ break;
+
+ case _INLINE_ARRAY_TOKEN:
+ b = "__inline_array";
+ break;
+
+ case _INLINE_IMPLICIT_ARRAY_TOKEN:
+ b = "__inline_implicit_array";
+ break;
+
+ case IFNOT_TOKEN:
+ b = "!if";
+ break;
+
+ case ABS_TOKEN:
+ b = "abs";
+ break;
+
+ case LT_TOKEN:
+ b = "<";
+ break;
+
+ case LE_TOKEN:
+ b = "<=";
+ break;
+
+ case GT_TOKEN:
+ b = ">";
+ break;
+
+ case GE_TOKEN:
+ b = ">=";
+ break;
+
+ case EQ_TOKEN:
+ b = "==";
+ break;
+
+ case NE_TOKEN:
+ b = "!=";
+ break;
+
+ case AND_TOKEN:
+ b = "and";
+ break;
+
+ case IF_TOKEN:
+ b = "if";
+ break;
+
+ case POP_TOKEN:
+ b = "pop";
+ break;
+
+ case ANDELSE_TOKEN:
+ b = "andelse";
+ break;
+
+ case BXOR_TOKEN:
+ b = "xor";
+ break;
+
+ case BAND_TOKEN:
+ b = "&";
+ break;
+
+ case BOR_TOKEN:
+ b = "|";
+ break;
+
+ case BNOT_TOKEN:
+ b = "~";
+ break;
+
+ case SHR_TOKEN:
+ b = "shr";
+ break;
+
+ case CHS_TOKEN:
+ b = "chs";
+ break;
+
+ case SHL_TOKEN:
+ b = "shl";
+ break;
+
+ case SQR_TOKEN:
+ b = "sqr";
+ break;
+
+ case CASE_TOKEN:
+ b = "case";
+ break;
+
+ case SIGN_TOKEN:
+ b = "sign";
+ break;
+
+ case BREAK_TOKEN:
+ b = "break";
+ break;
+
+ case STATIC_TOKEN:
+ b = "static";
+ break;
+
+ case STRUCT_TOKEN:
+ b = "struct";
+ break;
+
+ case RETURN_TOKEN:
+ b = "return";
+ break;
+
+ case SWITCH_TOKEN:
+ b = "switch";
+ break;
+
+ case EXCH_TOKEN:
+ b = "exch";
+ break;
+
+ case CONT_TOKEN:
+ b = "continue";
+ break;
+
+ case EXITBLK_TOKEN:
+ b = "EXIT_BLOCK";
+ break;
+
+ case ERRBLK_TOKEN:
+ b = "ERROR_BLOCK";
+ break;
+
+ case USRBLK0_TOKEN:
+ b = "USER_BLOCK0";
+ break;
+
+ case USRBLK1_TOKEN:
+ b = "USER_BLOCK1";
+ break;
+
+ case USRBLK2_TOKEN:
+ b = "USER_BLOCK2";
+ break;
+
+ case USRBLK3_TOKEN:
+ b = "USER_BLOCK3";
+ break;
+
+ case USRBLK4_TOKEN:
+ b = "USER_BLOCK4";
+ break;
+
+ case ELSE_TOKEN:
+ b = "else";
+ break;
+
+ case MUL2_TOKEN:
+ b = "mul2";
+ break;
+
+ case DEFINE_TOKEN:
+ b = ")";
+ break;
+
+ case DEFINE_STATIC_TOKEN:
+ b = ")static";
+ break;
+
+ case LOOP_TOKEN:
+ b = "loop";
+ break;
+
+ case MOD_TOKEN:
+ b = "mod";
+ break;
+
+ case DO_TOKEN:
+ b = "do";
+ break;
+
+ case DOWHILE_TOKEN:
+ b = "do_while";
+ break;
+
+ case WHILE_TOKEN:
+ b = "while";
+ break;
+
+ case OR_TOKEN:
+ b = "or";
+ break;
+
+ case VARIABLE_TOKEN:
+ b = "variable";
+ break;
+
+ case _SCALAR_ASSIGN_TOKEN:
+ sprintf (buf, "=%s", t->v.s_val);
+ break;
+
+ case _SCALAR_PLUSEQS_TOKEN:
+ sprintf (buf, "+=%s", t->v.s_val);
+ break;
+
+ case _SCALAR_MINUSEQS_TOKEN:
+ sprintf (buf, "-=%s", t->v.s_val);
+ break;
+
+ case _SCALAR_PLUSPLUS_TOKEN:
+ sprintf (buf, "++%s", t->v.s_val);
+ break;
+
+ case _SCALAR_POST_PLUSPLUS_TOKEN:
+ sprintf (buf, "%s++", t->v.s_val);
+ break;
+
+ case _SCALAR_MINUSMINUS_TOKEN:
+ sprintf (buf, "--%s", t->v.s_val);
+ break;
+
+ case _SCALAR_POST_MINUSMINUS_TOKEN:
+ sprintf (buf, "%s--", t->v.s_val);
+ break;
+
+ case _DEREF_ASSIGN_TOKEN:
+ sprintf (buf, "=@%s", t->v.s_val);
+ break;
+
+ case _REF_TOKEN:
+ sprintf (buf, "%s __ref", t->v.s_val);
+ break;
+
+ case ORELSE_TOKEN:
+ b = "orelse";
+ break;
+
+ case _FOR_TOKEN:
+ b = "_for";
+ break;
+
+ case FOR_TOKEN:
+ b = "for";
+ break;
+
+ case NOT_TOKEN:
+ b = "not";
+ break;
+
+ case OBRACKET_TOKEN:
+ b = "[";
+ break;
+
+ case CBRACKET_TOKEN:
+ b = "]";
+ break;
+
+ case OPAREN_TOKEN:
+ b = "(";
+ break;
+
+ case CPAREN_TOKEN:
+ b = ")";
+ break;
+
+ case OBRACE_TOKEN:
+ b = "{";
+ break;
+
+ case CBRACE_TOKEN:
+ b = "}";
+ break;
+
+ case DEREF_TOKEN:
+ b = "@";
+ break;
+
+ case COMMA_TOKEN:
+ b = ",";
+ break;
+
+ case SEMICOLON_TOKEN:
+ b = ";";
+ break;
+
+ case COLON_TOKEN:
+ b = ":";
+ break;
+
+ case ADD_TOKEN:
+ b = "+";
+ break;
+
+ case SUB_TOKEN:
+ b = "-";
+ break;
+
+ /* case MUL_TOKEN: */
+ /* b = "*"; */
+ /* break; */
+
+ case DIV_TOKEN:
+ b = "/";
+ break;
+
+ case ARRAY_TOKEN:
+ b = "__aget";
+ break;
+
+ case _ARRAY_ASSIGN_TOKEN:
+ b = "__aput";
+ break;
+
+ case DOT_TOKEN:
+ sprintf (buf, "%s .", t->v.s_val);
+ break;
+
+ case METHOD_TOKEN:
+ sprintf (buf, "%s __eargs __method_call", t->v.s_val);
+ break;
+
+ case _STRUCT_ASSIGN_TOKEN:
+ b = "__struct_eqs"; break;
+ case _STRUCT_PLUSEQS_TOKEN:
+ b = "__struct_pluseqs"; break;
+ case _STRUCT_MINUSEQS_TOKEN:
+ b = "__struct_minuseqs"; break;
+ case _STRUCT_PLUSPLUS_TOKEN:
+ b = "__struct_plusplus"; break;
+ case _STRUCT_POST_PLUSPLUS_TOKEN:
+ b = "__struct_pplusplus"; break;
+ case _STRUCT_MINUSMINUS_TOKEN:
+ b = "__struct_minusminus"; break;
+ case _STRUCT_POST_MINUSMINUS_TOKEN:
+ b = "__struct_pminusminus"; break;
+
+ case _NULL_TOKEN: b = "NULL"; break;
+
+ case USING_TOKEN:
+ b = "__using__"; break;
+ case FOREACH_TOKEN:
+ b = "__foreach__"; break;
+
+ default:
+ sprintf (buf, "____UNKNOWN___0x%X", t->type);
+ break;
+ }
+
+ fprintf (stdout, "0x%2X: %s\n", t->type, b);
+}
+
+
+int main (int argc, char **argv)
+{
+ char *file;
+
+ if (argc == 2)
+ {
+ /* fprintf (stderr, "Usage: %s <filename>\n", argv[0]); */
+ file = argv[1];
+ /* return 1; */
+ }
+ else file = NULL;
+
+ if (-1 == SLang_init_slang ())
+ return 1;
+
+ _SLcompile_ptr = dump_token;
+ SLang_load_file (file);
+
+ return SLang_Error;
+}
diff --git a/libslang/src/util/chkproto.c b/libslang/src/util/chkproto.c
new file mode 100644
index 0000000..d4a6e23
--- /dev/null
+++ b/libslang/src/util/chkproto.c
@@ -0,0 +1,225 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+static char *Make_Intrinsic_Forms [] =
+{
+ "0",
+ "I",
+ "S",
+ "II",
+ "SS",
+ "SI",
+ "IS",
+ "III",
+ "IIS",
+ "ISI",
+ "ISS",
+ "SII",
+ "SIS",
+ "SSI",
+ "SSS",
+ "IIII",
+ "SSSS",
+ NULL
+};
+
+
+static char *output_start (FILE *fp)
+{
+ char **form;
+
+ form = Make_Intrinsic_Forms;
+
+ while (*form != NULL)
+ {
+ char *r = "0ISD";
+ char *f;
+
+ while (*r != 0)
+ {
+ switch (*r)
+ {
+ case '0':
+ fprintf (fp, "static void (*V_F");
+ break;
+
+ case 'I':
+ fprintf (fp, "static int (*I_F");
+ break;
+
+ case 'S':
+ fprintf (fp, "static char *(*S_F");
+ break;
+
+ case 'D':
+ fprintf (fp, "static double (*D_F");
+ break;
+ }
+
+ f = *form;
+ fprintf (fp, "%s)(", f);
+ while (*f != 0)
+ {
+ if (f != *form) fputc (',', fp);
+ switch (*f)
+ {
+ case 'I':
+ fputs ("int*", fp);
+ break;
+
+ case 'S':
+ fputs ("char*", fp);
+ break;
+
+ case '0':
+ fputs ("void", fp);
+ break;
+ }
+ f++;
+ }
+ fputs (");\n", fp);
+
+ r++;
+ }
+ form++;
+ }
+
+ fputs ("\n", fp);
+
+ fprintf (fp, "static void chkproto_not_used_fun (void)\n{\n");
+
+ return 0;
+}
+
+static void output_finish (FILE *fp)
+{
+ fputs ("\n}\n", fp);
+}
+
+
+static char *skip_whitespace (char *p)
+{
+ while ((*p == ' ') || (*p == '\t') || (*p == '\n'))
+ p++;
+
+ return p;
+}
+
+static int do_make_intrinsic (unsigned int linenum, char *p)
+{
+ char *e;
+ char *name;
+ int ret_type;
+
+ e = p;
+ while (*e && (*e != ' ') && (*e != '\t') && (*e != '('))
+ e++;
+
+ if (*e == 0)
+ {
+ return -1;
+ }
+ *e++ = 0;
+
+ /* We expect form: IIS("name", function_name, RETURN_TYPE) */
+ while (*e && (*e != ',')) e++;
+ if (*e == 0)
+ {
+ return -1;
+ }
+ e = skip_whitespace (e + 1);
+ name = e;
+ if (*name == 0)
+ {
+ return -1;
+ }
+ while (*e && (*e != ',')) e++;
+ if (*e == 0)
+ {
+ return -1;
+ }
+ *e = 0;
+
+ e = skip_whitespace (e + 1);
+ if (0 == strncmp (e, "SLANG_", 6))
+ e += 6;
+
+ if (0 == strncmp (e, "VOID_TYPE", 8))
+ ret_type = 'V';
+ else if (0 == strncmp (e, "STRING_TYPE", 11))
+ ret_type = 'S';
+ else if (0 == strncmp (e, "INT_TYPE", 8))
+ ret_type = 'I';
+ else if (0 == strncmp (e, "DOUBLE_TYPE", 8))
+ ret_type = 'D';
+ else
+ {
+ fprintf (stderr, "return type on line %u is not supported\n", linenum);
+ return -1;
+ }
+
+ fprintf (stdout, " %c_F%s = %s;\n", ret_type, p, name);
+ return 0;
+}
+
+
+int main (int argc, char **argv)
+{
+ char line [1024];
+ FILE *fp;
+ unsigned int linenum;
+
+ if (isatty (0)
+ || (argc > 1))
+ {
+ fprintf (stderr, "Usage: %s < infile > outfile\n", argv[0]);
+ exit (1);
+ }
+
+ output_start (stdout);
+
+ fp = stdin;
+
+ linenum = 0;
+ while (NULL != fgets (line, sizeof (line), fp))
+ {
+ char *p = skip_whitespace (line);
+
+ linenum++;
+
+ if (*p != 'M') continue;
+
+ if (0 != strncmp (p, "MAKE_INTRINSIC", 14))
+ continue;
+
+ if (p[14] != '_')
+ {
+ fprintf (stderr, "Warning: line %u is old-fashioned\n", linenum);
+ continue;
+ }
+
+ p += 15;
+
+ switch (*p)
+ {
+ case 'I':
+ case 'S':
+ case '0':
+ do_make_intrinsic (linenum, p);
+ break;
+
+ default:
+ fprintf (stderr, "Warning: Unable to handle MAKE_INTRINSIC form on line %u\n",
+ linenum);
+ break;
+ }
+ }
+
+ output_finish (stdout);
+ return 0;
+}
+
+
+
diff --git a/libslang/src/util/keywords.lis b/libslang/src/util/keywords.lis
new file mode 100644
index 0000000..b794a59
--- /dev/null
+++ b/libslang/src/util/keywords.lis
@@ -0,0 +1,73 @@
+!if IFNOT_TOKEN
+ERROR_BLOCK ERRBLK_TOKEN
+EXIT_BLOCK EXITBLK_TOKEN
+USER_BLOCK0 USRBLK0_TOKEN
+USER_BLOCK1 USRBLK1_TOKEN
+USER_BLOCK2 USRBLK2_TOKEN
+USER_BLOCK3 USRBLK3_TOKEN
+USER_BLOCK4 USRBLK4_TOKEN
+__tmp TMP_TOKEN
+_for _FOR_TOKEN
+abs ABS_TOKEN
+and AND_TOKEN
+andelse ANDELSE_TOKEN
+break BREAK_TOKEN
+case CASE_TOKEN
+chs CHS_TOKEN
+continue CONT_TOKEN
+define DEFINE_TOKEN
+do DO_TOKEN
+do_while DOWHILE_TOKEN
+else ELSE_TOKEN
+exch EXCH_TOKEN
+for FOR_TOKEN
+foreach FOREACH_TOKEN
+forever FOREVER_TOKEN
+if IF_TOKEN
+loop LOOP_TOKEN
+mod MOD_TOKEN
+mul2 MUL2_TOKEN
+not NOT_TOKEN
+or OR_TOKEN
+orelse ORELSE_TOKEN
+pop POP_TOKEN
+private PRIVATE_TOKEN
+public PUBLIC_TOKEN
+return RETURN_TOKEN
+shl SHL_TOKEN
+shr SHR_TOKEN
+sign SIGN_TOKEN
+sqr SQR_TOKEN
+static STATIC_TOKEN
+struct STRUCT_TOKEN
+switch SWITCH_TOKEN
+typedef TYPEDEF_TOKEN
+using USING_TOKEN
+variable VARIABLE_TOKEN
+while WHILE_TOKEN
+xor BXOR_TOKEN
+%
+% The kewords below here are commented out because they are not
+% used. Most of them could be used if one wanted to write in RPN form
+% but I want to discontinue it
+%
+%push PUSH_TOKEN
+%readonly READONLY_TOKEN
+%__ref _REF_TOKEN
+%__aput _ARRAY_ASSIGN_TOKEN
+%__aput_minuseqs _ARRAY_MINUSEQS_TOKEN
+%__aput_minusminus _ARRAY_MINUSMINUS_TOKEN
+%__aput_pluseqs _ARRAY_PLUSEQS_TOKEN
+%__aput_plusplus _ARRAY_PLUSPLUS_TOKEN
+%__aput_pminusminus _ARRAY_POST_MINUSMINUS_TOKEN
+%__aput_pplusplus _ARRAY_POST_PLUSPLUS_TOKEN
+%__arg ARG_TOKEN
+%__array ARRAY_TOKEN
+%__earg EARG_TOKEN
+%__struct_eqs _STRUCT_ASSIGN_TOKEN
+%__struct_minuseqs _STRUCT_MINUSEQS_TOKEN
+%__struct_minusminus _STRUCT_MINUSMINUS_TOKEN
+%__struct_pluseqs _STRUCT_PLUSEQS_TOKEN
+%__struct_plusplus _STRUCT_PLUSPLUS_TOKEN
+%__struct_pminusminus _STRUCT_POST_MINUSMINUS_TOKEN
+%__struct_pplusplus _STRUCT_POST_PLUSPLUS_TOKEN
diff --git a/libslang/src/util/perfhash.c b/libslang/src/util/perfhash.c
new file mode 100644
index 0000000..b593b19
--- /dev/null
+++ b/libslang/src/util/perfhash.c
@@ -0,0 +1,600 @@
+/* Copyright (c) 1998, 2001 John E. Davis
+ *
+ * You may distribute under the terms of either the GNU General Public
+ * License or the Perl Artistic License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static char *C_Char_Map_Name = "Keyword_Hash_Table";
+static char *C_Hash_Function_Name = "keyword_hash";
+static char *C_Hash_Table_Type = "Keyword_Table_Type";
+static char *C_Hash_Table_Name = "Keyword_Table";
+static char *C_Is_Keyword_Function_Name = "is_keyword";
+
+typedef struct
+{
+ unsigned int hash;
+ unsigned int len;
+ unsigned int freq_statistic;
+ char *name;
+ char *type;
+}
+String_Table_Type;
+
+static String_Table_Type String_Table [256];
+
+static unsigned int Num_Strings;
+
+static unsigned int Max_Table_Value;
+static unsigned int Max_String_Len;
+static unsigned int Min_String_Len;
+static unsigned int Max_Hash_Value;
+static unsigned int Tweak_Step_Size;
+static int Use_Length = 1;
+
+static int Char_Table_Map [256];
+
+static void write_table (unsigned int min_hash, unsigned int max_hash)
+{
+ String_Table_Type *st, *st_max;
+ unsigned int i;
+
+ fprintf (stdout, "\n\
+typedef SLCONST struct\n\
+{\n\
+ char *name;\n\
+ unsigned int type;\n\
+}\n\
+%s;\n\
+",
+ C_Hash_Table_Type);
+
+ fprintf (stdout, "\nstatic %s %s [/* %u */] =\n{\n",
+ C_Hash_Table_Type, C_Hash_Table_Name, (max_hash - min_hash) + 1);
+ fprintf (stderr, "String Table Size: %u\n", (max_hash - min_hash) + 1);
+
+ for (i = min_hash; i <= max_hash; i++)
+ {
+ st = String_Table;
+ st_max = st + Num_Strings;
+ while (st < st_max)
+ {
+ if ((unsigned int) st->hash == i)
+ break;
+ st++;
+ }
+
+ if (st == st_max)
+ fprintf (stdout, " {NULL,0},\n");
+ else
+ fprintf (stdout, " {\"%s\",\t%s},\n", st->name, st->type);
+ }
+
+ fputs ("};\n\n", stdout);
+
+ fprintf (stdout, "\
+static %s *%s (char *str, unsigned int len)\n\
+{\n\
+ unsigned int hash;\n\
+ char *name;\n\
+ %s *kw;\n\
+\n\
+ if ((len < MIN_KEYWORD_LEN)\n\
+ || (len > MAX_KEYWORD_LEN))\n\
+ return NULL;\n\
+\n\
+ hash = %s (str, len);\n\
+ if ((hash > MAX_HASH_VALUE) || (hash < MIN_HASH_VALUE))\n\
+ return NULL;\n\
+\n\
+ kw = &%s[hash - MIN_HASH_VALUE];\n\
+ if ((NULL != (name = kw->name))\n\
+ && (*str == *name)\n\
+ && (0 == strcmp (str, name)))\n\
+ return kw;\n\
+ return NULL;\n\
+}\n\
+",
+ C_Hash_Table_Type, C_Is_Keyword_Function_Name,
+ C_Hash_Table_Type, C_Hash_Function_Name, C_Hash_Table_Name);
+}
+
+static unsigned int hash_function (int *char_map,
+ unsigned char *s, unsigned int len)
+{
+ unsigned int sum;
+
+ if (Use_Length) sum = len;
+ else sum = 0;
+
+ while (len)
+ {
+ len--;
+ sum += (unsigned int) char_map [s[len]];
+ }
+ return sum;
+}
+
+static unsigned int Frequency_Table [256];
+
+static void init_map (int *map)
+{
+ unsigned int i;
+
+ for (i = 0; i < 256; i++) map [i] = -1;
+
+ for (i = 0; i < Num_Strings; i++)
+ {
+ unsigned char *s;
+
+ s = (unsigned char *) String_Table[i].name;
+
+ while (*s != 0)
+ {
+ map [*s] = 0;
+ s++;
+ }
+ }
+}
+
+static void write_map (int *map)
+{
+ unsigned int i;
+ unsigned int min_hash, max_hash;
+ char *type;
+
+ min_hash = 0xFFFF;
+ max_hash = 0;
+ for (i = 0; i < Num_Strings; i++)
+ {
+ unsigned int h = String_Table[i].hash;
+ if (h < min_hash) min_hash = h;
+ if (h > max_hash) max_hash = h;
+#if 0
+ fprintf (stdout, "-->%s\t%u\n", String_Table[i].name, h);
+#endif
+ }
+
+ fprintf (stdout, "#ifndef SLCONST\n#define SLCONST const\n#endif\n");
+ fprintf (stdout, "#define MIN_HASH_VALUE\t%u\n", min_hash);
+ fprintf (stdout, "#define MAX_HASH_VALUE\t%u\n", max_hash);
+ fprintf (stdout, "#define MIN_KEYWORD_LEN %u\n", Min_String_Len);
+ fprintf (stdout, "#define MAX_KEYWORD_LEN %u\n", Max_String_Len);
+
+ for (i = 0; i < 256; i++)
+ if (map[i] == -1) map[i] = (max_hash + 1);
+
+ if (max_hash + 1 < 0xFF)
+ type = "unsigned char";
+ else if (max_hash + 1 < 0xFFFF)
+ type = "unsigned short";
+ else
+ type = "unsigned long";
+
+ fprintf (stderr, "Hash Table is of type %s with max hash %u\n",
+ type, max_hash);
+
+ fprintf (stdout, "\nstatic %s %s [256] =\n",
+ type, C_Char_Map_Name);
+
+ fprintf (stdout, "{\n ");
+
+ for (i = 0; i < 255; i++)
+ {
+ fprintf (stdout, "%3d, ", map[i]);
+ if ((i % 16) == 15)
+ fputs ("\n ", stdout);
+ }
+ fprintf (stdout, "%3d\n};\n", map[255]);
+
+ fputs ("\n", stdout);
+
+ fprintf (stdout, "static %s %s (char *s, unsigned int len)\n",
+ type, C_Hash_Function_Name);
+
+fprintf (stdout,"\
+{\n\
+ unsigned int sum;\n\
+\n\
+ sum = %s;\n\
+ while (len)\n\
+ {\n\
+ len--;\n\
+ sum += (unsigned int) %s [(unsigned char)s[len]];\n\
+ }\n\
+ return sum;\n\
+}\n\
+",
+ (Use_Length ? "len" : "0"),
+ C_Char_Map_Name);
+
+ write_table (min_hash, max_hash);
+}
+
+
+static unsigned int compute_hash (unsigned int i)
+{
+ String_Table_Type *s;
+ unsigned int hash;
+
+ s = String_Table + i;
+ hash = hash_function (Char_Table_Map, (unsigned char *) s->name, s->len);
+ return s->hash = hash;
+}
+
+
+static int tweak_character (unsigned char ch, unsigned int bad)
+{
+ unsigned int val;
+ unsigned int val_save;
+ unsigned int i, j;
+ unsigned int nvalues;
+
+ val_save = (unsigned int) Char_Table_Map [ch];
+ nvalues = Max_Table_Value;
+
+ val = 0;
+ while (nvalues)
+ {
+ val += Tweak_Step_Size;
+ val = val % Max_Table_Value;
+
+ Char_Table_Map[ch] = (int) val;
+
+ for (i = 0; i <= bad; i++)
+ {
+ unsigned int hash = compute_hash (i);
+ for (j = 0; j < i; j++)
+ {
+ if (hash == String_Table[j].hash)
+ break;
+ }
+ if (j != i)
+ break;
+ }
+
+ if (i > bad) return 0;
+ nvalues--;
+ }
+
+ Char_Table_Map [ch] = (int) val_save;
+#if 0
+ /* reset hashes */
+ for (i = 0; i <= bad; i++)
+ (void) compute_hash (i);
+#endif
+ return -1;
+}
+
+
+static void sort_according_to_frequency (unsigned char *s, unsigned int len)
+{
+ /* since len is small (I hope), I will be lazy */
+ unsigned char si, sj;
+ unsigned int fi;
+ unsigned int i, j, imax;
+
+ imax = len - 1;
+ for (i = 0; i < imax; i++)
+ {
+ si = s[i];
+ fi = Frequency_Table [si];
+
+ for (j = i + 1; j < len; j++)
+ {
+ sj = s[j];
+ if (Frequency_Table[sj] < fi)
+ {
+ s[i] = sj;
+ s[j] = si;
+ break;
+ }
+ }
+ }
+}
+
+static void create_frequency_table (unsigned int num)
+{
+ unsigned int i;
+
+ memset ((char *) Frequency_Table, 0, sizeof(Frequency_Table));
+
+ for (i = 0; i < num; i++)
+ {
+ unsigned char *s, ch;
+
+ s = String_Table[i].name;
+ while (0 != (ch = *s))
+ {
+ Frequency_Table [ch] += 1;
+ s++;
+ }
+ }
+}
+
+static int tweak_hash_function (unsigned int bad, unsigned int good)
+{
+ unsigned char unique_chars [256];
+ unsigned char bad_chars [256], good_chars[256];
+ unsigned char *s;
+ unsigned int i, len;
+
+
+ memset ((char *)unique_chars, 0, sizeof (unique_chars));
+ memset ((char *)bad_chars, 0, sizeof (bad_chars));
+ memset ((char *)good_chars, 0, sizeof (good_chars));
+
+ s = (unsigned char *) String_Table[bad].name;
+ while (*s != 0)
+ {
+ bad_chars [*s] = 1;
+ s++;
+ }
+
+ s = (unsigned char *) String_Table[good].name;
+ while (*s != 0)
+ {
+ good_chars [*s] = 1;
+ s++;
+ }
+
+ /* Find out the characters that are in good or bad, and not both. That
+ * way we are free to manipulate those to avoid the collision.
+ */
+ len = 0;
+ for (i = 0; i < 256; i++)
+ {
+ if (bad_chars[i])
+ {
+ if (good_chars [i] == 0)
+ unique_chars [len++] = i;
+ }
+ else if (good_chars [i])
+ unique_chars [len++] = i;
+ }
+
+ /* Unfortunately, the unique_chars may already be part of the words
+ * that have already been hashed. So, sort them according to how often
+ * they occur and deal with those that occur the least often first.
+ */
+#if 1
+ create_frequency_table (bad);
+#endif
+ sort_according_to_frequency (unique_chars, len);
+
+ for (i = 0; i < len; i++)
+ {
+ unsigned char ch = unique_chars [i];
+ if (0 == tweak_character (ch, bad))
+ return 0;
+ }
+
+ return -1;
+}
+
+static int perfect_hash (void)
+{
+ unsigned int i, j;
+ unsigned int hash;
+ int has_collisions = 1;
+
+ for (i = 0; i < Num_Strings; i++)
+ {
+ hash = compute_hash (i);
+ for (j = 0; j < i; j++)
+ {
+ if (hash != String_Table[j].hash)
+ continue;
+
+ /* Oops. We have a collision. tweak_hash_function will
+ * adjust the hash table array to resolve this collision
+ * and ensure that previous ones remain resolved.
+ */
+ if (-1 == tweak_hash_function (i, j))
+ {
+ has_collisions = 1;
+ /* return -1; */
+ }
+ break;
+ }
+ }
+
+ if (has_collisions == 0)
+ return 0;
+
+ has_collisions = 0;
+ /* Now check for collisions */
+ for (i = 0; i < Num_Strings; i++)
+ {
+ char *s = String_Table[i].name;
+ hash = compute_hash (i);
+
+ for (j = 0; j < i; j++)
+ {
+ if (hash != String_Table[j].hash)
+ continue;
+
+ has_collisions++;
+ fprintf (stderr, "Collision: %s, %s\n", s, String_Table [j].name);
+ }
+ }
+
+ if (has_collisions)
+ return -1;
+
+ return 0;
+}
+
+
+static int sort_function (String_Table_Type *a, String_Table_Type *b)
+{
+ return b->freq_statistic - a->freq_statistic;
+}
+
+static void sort_strings (void)
+{
+ unsigned int i;
+ void (*qsort_fun) (String_Table_Type *, unsigned int,
+ unsigned int, int (*)(String_Table_Type *, String_Table_Type *));
+
+ for (i = 0; i < Num_Strings; i++)
+ {
+ int f;
+ unsigned char *s;
+
+ f = 0;
+ s = (unsigned char *) String_Table[i].name;
+ while (*s != 0) f += Frequency_Table [*s++];
+ String_Table [i].freq_statistic = f;
+ }
+
+ qsort_fun = (void (*) (String_Table_Type *, unsigned int,
+ unsigned int,
+ int (*)(String_Table_Type *, String_Table_Type *)))
+ qsort;
+
+ qsort_fun (String_Table, Num_Strings, sizeof (String_Table_Type),
+ sort_function);
+}
+
+int main (int argc, char **argv)
+{
+ char line[256];
+ unsigned int count;
+ unsigned int min_char;
+ unsigned int max_char;
+ unsigned int min_len;
+ unsigned int max_len;
+ unsigned int i;
+
+ Tweak_Step_Size = 5;
+
+ if (isatty (0) || (argc > 2))
+ {
+ fprintf (stderr, "Usage: %s [step-size] < keywords > hash.c\n", argv[0]);
+ fprintf (stderr, " Default step-size is %u\n", Tweak_Step_Size);
+ return 1;
+ }
+
+ if (argc > 1)
+ {
+ if (1 != sscanf (argv[1], "%u", &Tweak_Step_Size))
+ Tweak_Step_Size = 5;
+
+ if ((Tweak_Step_Size % 2) == 0) Tweak_Step_Size++;
+ }
+
+ count = 0;
+ min_char = 255;
+ max_char = 0;
+ max_len = 0;
+ min_len = 0xFFFF;
+
+ while (NULL != fgets (line, sizeof (line), stdin))
+ {
+ char *s;
+ unsigned int len;
+ char *type;
+
+ if (*line == '%') continue;
+
+ if (count == 256)
+ {
+ fprintf (stderr, "Only 256 keywords permitted.\n");
+ return 1;
+ }
+
+ len = strlen (line);
+ if (len && (line [len - 1] == '\n'))
+ {
+ len--;
+ line [len] = 0;
+ }
+
+ if (len == 0)
+ continue;
+
+ s = malloc (len + 1);
+ if (s == NULL)
+ {
+ fprintf (stderr, "Malloc error.\n");
+ return 1;
+ }
+ strcpy (s, line);
+ String_Table[count].name = s;
+
+ type = s;
+ while (*type && (*type != ' ') && (*type != '\t'))
+ type++;
+
+ len = (unsigned int) (type - s);
+ String_Table [count].len = len;
+ if (len > max_len) max_len = len;
+ if (len < min_len) min_len = len;
+
+ if (*type != 0)
+ {
+ *type++ = 0;
+ while ((*type == ' ') || (*type == '\t'))
+ type++;
+ }
+
+ String_Table [count].type = type;
+
+
+ for (i = 0; i < len; i++)
+ {
+ unsigned char ch;
+
+ ch = (unsigned char) s[i];
+
+ if (ch < min_char)
+ min_char = ch;
+ if (ch > max_char)
+ max_char = ch;
+ }
+
+ count++;
+ }
+
+
+ Max_String_Len = max_len;
+ Min_String_Len = min_len;
+
+ Num_Strings = count;
+ Max_Table_Value = 1;
+ while (Max_Table_Value < Num_Strings)
+ Max_Table_Value = Max_Table_Value << 1;
+
+ Max_Hash_Value = Max_Table_Value * Max_String_Len;
+
+ fprintf (stderr, "Theoretical Max_Table_Value: %u\n", Max_Table_Value);
+ fprintf (stderr, "Theoretical Max_Hash_Value: %u\n", Max_Hash_Value);
+
+ create_frequency_table (Num_Strings);
+ sort_strings ();
+
+ init_map (Char_Table_Map);
+ if (-1 == perfect_hash ())
+ {
+ fprintf (stderr, "Hash failed.\n");
+ return -1;
+ }
+
+ fprintf (stderr, "Success.\n");
+
+ fprintf (stdout, "/* Perfect hash generated by command line:\n *");
+ for (i = 0; i < argc; i++)
+ fprintf (stdout, " %s", argv[i]);
+ fputs ("\n */\n", stdout);
+
+ write_map (Char_Table_Map);
+ return 0;
+}
+
+
+
+
+