diff options
Diffstat (limited to 'libslang/src/util')
-rw-r--r-- | libslang/src/util/bcdump.c | 457 | ||||
-rw-r--r-- | libslang/src/util/chkproto.c | 225 | ||||
-rw-r--r-- | libslang/src/util/keywords.lis | 73 | ||||
-rw-r--r-- | libslang/src/util/perfhash.c | 600 |
4 files changed, 1355 insertions, 0 deletions
diff --git a/libslang/src/util/bcdump.c b/libslang/src/util/bcdump.c new file mode 100644 index 0000000..04c64e4 --- /dev/null +++ b/libslang/src/util/bcdump.c @@ -0,0 +1,457 @@ +#include <stdio.h> +#include <stdio.h> +#include "sl-feat.h" +#include "slang.h" +#include "_slang.h" + +static void dump_token (_SLang_Token_Type *t) +{ + char buf [256], *b; + + b = buf; + + if (SLang_Error) + return; + + switch (t->type) + { + case IDENT_TOKEN: + b = t->v.s_val; + break; + + case CHAR_TOKEN: + case INT_TOKEN: + sprintf (buf, "%ld", t->v.long_val); + break; + + case DOUBLE_TOKEN: + b = t->v.s_val; + break; + + case STRING_TOKEN: + sprintf (buf, "\"%s\"", t->v.s_val); + break; + + case PLUSPLUS_TOKEN: + sprintf (buf, "++%s", t->v.s_val); + break; + + case POST_PLUSPLUS_TOKEN: + sprintf (buf, "%s++", t->v.s_val); + break; + + case MINUSMINUS_TOKEN: + sprintf (buf, "--%s", t->v.s_val); + break; + + case POST_MINUSMINUS_TOKEN: + sprintf (buf, "%s--", t->v.s_val); + break; + + case MINUSEQS_TOKEN: + sprintf (buf, "-=%s", t->v.s_val); + break; + + case PLUSEQS_TOKEN: + sprintf (buf, "+=%s", t->v.s_val); + break; + + case ASSIGN_TOKEN: + sprintf (buf, "=%s", t->v.s_val); + break; + + case EOF_TOKEN: + b = "EOF_TOKEN"; + break; + + case NOP_TOKEN: + b = "NOP_TOKEN"; + break; + + case FOREVER_TOKEN: + b = "forever"; + break; + + case ARG_TOKEN: + b = "__args"; + break; + + case EARG_TOKEN: + b = "__eargs"; + break; + + case FARG_TOKEN: + b = "__farg"; + break; + + case _INLINE_ARRAY_TOKEN: + b = "__inline_array"; + break; + + case _INLINE_IMPLICIT_ARRAY_TOKEN: + b = "__inline_implicit_array"; + break; + + case IFNOT_TOKEN: + b = "!if"; + break; + + case ABS_TOKEN: + b = "abs"; + break; + + case LT_TOKEN: + b = "<"; + break; + + case LE_TOKEN: + b = "<="; + break; + + case GT_TOKEN: + b = ">"; + break; + + case GE_TOKEN: + b = ">="; + break; + + case EQ_TOKEN: + b = "=="; + break; + + case NE_TOKEN: + b = "!="; + break; + + case AND_TOKEN: + b = "and"; + break; + + case IF_TOKEN: + b = "if"; + break; + + case POP_TOKEN: + b = "pop"; + break; + + case ANDELSE_TOKEN: + b = "andelse"; + break; + + case BXOR_TOKEN: + b = "xor"; + break; + + case BAND_TOKEN: + b = "&"; + break; + + case BOR_TOKEN: + b = "|"; + break; + + case BNOT_TOKEN: + b = "~"; + break; + + case SHR_TOKEN: + b = "shr"; + break; + + case CHS_TOKEN: + b = "chs"; + break; + + case SHL_TOKEN: + b = "shl"; + break; + + case SQR_TOKEN: + b = "sqr"; + break; + + case CASE_TOKEN: + b = "case"; + break; + + case SIGN_TOKEN: + b = "sign"; + break; + + case BREAK_TOKEN: + b = "break"; + break; + + case STATIC_TOKEN: + b = "static"; + break; + + case STRUCT_TOKEN: + b = "struct"; + break; + + case RETURN_TOKEN: + b = "return"; + break; + + case SWITCH_TOKEN: + b = "switch"; + break; + + case EXCH_TOKEN: + b = "exch"; + break; + + case CONT_TOKEN: + b = "continue"; + break; + + case EXITBLK_TOKEN: + b = "EXIT_BLOCK"; + break; + + case ERRBLK_TOKEN: + b = "ERROR_BLOCK"; + break; + + case USRBLK0_TOKEN: + b = "USER_BLOCK0"; + break; + + case USRBLK1_TOKEN: + b = "USER_BLOCK1"; + break; + + case USRBLK2_TOKEN: + b = "USER_BLOCK2"; + break; + + case USRBLK3_TOKEN: + b = "USER_BLOCK3"; + break; + + case USRBLK4_TOKEN: + b = "USER_BLOCK4"; + break; + + case ELSE_TOKEN: + b = "else"; + break; + + case MUL2_TOKEN: + b = "mul2"; + break; + + case DEFINE_TOKEN: + b = ")"; + break; + + case DEFINE_STATIC_TOKEN: + b = ")static"; + break; + + case LOOP_TOKEN: + b = "loop"; + break; + + case MOD_TOKEN: + b = "mod"; + break; + + case DO_TOKEN: + b = "do"; + break; + + case DOWHILE_TOKEN: + b = "do_while"; + break; + + case WHILE_TOKEN: + b = "while"; + break; + + case OR_TOKEN: + b = "or"; + break; + + case VARIABLE_TOKEN: + b = "variable"; + break; + + case _SCALAR_ASSIGN_TOKEN: + sprintf (buf, "=%s", t->v.s_val); + break; + + case _SCALAR_PLUSEQS_TOKEN: + sprintf (buf, "+=%s", t->v.s_val); + break; + + case _SCALAR_MINUSEQS_TOKEN: + sprintf (buf, "-=%s", t->v.s_val); + break; + + case _SCALAR_PLUSPLUS_TOKEN: + sprintf (buf, "++%s", t->v.s_val); + break; + + case _SCALAR_POST_PLUSPLUS_TOKEN: + sprintf (buf, "%s++", t->v.s_val); + break; + + case _SCALAR_MINUSMINUS_TOKEN: + sprintf (buf, "--%s", t->v.s_val); + break; + + case _SCALAR_POST_MINUSMINUS_TOKEN: + sprintf (buf, "%s--", t->v.s_val); + break; + + case _DEREF_ASSIGN_TOKEN: + sprintf (buf, "=@%s", t->v.s_val); + break; + + case _REF_TOKEN: + sprintf (buf, "%s __ref", t->v.s_val); + break; + + case ORELSE_TOKEN: + b = "orelse"; + break; + + case _FOR_TOKEN: + b = "_for"; + break; + + case FOR_TOKEN: + b = "for"; + break; + + case NOT_TOKEN: + b = "not"; + break; + + case OBRACKET_TOKEN: + b = "["; + break; + + case CBRACKET_TOKEN: + b = "]"; + break; + + case OPAREN_TOKEN: + b = "("; + break; + + case CPAREN_TOKEN: + b = ")"; + break; + + case OBRACE_TOKEN: + b = "{"; + break; + + case CBRACE_TOKEN: + b = "}"; + break; + + case DEREF_TOKEN: + b = "@"; + break; + + case COMMA_TOKEN: + b = ","; + break; + + case SEMICOLON_TOKEN: + b = ";"; + break; + + case COLON_TOKEN: + b = ":"; + break; + + case ADD_TOKEN: + b = "+"; + break; + + case SUB_TOKEN: + b = "-"; + break; + + /* case MUL_TOKEN: */ + /* b = "*"; */ + /* break; */ + + case DIV_TOKEN: + b = "/"; + break; + + case ARRAY_TOKEN: + b = "__aget"; + break; + + case _ARRAY_ASSIGN_TOKEN: + b = "__aput"; + break; + + case DOT_TOKEN: + sprintf (buf, "%s .", t->v.s_val); + break; + + case METHOD_TOKEN: + sprintf (buf, "%s __eargs __method_call", t->v.s_val); + break; + + case _STRUCT_ASSIGN_TOKEN: + b = "__struct_eqs"; break; + case _STRUCT_PLUSEQS_TOKEN: + b = "__struct_pluseqs"; break; + case _STRUCT_MINUSEQS_TOKEN: + b = "__struct_minuseqs"; break; + case _STRUCT_PLUSPLUS_TOKEN: + b = "__struct_plusplus"; break; + case _STRUCT_POST_PLUSPLUS_TOKEN: + b = "__struct_pplusplus"; break; + case _STRUCT_MINUSMINUS_TOKEN: + b = "__struct_minusminus"; break; + case _STRUCT_POST_MINUSMINUS_TOKEN: + b = "__struct_pminusminus"; break; + + case _NULL_TOKEN: b = "NULL"; break; + + case USING_TOKEN: + b = "__using__"; break; + case FOREACH_TOKEN: + b = "__foreach__"; break; + + default: + sprintf (buf, "____UNKNOWN___0x%X", t->type); + break; + } + + fprintf (stdout, "0x%2X: %s\n", t->type, b); +} + + +int main (int argc, char **argv) +{ + char *file; + + if (argc == 2) + { + /* fprintf (stderr, "Usage: %s <filename>\n", argv[0]); */ + file = argv[1]; + /* return 1; */ + } + else file = NULL; + + if (-1 == SLang_init_slang ()) + return 1; + + _SLcompile_ptr = dump_token; + SLang_load_file (file); + + return SLang_Error; +} diff --git a/libslang/src/util/chkproto.c b/libslang/src/util/chkproto.c new file mode 100644 index 0000000..d4a6e23 --- /dev/null +++ b/libslang/src/util/chkproto.c @@ -0,0 +1,225 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + +static char *Make_Intrinsic_Forms [] = +{ + "0", + "I", + "S", + "II", + "SS", + "SI", + "IS", + "III", + "IIS", + "ISI", + "ISS", + "SII", + "SIS", + "SSI", + "SSS", + "IIII", + "SSSS", + NULL +}; + + +static char *output_start (FILE *fp) +{ + char **form; + + form = Make_Intrinsic_Forms; + + while (*form != NULL) + { + char *r = "0ISD"; + char *f; + + while (*r != 0) + { + switch (*r) + { + case '0': + fprintf (fp, "static void (*V_F"); + break; + + case 'I': + fprintf (fp, "static int (*I_F"); + break; + + case 'S': + fprintf (fp, "static char *(*S_F"); + break; + + case 'D': + fprintf (fp, "static double (*D_F"); + break; + } + + f = *form; + fprintf (fp, "%s)(", f); + while (*f != 0) + { + if (f != *form) fputc (',', fp); + switch (*f) + { + case 'I': + fputs ("int*", fp); + break; + + case 'S': + fputs ("char*", fp); + break; + + case '0': + fputs ("void", fp); + break; + } + f++; + } + fputs (");\n", fp); + + r++; + } + form++; + } + + fputs ("\n", fp); + + fprintf (fp, "static void chkproto_not_used_fun (void)\n{\n"); + + return 0; +} + +static void output_finish (FILE *fp) +{ + fputs ("\n}\n", fp); +} + + +static char *skip_whitespace (char *p) +{ + while ((*p == ' ') || (*p == '\t') || (*p == '\n')) + p++; + + return p; +} + +static int do_make_intrinsic (unsigned int linenum, char *p) +{ + char *e; + char *name; + int ret_type; + + e = p; + while (*e && (*e != ' ') && (*e != '\t') && (*e != '(')) + e++; + + if (*e == 0) + { + return -1; + } + *e++ = 0; + + /* We expect form: IIS("name", function_name, RETURN_TYPE) */ + while (*e && (*e != ',')) e++; + if (*e == 0) + { + return -1; + } + e = skip_whitespace (e + 1); + name = e; + if (*name == 0) + { + return -1; + } + while (*e && (*e != ',')) e++; + if (*e == 0) + { + return -1; + } + *e = 0; + + e = skip_whitespace (e + 1); + if (0 == strncmp (e, "SLANG_", 6)) + e += 6; + + if (0 == strncmp (e, "VOID_TYPE", 8)) + ret_type = 'V'; + else if (0 == strncmp (e, "STRING_TYPE", 11)) + ret_type = 'S'; + else if (0 == strncmp (e, "INT_TYPE", 8)) + ret_type = 'I'; + else if (0 == strncmp (e, "DOUBLE_TYPE", 8)) + ret_type = 'D'; + else + { + fprintf (stderr, "return type on line %u is not supported\n", linenum); + return -1; + } + + fprintf (stdout, " %c_F%s = %s;\n", ret_type, p, name); + return 0; +} + + +int main (int argc, char **argv) +{ + char line [1024]; + FILE *fp; + unsigned int linenum; + + if (isatty (0) + || (argc > 1)) + { + fprintf (stderr, "Usage: %s < infile > outfile\n", argv[0]); + exit (1); + } + + output_start (stdout); + + fp = stdin; + + linenum = 0; + while (NULL != fgets (line, sizeof (line), fp)) + { + char *p = skip_whitespace (line); + + linenum++; + + if (*p != 'M') continue; + + if (0 != strncmp (p, "MAKE_INTRINSIC", 14)) + continue; + + if (p[14] != '_') + { + fprintf (stderr, "Warning: line %u is old-fashioned\n", linenum); + continue; + } + + p += 15; + + switch (*p) + { + case 'I': + case 'S': + case '0': + do_make_intrinsic (linenum, p); + break; + + default: + fprintf (stderr, "Warning: Unable to handle MAKE_INTRINSIC form on line %u\n", + linenum); + break; + } + } + + output_finish (stdout); + return 0; +} + + + diff --git a/libslang/src/util/keywords.lis b/libslang/src/util/keywords.lis new file mode 100644 index 0000000..b794a59 --- /dev/null +++ b/libslang/src/util/keywords.lis @@ -0,0 +1,73 @@ +!if IFNOT_TOKEN +ERROR_BLOCK ERRBLK_TOKEN +EXIT_BLOCK EXITBLK_TOKEN +USER_BLOCK0 USRBLK0_TOKEN +USER_BLOCK1 USRBLK1_TOKEN +USER_BLOCK2 USRBLK2_TOKEN +USER_BLOCK3 USRBLK3_TOKEN +USER_BLOCK4 USRBLK4_TOKEN +__tmp TMP_TOKEN +_for _FOR_TOKEN +abs ABS_TOKEN +and AND_TOKEN +andelse ANDELSE_TOKEN +break BREAK_TOKEN +case CASE_TOKEN +chs CHS_TOKEN +continue CONT_TOKEN +define DEFINE_TOKEN +do DO_TOKEN +do_while DOWHILE_TOKEN +else ELSE_TOKEN +exch EXCH_TOKEN +for FOR_TOKEN +foreach FOREACH_TOKEN +forever FOREVER_TOKEN +if IF_TOKEN +loop LOOP_TOKEN +mod MOD_TOKEN +mul2 MUL2_TOKEN +not NOT_TOKEN +or OR_TOKEN +orelse ORELSE_TOKEN +pop POP_TOKEN +private PRIVATE_TOKEN +public PUBLIC_TOKEN +return RETURN_TOKEN +shl SHL_TOKEN +shr SHR_TOKEN +sign SIGN_TOKEN +sqr SQR_TOKEN +static STATIC_TOKEN +struct STRUCT_TOKEN +switch SWITCH_TOKEN +typedef TYPEDEF_TOKEN +using USING_TOKEN +variable VARIABLE_TOKEN +while WHILE_TOKEN +xor BXOR_TOKEN +% +% The kewords below here are commented out because they are not +% used. Most of them could be used if one wanted to write in RPN form +% but I want to discontinue it +% +%push PUSH_TOKEN +%readonly READONLY_TOKEN +%__ref _REF_TOKEN +%__aput _ARRAY_ASSIGN_TOKEN +%__aput_minuseqs _ARRAY_MINUSEQS_TOKEN +%__aput_minusminus _ARRAY_MINUSMINUS_TOKEN +%__aput_pluseqs _ARRAY_PLUSEQS_TOKEN +%__aput_plusplus _ARRAY_PLUSPLUS_TOKEN +%__aput_pminusminus _ARRAY_POST_MINUSMINUS_TOKEN +%__aput_pplusplus _ARRAY_POST_PLUSPLUS_TOKEN +%__arg ARG_TOKEN +%__array ARRAY_TOKEN +%__earg EARG_TOKEN +%__struct_eqs _STRUCT_ASSIGN_TOKEN +%__struct_minuseqs _STRUCT_MINUSEQS_TOKEN +%__struct_minusminus _STRUCT_MINUSMINUS_TOKEN +%__struct_pluseqs _STRUCT_PLUSEQS_TOKEN +%__struct_plusplus _STRUCT_PLUSPLUS_TOKEN +%__struct_pminusminus _STRUCT_POST_MINUSMINUS_TOKEN +%__struct_pplusplus _STRUCT_POST_PLUSPLUS_TOKEN diff --git a/libslang/src/util/perfhash.c b/libslang/src/util/perfhash.c new file mode 100644 index 0000000..b593b19 --- /dev/null +++ b/libslang/src/util/perfhash.c @@ -0,0 +1,600 @@ +/* Copyright (c) 1998, 2001 John E. Davis + * + * You may distribute under the terms of either the GNU General Public + * License or the Perl Artistic License. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +static char *C_Char_Map_Name = "Keyword_Hash_Table"; +static char *C_Hash_Function_Name = "keyword_hash"; +static char *C_Hash_Table_Type = "Keyword_Table_Type"; +static char *C_Hash_Table_Name = "Keyword_Table"; +static char *C_Is_Keyword_Function_Name = "is_keyword"; + +typedef struct +{ + unsigned int hash; + unsigned int len; + unsigned int freq_statistic; + char *name; + char *type; +} +String_Table_Type; + +static String_Table_Type String_Table [256]; + +static unsigned int Num_Strings; + +static unsigned int Max_Table_Value; +static unsigned int Max_String_Len; +static unsigned int Min_String_Len; +static unsigned int Max_Hash_Value; +static unsigned int Tweak_Step_Size; +static int Use_Length = 1; + +static int Char_Table_Map [256]; + +static void write_table (unsigned int min_hash, unsigned int max_hash) +{ + String_Table_Type *st, *st_max; + unsigned int i; + + fprintf (stdout, "\n\ +typedef SLCONST struct\n\ +{\n\ + char *name;\n\ + unsigned int type;\n\ +}\n\ +%s;\n\ +", + C_Hash_Table_Type); + + fprintf (stdout, "\nstatic %s %s [/* %u */] =\n{\n", + C_Hash_Table_Type, C_Hash_Table_Name, (max_hash - min_hash) + 1); + fprintf (stderr, "String Table Size: %u\n", (max_hash - min_hash) + 1); + + for (i = min_hash; i <= max_hash; i++) + { + st = String_Table; + st_max = st + Num_Strings; + while (st < st_max) + { + if ((unsigned int) st->hash == i) + break; + st++; + } + + if (st == st_max) + fprintf (stdout, " {NULL,0},\n"); + else + fprintf (stdout, " {\"%s\",\t%s},\n", st->name, st->type); + } + + fputs ("};\n\n", stdout); + + fprintf (stdout, "\ +static %s *%s (char *str, unsigned int len)\n\ +{\n\ + unsigned int hash;\n\ + char *name;\n\ + %s *kw;\n\ +\n\ + if ((len < MIN_KEYWORD_LEN)\n\ + || (len > MAX_KEYWORD_LEN))\n\ + return NULL;\n\ +\n\ + hash = %s (str, len);\n\ + if ((hash > MAX_HASH_VALUE) || (hash < MIN_HASH_VALUE))\n\ + return NULL;\n\ +\n\ + kw = &%s[hash - MIN_HASH_VALUE];\n\ + if ((NULL != (name = kw->name))\n\ + && (*str == *name)\n\ + && (0 == strcmp (str, name)))\n\ + return kw;\n\ + return NULL;\n\ +}\n\ +", + C_Hash_Table_Type, C_Is_Keyword_Function_Name, + C_Hash_Table_Type, C_Hash_Function_Name, C_Hash_Table_Name); +} + +static unsigned int hash_function (int *char_map, + unsigned char *s, unsigned int len) +{ + unsigned int sum; + + if (Use_Length) sum = len; + else sum = 0; + + while (len) + { + len--; + sum += (unsigned int) char_map [s[len]]; + } + return sum; +} + +static unsigned int Frequency_Table [256]; + +static void init_map (int *map) +{ + unsigned int i; + + for (i = 0; i < 256; i++) map [i] = -1; + + for (i = 0; i < Num_Strings; i++) + { + unsigned char *s; + + s = (unsigned char *) String_Table[i].name; + + while (*s != 0) + { + map [*s] = 0; + s++; + } + } +} + +static void write_map (int *map) +{ + unsigned int i; + unsigned int min_hash, max_hash; + char *type; + + min_hash = 0xFFFF; + max_hash = 0; + for (i = 0; i < Num_Strings; i++) + { + unsigned int h = String_Table[i].hash; + if (h < min_hash) min_hash = h; + if (h > max_hash) max_hash = h; +#if 0 + fprintf (stdout, "-->%s\t%u\n", String_Table[i].name, h); +#endif + } + + fprintf (stdout, "#ifndef SLCONST\n#define SLCONST const\n#endif\n"); + fprintf (stdout, "#define MIN_HASH_VALUE\t%u\n", min_hash); + fprintf (stdout, "#define MAX_HASH_VALUE\t%u\n", max_hash); + fprintf (stdout, "#define MIN_KEYWORD_LEN %u\n", Min_String_Len); + fprintf (stdout, "#define MAX_KEYWORD_LEN %u\n", Max_String_Len); + + for (i = 0; i < 256; i++) + if (map[i] == -1) map[i] = (max_hash + 1); + + if (max_hash + 1 < 0xFF) + type = "unsigned char"; + else if (max_hash + 1 < 0xFFFF) + type = "unsigned short"; + else + type = "unsigned long"; + + fprintf (stderr, "Hash Table is of type %s with max hash %u\n", + type, max_hash); + + fprintf (stdout, "\nstatic %s %s [256] =\n", + type, C_Char_Map_Name); + + fprintf (stdout, "{\n "); + + for (i = 0; i < 255; i++) + { + fprintf (stdout, "%3d, ", map[i]); + if ((i % 16) == 15) + fputs ("\n ", stdout); + } + fprintf (stdout, "%3d\n};\n", map[255]); + + fputs ("\n", stdout); + + fprintf (stdout, "static %s %s (char *s, unsigned int len)\n", + type, C_Hash_Function_Name); + +fprintf (stdout,"\ +{\n\ + unsigned int sum;\n\ +\n\ + sum = %s;\n\ + while (len)\n\ + {\n\ + len--;\n\ + sum += (unsigned int) %s [(unsigned char)s[len]];\n\ + }\n\ + return sum;\n\ +}\n\ +", + (Use_Length ? "len" : "0"), + C_Char_Map_Name); + + write_table (min_hash, max_hash); +} + + +static unsigned int compute_hash (unsigned int i) +{ + String_Table_Type *s; + unsigned int hash; + + s = String_Table + i; + hash = hash_function (Char_Table_Map, (unsigned char *) s->name, s->len); + return s->hash = hash; +} + + +static int tweak_character (unsigned char ch, unsigned int bad) +{ + unsigned int val; + unsigned int val_save; + unsigned int i, j; + unsigned int nvalues; + + val_save = (unsigned int) Char_Table_Map [ch]; + nvalues = Max_Table_Value; + + val = 0; + while (nvalues) + { + val += Tweak_Step_Size; + val = val % Max_Table_Value; + + Char_Table_Map[ch] = (int) val; + + for (i = 0; i <= bad; i++) + { + unsigned int hash = compute_hash (i); + for (j = 0; j < i; j++) + { + if (hash == String_Table[j].hash) + break; + } + if (j != i) + break; + } + + if (i > bad) return 0; + nvalues--; + } + + Char_Table_Map [ch] = (int) val_save; +#if 0 + /* reset hashes */ + for (i = 0; i <= bad; i++) + (void) compute_hash (i); +#endif + return -1; +} + + +static void sort_according_to_frequency (unsigned char *s, unsigned int len) +{ + /* since len is small (I hope), I will be lazy */ + unsigned char si, sj; + unsigned int fi; + unsigned int i, j, imax; + + imax = len - 1; + for (i = 0; i < imax; i++) + { + si = s[i]; + fi = Frequency_Table [si]; + + for (j = i + 1; j < len; j++) + { + sj = s[j]; + if (Frequency_Table[sj] < fi) + { + s[i] = sj; + s[j] = si; + break; + } + } + } +} + +static void create_frequency_table (unsigned int num) +{ + unsigned int i; + + memset ((char *) Frequency_Table, 0, sizeof(Frequency_Table)); + + for (i = 0; i < num; i++) + { + unsigned char *s, ch; + + s = String_Table[i].name; + while (0 != (ch = *s)) + { + Frequency_Table [ch] += 1; + s++; + } + } +} + +static int tweak_hash_function (unsigned int bad, unsigned int good) +{ + unsigned char unique_chars [256]; + unsigned char bad_chars [256], good_chars[256]; + unsigned char *s; + unsigned int i, len; + + + memset ((char *)unique_chars, 0, sizeof (unique_chars)); + memset ((char *)bad_chars, 0, sizeof (bad_chars)); + memset ((char *)good_chars, 0, sizeof (good_chars)); + + s = (unsigned char *) String_Table[bad].name; + while (*s != 0) + { + bad_chars [*s] = 1; + s++; + } + + s = (unsigned char *) String_Table[good].name; + while (*s != 0) + { + good_chars [*s] = 1; + s++; + } + + /* Find out the characters that are in good or bad, and not both. That + * way we are free to manipulate those to avoid the collision. + */ + len = 0; + for (i = 0; i < 256; i++) + { + if (bad_chars[i]) + { + if (good_chars [i] == 0) + unique_chars [len++] = i; + } + else if (good_chars [i]) + unique_chars [len++] = i; + } + + /* Unfortunately, the unique_chars may already be part of the words + * that have already been hashed. So, sort them according to how often + * they occur and deal with those that occur the least often first. + */ +#if 1 + create_frequency_table (bad); +#endif + sort_according_to_frequency (unique_chars, len); + + for (i = 0; i < len; i++) + { + unsigned char ch = unique_chars [i]; + if (0 == tweak_character (ch, bad)) + return 0; + } + + return -1; +} + +static int perfect_hash (void) +{ + unsigned int i, j; + unsigned int hash; + int has_collisions = 1; + + for (i = 0; i < Num_Strings; i++) + { + hash = compute_hash (i); + for (j = 0; j < i; j++) + { + if (hash != String_Table[j].hash) + continue; + + /* Oops. We have a collision. tweak_hash_function will + * adjust the hash table array to resolve this collision + * and ensure that previous ones remain resolved. + */ + if (-1 == tweak_hash_function (i, j)) + { + has_collisions = 1; + /* return -1; */ + } + break; + } + } + + if (has_collisions == 0) + return 0; + + has_collisions = 0; + /* Now check for collisions */ + for (i = 0; i < Num_Strings; i++) + { + char *s = String_Table[i].name; + hash = compute_hash (i); + + for (j = 0; j < i; j++) + { + if (hash != String_Table[j].hash) + continue; + + has_collisions++; + fprintf (stderr, "Collision: %s, %s\n", s, String_Table [j].name); + } + } + + if (has_collisions) + return -1; + + return 0; +} + + +static int sort_function (String_Table_Type *a, String_Table_Type *b) +{ + return b->freq_statistic - a->freq_statistic; +} + +static void sort_strings (void) +{ + unsigned int i; + void (*qsort_fun) (String_Table_Type *, unsigned int, + unsigned int, int (*)(String_Table_Type *, String_Table_Type *)); + + for (i = 0; i < Num_Strings; i++) + { + int f; + unsigned char *s; + + f = 0; + s = (unsigned char *) String_Table[i].name; + while (*s != 0) f += Frequency_Table [*s++]; + String_Table [i].freq_statistic = f; + } + + qsort_fun = (void (*) (String_Table_Type *, unsigned int, + unsigned int, + int (*)(String_Table_Type *, String_Table_Type *))) + qsort; + + qsort_fun (String_Table, Num_Strings, sizeof (String_Table_Type), + sort_function); +} + +int main (int argc, char **argv) +{ + char line[256]; + unsigned int count; + unsigned int min_char; + unsigned int max_char; + unsigned int min_len; + unsigned int max_len; + unsigned int i; + + Tweak_Step_Size = 5; + + if (isatty (0) || (argc > 2)) + { + fprintf (stderr, "Usage: %s [step-size] < keywords > hash.c\n", argv[0]); + fprintf (stderr, " Default step-size is %u\n", Tweak_Step_Size); + return 1; + } + + if (argc > 1) + { + if (1 != sscanf (argv[1], "%u", &Tweak_Step_Size)) + Tweak_Step_Size = 5; + + if ((Tweak_Step_Size % 2) == 0) Tweak_Step_Size++; + } + + count = 0; + min_char = 255; + max_char = 0; + max_len = 0; + min_len = 0xFFFF; + + while (NULL != fgets (line, sizeof (line), stdin)) + { + char *s; + unsigned int len; + char *type; + + if (*line == '%') continue; + + if (count == 256) + { + fprintf (stderr, "Only 256 keywords permitted.\n"); + return 1; + } + + len = strlen (line); + if (len && (line [len - 1] == '\n')) + { + len--; + line [len] = 0; + } + + if (len == 0) + continue; + + s = malloc (len + 1); + if (s == NULL) + { + fprintf (stderr, "Malloc error.\n"); + return 1; + } + strcpy (s, line); + String_Table[count].name = s; + + type = s; + while (*type && (*type != ' ') && (*type != '\t')) + type++; + + len = (unsigned int) (type - s); + String_Table [count].len = len; + if (len > max_len) max_len = len; + if (len < min_len) min_len = len; + + if (*type != 0) + { + *type++ = 0; + while ((*type == ' ') || (*type == '\t')) + type++; + } + + String_Table [count].type = type; + + + for (i = 0; i < len; i++) + { + unsigned char ch; + + ch = (unsigned char) s[i]; + + if (ch < min_char) + min_char = ch; + if (ch > max_char) + max_char = ch; + } + + count++; + } + + + Max_String_Len = max_len; + Min_String_Len = min_len; + + Num_Strings = count; + Max_Table_Value = 1; + while (Max_Table_Value < Num_Strings) + Max_Table_Value = Max_Table_Value << 1; + + Max_Hash_Value = Max_Table_Value * Max_String_Len; + + fprintf (stderr, "Theoretical Max_Table_Value: %u\n", Max_Table_Value); + fprintf (stderr, "Theoretical Max_Hash_Value: %u\n", Max_Hash_Value); + + create_frequency_table (Num_Strings); + sort_strings (); + + init_map (Char_Table_Map); + if (-1 == perfect_hash ()) + { + fprintf (stderr, "Hash failed.\n"); + return -1; + } + + fprintf (stderr, "Success.\n"); + + fprintf (stdout, "/* Perfect hash generated by command line:\n *"); + for (i = 0; i < argc; i++) + fprintf (stdout, " %s", argv[i]); + fputs ("\n */\n", stdout); + + write_map (Char_Table_Map); + return 0; +} + + + + + |