/*****************************************************************************
 * mktable - table-building program to ease table maintenance problems
 *
 * DESCRIPTION
 *  Several parts of the FORTRAN compiler need large tables.
 *  For example, the lexer contains tables of keywords and multicharacter
 *  tokens; the intrinsic-function handler contains a table of all the
 *  FORTRAN intrinsic functions.
 *  Maintaining these tables can be aggravating, since they are typically
 *  large and involve lots of drudge work (like changing many sequentially-
 *  numbered macro definitions) to modify.
 *
 *  `mktable' can be used to build tables automatically as part of the
 *  usual compiler building process.  Its usages and semantics are as
 *  follows.
 *
 *  `mktable' takes a "table" file on its standard input.  Each line of
 *  the table file has one of the following forms:
 *
 *      # commentary information
 *      "key-string" [index-macro-name [arbitrary-stuff]]
 *      <blank line>
 *
 *  The key string and arbitrary-stuff form the contents of a single
 *  table record.  The index-macro-name is #define'd to be the index
 *  of the given record in the table.  If the index-macro-name is absent or
 *  is an empty string ("") then no macro definition is produced for the
 *  record.
 *
 *  `mktable' produces its output on four files:
 *      mktable.key: the key strings
 *      mktable.def: #define <index_macro_name> <index to mktable.key>
 *      mktable.ind: contains the initialization part of a definition
 *          for an index array for key-letter indexed tables,
 *          or the initialization part of a collision-resolution
 *          table for linear-list hashed tables.
 *          (not generated for sorted or open-addressed tables.)
 *      mktable.inf: contains arbitrary-stuff
 *
 *  For example, if the table to be defined were named "symtab" and the
 *  table being constructed was of the "sorted" type (suitable for binary
 *  search),
 *
 *      # contents of symtab:
 *      "alpha" ST_ALPHA    2, 4, MONADIC
 *      "gamma" ST_GAMMA    2, 3, MONADIC
 *      "delta" ST_DELTA    2, 1, DYADIC
 *      "epsilon"
 *
 *  then `mktable' produces the following in mktable.key:
 *
 *      "alpha","delta","epsilon","gamma"
 *
 *  and the following in mktable.def:
 *
 *      #define ST_ALPHA 0
 *      #define ST_DELTA 1
 *      #define ST_GAMMA 3
 *
 *  and in mktable.inf:
 *
 *      {2, 4, MONADIC}, {2, 1, DYADIC}, {0}, {2, 3, MONADIC}
 *
 *  The files might be included in a C source program in the
 *  following way:
 *
 *      #include "mktable.def"
 *      ...
 *      char    *symname[] = {
 *      #   include "mktable.key"
 *          };
 *      struct syminfo
 *          {
 *          int size;
 *          int cycles;
 *          int arity;
 *          };
 *      struct syminfo symtab[] = {
 *      #   include "mktable.inf"
 *          };
 *
 *  The `mktable' command itself is used in one of the following ways:
 *
 *  mktable "open" size <tablefile
 *      This form creates an open-addressed hash table, keyed on
 *      the string fields at the beginning of each record in the
 *      table file.  The hash function used is the absolute value
 *      of the sum of all the characters in a key, modulo the table
 *      size.  The collision resolution function is simply one plus
 *      the last hash, modulo the table size.
 *      Since some of the entries in the hash table may be empty,
 *      and `mktable' has no way of knowing how to fill them,
 *      one of the records supplied by the user will be replicated
 *      in the empty entries with its key value set to NULL.
 *      "table.c" will be created with the hash table itself, and
 *      "table.h" will be created with index-macro definitions that
 *      may be used to index directly into the table in "table.c".
 *
 *  mktable "hashed" size <tablefile
 *      This form creates a hash table keyed on the string fields
 *      at the beginning of each table file record.  The hash function
 *      is the absolute value of the sum of all the characters in a
 *      key, modulo the table size.  Collision resolution is handled
 *      with linear chaining, as follows:  If two keys hash to the
 *      same table location, the first one will be placed in the table,
 *      and the corresponding entry of the collision resolution vector
 *      will contain the (integer) index of the next table slot to be
 *      checked for the hash synonym.  When the collision resolution
 *      vector entry is -1, the end of the chain has been reached.
 *      Note that since all entries are stored in the main table, the
 *      `size' must be at least as large as the number of entries.
 *      As with open addressing, some slots in the table may be
 *      padded with a replicated entry (key value set to NULL).
 *      "table.c" receives the hash table.  "table.h" receives the
 *      index-macro definitions that will index into the table in
 *      "table.c".  "tabindex.c" receives the conflict resolution
 *      vector.
 *
 *  mktable "sorted" <tablefile
 *      This form creates a table sorted in ascending order, keyed
 *      on the string fields at the beginning of each record in the
 *      table file.  Comparisons are ordered according to the ASCII
 *      values of the characters being compared.
 *      "table.c" will be created with the sorted table itself, and
 *      "table.h" will be created with index-macro definitions that
 *      may be used to index directly into the table in "table.c".
 *
 *  mktable "key-letter" <tablefile
 *      This form creates a key-letter-indexed table.
 *      The string fields serve as the
 *      key letter.  An auxiliary table indexed from 'A' to 'Z'+1
 *      gives the starting index of all the entries whose keys begin
 *      with each letter (the last entry duplicates the entry for 'Z').
 *      "table.c" will contain the sorted table.  "tabindex.c" will
 *      contain the auxiliary index table information.  "table.h" will
 *      contain the index-macro definitions that may be used to index
 *      directly into the "table.c" table.
 *      Note that key-letter tables are sorted in a peculiar way;
 *      in ascending order by first letter of the key, but descending
 *      order by the remainder of the key.  This is required by
 *      FORTRAN, to ensure that longer keywords are matched before
 *      shorter keywords that are initial substrings of the longer
 *      keywords.
 *      Also note that the key strings themselves are missing the first
 *      char, since by indexing into the table, we are always assured
 *      of having matched the first char.
 *
 * AUTHOR
 *      February, 1984      Allen Akin
 *
 * MODIFICATIONS
 *  March 8, 1984       Allen Akin
 *      Added linear-list resolved hashing.
 *****************************************************************************/

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#define MAXRECORDS  300     /* maximum-size table we can handle */
#define MAXLINE     82      /* maximum line length (incl "\n\0") */

#define HASHED      0       /* flag used by table loader */
#define LINEAR      1       /* ditto */
#define OPENADDR    2       /* ditto */

#define KEYFILE         "mktable.key"   /* name of table output file */
#define DEFFILE         "mktable.def"   /* name of index defs output file */
#define INDEXFILE       "mktable.ind"   /* name of table index output file */
#define INFOFILE        "mktable.inf"   /* gots the infos in it */

/*
 * One table record as parsed by load().  The string fields point into the
 * line buffer that load() allocated for the record; they are not copied.
 */
typedef struct rec {
    char *key;      /* key-string field; NULL marks an empty hash slot */
    char *id;       /* index macro identifier, or NULL if none was given */
    char *other;    /* other stuff in the record - output untouched; NULL if absent */
    struct rec *link;   /* pointer to next record in hash synonyms list */
} Rec_t;

/*
 * Set to 1 by the "-U" command-line flag.  When set, load() converts
 * lower-case key characters to upper case and key_letter() builds its
 * index starting at 'A' instead of '_'.
 */
int Upper = 0;

/* Output streams opened by startoutput() and closed by endoutput(). */
FILE *Fkeys, *Findex, *Fdefs, *Finfo;

/************************************************************************/
/* Function Prototypes                          */
/************************************************************************/
/* entry point and diagnostics */
void main (int argc, char **argv);
void usage (void);
void error(char * message);
/* one builder per table type */
void open_addr(int size);
void hash_linear(int size);
void sorted(void);
void key_letter(void);
/* input parsing */
int load(Rec_t *record, int method, int size);
/* output helpers */
void startoutput(void);
void endoutput(void);
void outrec(Rec_t *rec);
void outdef(char *name, int value);
void outinx(int value);
/* sorting and hashing */
void sortrec(Rec_t **rptr, int size);
int hash(register char *name);


/************************************************************************/
/* Program code                             */
/************************************************************************/
/*
 * main - parse the command line and dispatch to the requested table builder.
 *
 * Accepted forms (an optional leading "-U" sets the global `Upper'):
 *      mktable [-U] open SIZE
 *      mktable [-U] hashed SIZE
 *      mktable [-U] sorted
 *      mktable [-U] key-letter
 *
 * Any other command line reports the usage message through usage(), which
 * does not return.  On success the process exits with status 0.
 */
void  __cdecl
main (
    int argc,
    char **argv
    )
{
    if (argc <= 1)
        usage();

    if (strcmp(argv[1], "-U") == 0) {
        Upper = 1;
        argv++;
        argc--;

        /*
         * "-U" with nothing after it: argv[1] is now the NULL terminator
         * of the argument vector and must not be handed to strcmp().
         */
        if (argc <= 1)
            usage();
    }

    if (strcmp(argv[1], "open") == 0) {
        if (argc != 3)
            usage();
        open_addr(atoi(argv[2]));
    } else if (strcmp(argv[1], "hashed") == 0) {
        if (argc != 3)
            usage();
        hash_linear(atoi(argv[2]));
    } else if (strcmp(argv[1], "sorted") == 0) {
        if (argc != 2)
            usage();
        sorted();
    } else if (strcmp(argv[1], "key-letter") == 0) {
        if (argc != 2)
            usage();
        key_letter();
    } else
        usage();
    exit(0);
}

/*
 * usage - report the accepted command-line forms and terminate.
 *
 * The text is passed to error(), which prints it on stderr and exits with
 * status 1, so this function does not return.  The optional "-U" flag
 * recognised by main() is listed so that the message matches what the
 * program actually accepts.
 */
void
usage (
    void
    )
{
    error("usage: mktable [-U] (open SIZE | hashed SIZE | sorted | key-letter) <table-master");
}

/*
 * error - write `message' and a trailing newline to stderr, then terminate
 * the program with exit status 1.  Never returns.
 */
void
error(
    char * message
    )
{
    fputs(message, stderr);
    fputc('\n', stderr);
    exit(1);
}

void
open_addr(
    int size
    )
{
    register Rec_t *record;     /* points to array storing all records */
    Rec_t defrec;               /* "default" record for empty array slot */
    register int i;

    if (size <= 0)
        error("hash table size specified is less than zero");

    if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)
        error("insufficient memory for hash table");

    for (i = 0; i < size; ++i)
        record[i].key = NULL;

    if (load(record, OPENADDR, size) == 0)
        error("couldn't find any input records");

    defrec.key = NULL;
    defrec.id = NULL;
    for (i = 0; i < size; ++i)
    if (record[i].key != NULL)
        break;
    defrec.other = record[i].other;

    startoutput();

    for (i = 0; i < size; ++i) {
        if (record[i].key == NULL) {
            outrec(&defrec);
        } else {
            outrec(&record[i]);
            outdef(record[i].id, i);
        }
    }

    endoutput();
    _unlink(INDEXFILE);
}

void
hash_linear(
    int size
    )
{
    register Rec_t *record,     /* stores some records, all buckets */
                    *rp;
    Rec_t defrec;               /* default record for empty hash table slots */
    register int i,
                 nextslot,      /* next empty slot in main hash table */
                 prev;

    if (size <= 0)
        error("hash table size specified is less than zero");

    if ((record = (Rec_t *)calloc(size, sizeof(Rec_t))) == NULL)
        error("insufficient memory for hash table");

    for (i = 0; i < size; ++i) {
        record[i].key = NULL;
        record[i].link = NULL;
    }

    if ((i = load(record, HASHED, size)) == 0)
        error("couldn't find any input records");

    if (i > size)
        error("too many records to hold in table");

    defrec.key = NULL;
    defrec.id = NULL;
    for (i = 0; i < size; ++i) {
        if (record[i].key != NULL)
            break;
    }
    defrec.other = record[i].other;
    defrec.link = NULL;
    /*
     * The `load' routine has built a hash table `record'.
     * Each entry in `record' is either empty (key == NULL) or contains a record.
     * Each record may have a NULL link field, or a link field that points to
     * a hash synonym.
     * With this section of code, we rearrange the linked lists of hash synonyms
     * so that all the entries are stored in `record'.
     */
    nextslot = 0;
    for (i = 0; i < size; ++i) {
        if ((record[i].key != NULL) &&
            (record[i].link != NULL) &&
            ((record[i].link < record) || (record[i].link >= (record + size))))
        {
            for (prev = i, rp = record[i].link; rp != NULL; rp = rp->link) {
                while (record[nextslot].key != NULL)
                    ++nextslot;
                record[prev].link = &record[nextslot];
                record[nextslot] = *rp;
                prev = nextslot;
            }
        }
    }

    startoutput();

    for (i = 0; i < size; ++i) {
        if (record[i].key == NULL) {
            outrec(&defrec);
            outinx(-1);
        } else {
            outrec(&record[i]);
            if (record[i].link == NULL)
                outinx(-1);
            else
                outinx(record[i].link - record);    /* cvt. to inx in table */
            outdef(record[i].id, i);
        }
    }

    endoutput();
}

/*
 * sorted - build a table sorted in ascending strcmp() order of the keys.
 *
 * Loads up to MAXRECORDS records from stdin in input order, sorts an array
 * of pointers to them with sortrec(), and writes each record followed by
 * its index macro, numbered by position in the sorted order.
 *
 * No index table applies to this table type, so the index file created by
 * startoutput() is removed again at the end.
 */
void
sorted(
    void
    )
{
    Rec_t  record[MAXRECORDS],
          *rptr[MAXRECORDS];
    register int i, size;

    size = load(record, LINEAR, MAXRECORDS);

    for (i = 0; i < size; ++i)
        rptr[i] = &record[i];

    sortrec(rptr, size);

    startoutput();

    for (i = 0; i < size; ++i) {
        outrec(rptr[i]);
        outdef(rptr[i]->id, i);
    }

    endoutput();

    /* standard remove() instead of the non-standard, undeclared _unlink() */
    remove(INDEXFILE);
}

/*
 * key_letter - build a table indexed by the first character of each key.
 *
 * Records are loaded in input order and sorted ascending by key.  Each run
 * of keys sharing a first character is then reversed, so that within one
 * letter the keys appear in descending order.  While the records are
 * written, the index file receives one entry per letter giving the
 * position of the first record whose key starts with that letter.  Letters
 * run from 'A' through 'Z'+1 when Upper is set, otherwise from '_' through
 * 'z'+1; letters after the last key receive the record count.
 */
void
key_letter(
    void
    )
{
    Rec_t  record[MAXRECORDS];
    Rec_t *order[MAXRECORDS];
    Rec_t *held;
    int count, n;
    int first, past;            /* bounds of the current same-letter run */
    int lo, hi;
    char letter, final;

    count = load(record, LINEAR, MAXRECORDS);

    for (n = 0; n < count; ++n)
        order[n] = &record[n];

    sortrec(order, count);

    /* reverse every run of keys that share a first character */
    first = 0;
    while (first < count) {
        past = first + 1;
        while (past < count && order[past]->key[0] == order[first]->key[0])
            ++past;

        lo = first;
        hi = past - 1;
        while (lo < hi) {
            held = order[lo];
            order[lo] = order[hi];
            order[hi] = held;
            ++lo;
            --hi;
        }

        first = past;
    }

    startoutput();

    if (Upper) {
        letter = (char)('A' - 1);
        final = (char)('Z' + 1);
    } else {
        letter = (char)('_' - 1);
        final = (char)('z' + 1);
    }

    for (n = 0; n < count; ++n) {
        /* one index entry for each letter up to this key's first character */
        for (; order[n]->key[0] > letter; ++letter)
            outinx(n);

        outrec(order[n]);
        outdef(order[n]->id, n);
    }

    /* remaining letters have no keys: point them past the last record */
    while (letter < final) {
        outinx(count);
        ++letter;
    }

    endoutput();
}

/*
 * home_slot - first slot to try for `key' in a table of `size' slots.
 *
 * The file header documents the hash as the absolute value of the
 * character sum modulo the table size.  Folding a negative remainder keeps
 * the result inside 0..size-1 even when hash() returns a negative sum.
 */
static int
home_slot(
    char *key,
    int size
    )
{
    int h = hash(key) % size;

    return (h < 0) ? -h : h;
}

/*
 * load - read table records from stdin into `record'.
 *
 * Each input line whose first non-blank character is '"' is a record:
 *      "key" [id [other text to end of line]]
 * An id written as "" counts as no id.  All other lines (comments, blank
 * lines) are skipped.  When Upper is set, lower-case key characters are
 * converted to upper case.  The parsed strings point into a line buffer
 * allocated per record, which is never freed.
 *
 * method selects how records are stored:
 *      LINEAR   - record[n] is the n-th record read.
 *      OPENADDR - record is a hash table of `size' slots; collisions are
 *                 resolved by probing the following slots, wrapping round.
 *      HASHED   - record is a hash table of `size' slots; on a collision
 *                 the new record takes the home slot and the previous
 *                 occupant moves to a newly allocated node reached via
 *                 `link'.
 * For the two hashed methods the caller must have set every key (and, for
 * HASHED, every link) in `record' to NULL, and `size' must be positive.
 * A collision summary is printed on stderr for those methods.
 *
 * Returns the number of records stored.  Exits through error() when more
 * than `size' records are present, a key string is not terminated, or
 * memory runs out.
 */
int
load(
    Rec_t *record,
    int method,
    int size
    )
{
    char *line;
    register char *p;
    int rec, h, chainlen, maxchainlen = 0, collisions = 0;
    Rec_t r;

    for (rec = 0; ; ++rec)
    {
        if ((line = malloc(MAXLINE)) == NULL)
            error("insufficient memory to load records");

        if (fgets(line, MAXLINE, stdin) == NULL) {
            free(line);         /* buffer for a line that never arrived */
            break;
        }

        r.key = r.id = r.other = NULL;
        r.link = NULL;

        for (p = line; *p && isspace((unsigned char)*p); ++p)
            ;
        if (*p != '"') {
            /* not a record: undo the loop's increment and read on */
            free(line);
            --rec;
            continue;
        }

        /*
         * Only real records count against the capacity; comment or blank
         * lines after the last permitted record are harmless.
         */
        if (rec >= size)
            error("too many records to handle");

        r.key = ++p;
        for (; *p != '"'; ++p) {
            if (*p == '\0')
                error("unterminated key string in table file");
            if (Upper && islower((unsigned char)*p))
                *p = (char)toupper((unsigned char)*p);
        }

        *p++ = '\0';

        for (; *p && isspace((unsigned char)*p); ++p)   /* skip space key and id */
            ;
        if (*p == '"' && *(p + 1) == '"') {     /* no id */
            r.id = NULL;
            p += 2;
        } else if (*p) {
            r.id = p++;                         /* id start */
            for (; *p && ( ! isspace((unsigned char)*p)); ++p) /* til first space */
                ;
            if (*p) {
                *p++ = '\0';                    /* terminate id */
            }
        }

        for (; *p && isspace((unsigned char)*p); ++p)   /* skip space til other info */
            ;
        if (*p) {
            r.other = p++;
            for (; *p != '\n' && *p != '\0'; ++p)
                ;
            *p = '\0';
        }

        if (method == LINEAR) {
            record[rec] = r;
        } else if (method == OPENADDR) {
            chainlen = 0;
            /* rec < size here, so at least one slot is still free */
            for (h = home_slot(r.key, size); record[h].key; h = (h + 1) % size) {
                ++chainlen;
                ++collisions;
            }
            maxchainlen = (chainlen < maxchainlen)? maxchainlen: chainlen;
            record[h] = r;
        } else { /* method == HASHED */
            Rec_t  *rp;

            h = home_slot(r.key, size);
            if (record[h].key == NULL) {
                record[h] = r;
            } else {
                /* push the current occupant onto the overflow chain */
                if ((rp = (Rec_t *)malloc(sizeof(Rec_t))) == NULL)
                    error("insufficient memory to store all records");
                *rp = record[h];
                r.link = rp;
                record[h] = r;
                ++collisions;
                chainlen = 1;
                for (rp = &record[h]; rp->link != NULL; rp = rp->link)
                    ++chainlen;
                maxchainlen = (chainlen < maxchainlen)? maxchainlen: chainlen;
            }
        }
    }

    if (method == HASHED || method == OPENADDR)
        fprintf(stderr, "%d collisions, max chain length %d\n", collisions, maxchainlen);

    return rec;
}

/*
 * startoutput - create (or truncate) the four output files and store their
 * streams in Fkeys, Findex, Fdefs and Finfo.  The first file that cannot be
 * opened ends the program through error().
 */
void
startoutput(
    void
    )
{
    Fkeys = fopen(KEYFILE, "w");
    if (Fkeys == NULL)
        error("can't open keys output file");

    Findex = fopen(INDEXFILE, "w");
    if (Findex == NULL)
        error("can't open index output file");

    Fdefs = fopen(DEFFILE, "w");
    if (Fdefs == NULL)
        error("can't open definitions output file");

    Finfo = fopen(INFOFILE, "w");
    if (Finfo == NULL)
        error("can't open info output file");
}

void
endoutput(
    void
    )
{
    fclose(Fkeys);
    fclose(Findex);
    fclose(Fdefs);
    fclose(Finfo);
}

/*
 * outrec - write one table record: its key to the keys file and its
 * "other" text to the info file.
 *
 * A NULL key is written as NULL.  Otherwise the key is written as a quoted
 * string with its first character dropped; this happens for every table
 * type, not only key-letter tables.  An empty key is written as "" rather
 * than stepping past its terminator.  A missing "other" field is written
 * as {0}; otherwise the text is wrapped in braces unchanged.
 */
void outrec(Rec_t *rec)
{
    if (rec->key == NULL)
        fprintf(Fkeys, "NULL,\n");
    else if (rec->key[0] == '\0')
        fprintf(Fkeys, "\"\",\n");      /* nothing to skip in an empty key */
    else
        fprintf(Fkeys, "\"%s\",\n", ((rec->key) + 1));

    if (rec->other == NULL)
        fprintf(Finfo, "{0},\n");
    else
        fprintf(Finfo, "{%s},\n", rec->other);
}

/*
 * outdef - append "#define <name> <value>" to the definitions file.
 * Records without an index macro pass a NULL name and produce no output.
 */
void
outdef(
    char *name,
    int value
    )
{
    if (name == NULL)
        return;

    fprintf(Fdefs, "#define %s %d\n", name, value);
}

/*
 * outinx - append one entry, "<value>," on its own line, to the index file.
 */
void
outinx(
    int value
    )
{
    FILE *out = Findex;

    fprintf(out, "%d,\n", value);
}
/*
 * The hash function used by `mktable' (hash(), defined below after
 * sortrec()) must be the same function the compiler uses to probe the
 * generated tables, which is why, according to the original note here,
 * a single source file is used for both.
 *
 * `mktable' does not use the standard include file that the compiler
 * uses, so we define the allowable register declarations here.
 *
 * NOTE(review): REG1..REG3 are not referenced in the part of this file
 * visible here; presumably they were needed by the shared hash source -
 * confirm before removing.
 */
#define REG1 register
#define REG2 register
#define REG3 register

/*
 * sortrec - sort the `size' record pointers in `rptr' into ascending
 * strcmp() order of their keys.  Only the pointers are rearranged, in
 * place, using a Shell sort whose gap is halved on every pass.
 */
void
sortrec(
    Rec_t **rptr,
    int size
    )
{
    int span, upper, lower;
    Rec_t *held;

    span = size / 2;
    while (span > 0) {
        for (upper = span; upper < size; ++upper) {
            /* sink the out-of-order pair down through its gap sequence */
            lower = upper - span;
            while (lower >= 0 &&
                   strcmp(rptr[lower]->key, rptr[lower + span]->key) > 0) {
                held = rptr[lower + span];
                rptr[lower + span] = rptr[lower];
                rptr[lower] = held;
                lower -= span;
            }
        }
        span /= 2;
    }
}

/*
 * hash - return the sum of the character values in the NUL-terminated
 * string `name'.  The empty string yields 0.  Callers reduce the result
 * modulo their table size.
 */
int
hash(
    register char *name
    )
{
    register int sum = 0;

    for (; *name != '\0'; ++name)
        sum += *name;

    return sum;
}