/*
 *  pathash.h
 *
 *  author:	John R. Douceur
 *  date:	5 May 1997
 *
 *  This header file defines structures, function prototypes, and macros for
 *  the pat-hash table database.  The code is object-oriented C, transliterated
 *  from a C++ implementation.
 *
 *  The pat-hash database is a combination of a dynamically sized, separately
 *  chained hash table and a Patricia tree.  The hash table dynamically grows
 *  and shrinks as needed, and the workload of modifying the table size is
 *  distributed evenly among the insertion or removal operations that cause
 *  the growth or shrinkage.
 *
 *  The insertion and removal operations manage both a hash table and a Patricia
 *  tree, but the search routine uses only the hash table for performing the
 *  search.  The Patricia tree is present to support a scan operation, which
 *  searches the database for all entries that match a given pattern, where the
 *  pattern that is scanned may contain wildcards.
 *
 *  Because this code is C, rather than C++, it is not possible to hide as
 *  much of the implementation from the client code as one might wish.
 *  Nonetheless, there is an attempt to isolate the client from some of the
 *  implementation details through the use of macros.  Below is described each
 *  of the functions and macros necessary to use the pat-hash table.
 *
 */

#ifndef _INC_PATHASH

#define _INC_PATHASH

#ifdef __cplusplus
extern "C" {
#endif

/*
 *  There are three basic structures employed: the PHTableEntry, the
 *  PHTableGroup, and the PatHashTable.  Ideally, these would be completely
 *  hidden from the client, but the macro GetReferenceFromSpecificPatternHandle
 *  requires knowledge of the structure's definition.  It is strongly urged
 *  that the client not directly refer to the fields of any of these
 *  structures.  To support the documentation of the accompanying pathash.c
 *  file, these structures are annotated with internal comments, but these can
 *  be ignored by the reader who wishes only to understand how to write client
 *  code for the pat-hash table.
 *
 *  The values stored in the pat-hash table are known as specific patterns,
 *  where the term "specific" implies that the patterns do not contain
 *  wildcards.  The client refers to a pattern by its SpecificPatternHandle.
 *  This is typedefed to a pointer to PHTableEntry, but this fact should be
 *  ignored by the client, since it is an implementation detail.
 *
 */

    //#include <stdlib.h>
    //#include <malloc.h>

typedef struct _PHTableEntry
{
	// One stored specific pattern.  An entry plays two roles at once: it is
	// a link in a hash chain (a linked list reached through the hash table),
	// and it is a node of the Patricia tree.

	// ---- hash chain linkage ----

	// hash value
	unsigned int hash;
	// next entry in the same linked list
	struct _PHTableEntry *next;

	// ---- Patricia tree linkage ----

	// bit of the key on which to branch
	int pivot_bit;
	// the two child nodes
	struct _PHTableEntry *children[2];

	// ---- payload ----

	// reference value supplied by the client
	void *reference;
	// Space for storing the pattern value.  Declared with a single element;
	// NOTE(review): presumably pathash.c over-allocates the entry so that
	// the whole key fits here -- confirm against the allocation code.
	char value[1];
} PHTableEntry;

typedef struct _PHTableGroup
{
	// The hash table that indexes the hash chains is not one flat array; it
	// is a linked list of "groups".  A group is a table of pointers to hash
	// chains of entries, plus a pointer to the previous group, so the list
	// of groups is linked backwards.  Group sizes are powers of two: besides
	// one special group of size one, there are groups of size one, two,
	// four, eight, and so on, for as many groups as the table needs.

	// the immediately smaller group
	struct _PHTableGroup *previous;
	// space to hold the table of chain pointers
	PHTableEntry *entry_list[1];
} PHTableGroup;

typedef struct _PatHashTable
{
	// ---- key size and tuning values ----

	// number of bits in key
	int keybits;
	// number of bytes in key, calculated from keybits
	int keybytes;
	// desired ratio of entries to hash chains
	int usage_ratio;
	// hysteresis between insertion and removal resizes
	int usage_histeresis;
	// hysteresis between insert and removal mallocs
	int allocation_histeresis;
	// maximum size of free entry list
	int max_free_list_size;

	// ---- hash table groups and current sizing ----

	// first group to search
	PHTableGroup *initial_group;
	// largest group allocated
	PHTableGroup *top_group;
	// binary exponent of current allocation size
	int allocation_exponent;
	// binary exponent of current group size
	int size_exponent;
	// number of slots in use in initial group
	int extension_size;

	// ---- contents ----

	// number of entries in database
	int population;
	// root of Patricia tree
	PHTableEntry *root;
	// list of free (unused) entries
	PHTableEntry *free_list;
	// number of elements currently on free list
	int free_list_size;
} PatHashTable;

// Handle by which the client refers to a value stored in the database.
typedef struct _PHTableEntry *SpecificPatternHandle;

/*
 *  The client interface to the pat-hash table is provided by seven functions
 *  and four macros.  It is expected that the client will first instantiate a
 *  database, either on the stack or the heap, and then insert specific patterns
 *  with corresponding reference information into the database.  The client can
 *  then search the database for the specific patterns that were stored, and
 *  it can scan the database for all specific patterns that match a general
 *  pattern containing wildcards.
 *
 */

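// A pat-hash table may be allocated on the stack simply by declaring a variable
// of type PatHashTable.  To allocate it on the heap, use the
// AllocatePatHashTable macro below, which hands the address of its pointer
// argument, together with sizeof(PatHashTable), to GpcAllocMem.  A structure
// obtained this way must later be released with the matching FreePatHashTable
// macro.  (The malloc()-based NEW_PatHashTable macro that this comment
// originally described survives below only as commented-out code.)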
//
//#define NEW_PatHashTable ((PatHashTable *)malloc(sizeof(PatHashTable)))

// Heap allocation helpers for a PatHashTable structure.
//
// AllocatePatHashTable(_ph) passes the address of _ph, the size of a
// PatHashTable, and PathHashTag to GpcAllocMem; _ph must therefore be an
// lvalue (presumably of type PatHashTable * -- confirm against GpcAllocMem).
// FreePatHashTable(_ph) passes _ph and PathHashTag to GpcFreeMem.
//
// GpcAllocMem, GpcFreeMem, and PathHashTag are not defined in this header and
// must be visible wherever these macros are expanded.  Neither macro
// constructs or destructs the table; see constructPatHashTable and
// destructPatHashTable.
//
// The macro parameter is parenthesized wherever it is used so that the
// expansion does not depend on the form of the argument expression.
//
#define AllocatePatHashTable(_ph)    GpcAllocMem(&(_ph), \
                                                 sizeof(PatHashTable), \
                                                 PathHashTag)
#define FreePatHashTable(_ph)        GpcFreeMem((_ph), PathHashTag)

// Since this is not C++, the PatHashTable structure is not self-constructing;
// therefore, the following constructor code must be called on the PatHashTable
// structure after it is allocated.  The argument keybits specifies the size
// (in bits) of each pattern that will be stored in the database.  The remaining
// arguments are parameters to the various control systems that govern the size
// of the database.
//
// The usage ratio is the target ratio of database entries to discrete hash
// chains, which is also the mean length of a hash chain:  The minimum value
// is one; a larger value slightly decreases memory utilization and
// insertion/removal time at the expense of increasing search time.  There is
// benefit to choosing a power of two for this value.  Recommended values are
// 2 and 4.
//
// The usage hysteresis (parameter usage_histeresis) is the hysteresis between
// resizing operations due to insertions and removals.  The minimum value is
// zero, providing no hysteresis; in this case, if an insertion that causes an
// increase in table size is immediately followed by a removal, the table size
// will be decreased.  Thus, a zero hysteresis maintains low memory usage, but
// it engenders resizing chatter if insertions and removals are frequent.
//
// Allocation hysteresis is the hysteresis between allocation and deallocation
// of groups.  A group is allocated immediately when it is required by a size
// increase in the table, but it is not necessarily deallocated immediately
// following a size decrease, if the allocation hysteresis is set to a value
// greater than zero.  Because groups are allocated in powers of two, the
// hysteresis value is specified as a binary exponent.  A value of 1 causes a
// group to be deallocated when the table is half of the size that will cause
// the group to be re-allocated.  A value of 2 causes the group to be
// deallocated when the table is one quarter of the size that will cause the
// group to be re-allocated, and so forth.
//
// The maximum free list size determines the maximum number of elements that
// will be placed on a free list, rather than deallocated, when they are
// removed.  Setting this value to zero keeps memory utilization low, but it
// can result in more frequent allocations and deallocation operations, which
// are expensive.
//
// Constructor for an already-allocated PatHashTable.
//   phtable               - the structure to construct
//   keybits               - size, in bits, of each pattern to be stored
//   usage_ratio           - target ratio of entries to hash chains (minimum 1)
//   usage_histeresis      - hysteresis between insert/remove resizes (minimum 0)
//   allocation_histeresis - group deallocation hysteresis, as a binary exponent
//   max_free_list_size    - maximum number of removed entries kept for reuse
// NOTE(review): the meaning of the int return value is not stated in this
// header -- confirm against pathash.c.
int constructPatHashTable(PatHashTable *phtable,
                          int keybits,
                          int usage_ratio,
                          int usage_histeresis,
                          int allocation_histeresis,
                          int max_free_list_size);

// Since this is not C++, the PatHashTable structure is not self-destructing;
// therefore, the following destructor code must be called on the PatHashTable
// structure before it is deallocated.
//
// Destructor: must be called on a constructed PatHashTable before the
// structure itself is deallocated.
void destructPatHashTable(PatHashTable *phtable);

// Once the PatHashTable structure has been allocated and constructed, patterns
// can be inserted into the database.  Each pattern is passed as an array of
// bytes.
//
// Since the PatHashTable structure specifies the size of each pattern, it is
// theoretically possible for the insert routine to digest the submitted
// pattern and produce a hash value therefrom; however, general mechanisms for
// accomplishing this digestion are not very efficient.  Therefore, the client
// is responsible for providing a digested form of its input as the chyme
// parameter.  If the pattern is no bigger than an unsigned int, then the chyme
// can simply be equal to the pattern.  If it is larger, then it should be set
// to something like the exclusive-or of the pattern's fields; however, care
// should be taken to ensure that two patterns are not likely to digest to the
// same chyme value, since this will substantially decrease the efficiency of
// the hash table.  One common way of accomplishing this is by rotating the
// fields by varying amounts prior to the exclusive-or.
//
// The client also specifies a reference value, as a void pointer, that it
// wishes to associate with this pattern.  When the pattern is installed, the
// insert routine returns a SpecificPatternHandle.  From the
// SpecificPatternHandle can be gotten the reference value via the macro
// GetReferenceFromSpecificPatternHandle.
//
// If the submitted pattern has already been installed in the database, then
// the insertion does not occur, and the SpecificPatternHandle of the
// previously installed pattern is returned.
//
// Inserts a pattern into the database.
//   phtable   - the table to insert into
//   pattern   - the pattern, as an array of bytes
//   chyme     - client-computed digest of the pattern, used for hashing
//   reference - client value to associate with the pattern
// Returns the handle of the installed pattern, or the handle of the previously
// installed pattern if it was already present.
// NOTE(review): this header does not say what is returned if the insertion
// cannot be completed -- confirm against pathash.c.
SpecificPatternHandle insertPatHashTable(PatHashTable *phtable,
                                         char *pattern,
                                         unsigned int chyme,
                                         void *reference);

// This function removes a pattern from the pat-hash table.  The pattern is
// specified by the SpecificPatternHandle that was returned by the insert
// routine.  No checks are performed to ensure that this is a valid handle.
//
// Removes the pattern identified by sphandle from the table.  The handle is
// not validated.
void removePatHashTable(PatHashTable *phtable,
                        SpecificPatternHandle sphandle);

// This function searches the database for the specific pattern that matches
// the given key, which is passed as an array of bytes.  If a match is found,
// the SpecificPatternHandle of that matching specific pattern is returned.
// From the SpecificPatternHandle can be gotten the reference value via the
// macro GetReferenceFromSpecificPatternHandle.  If no match is found, then a
// value of 0 is returned as the SpecificPatternHandle.
//
// As with the insert routine, the client is expected to provide a digested
// form of the key as the chyme argument to the routine.  This chyme value
// must be calculated in the exact same way for the search routine as it is
// for the insert routine; otherwise, the search will not be able to find the
// matching pattern.
//
// Looks up the specific pattern that matches key.
//   phtable - the table to search
//   key     - the key, as an array of bytes
//   chyme   - digest of the key, computed exactly as for insertion
// Returns the handle of the matching pattern, or 0 if there is no match.
SpecificPatternHandle searchPatHashTable(PatHashTable *phtable,
                                         char *key,
                                         unsigned int chyme);

// The scan routine (described below) requires the client to supply a callback
// function to be called for each specific pattern that matches the supplied
// general pattern.  The following typedef defines the ScanCallback function
// pointer, which specifies the prototype of the callback function that the
// client must provide.  The client's callback function must accept a void
// pointer (which is a client-supplied context) and a SpecificPatternHandle.
// The return type of the client's callback function is void.
//
typedef void (*ScanCallback)(void *context, SpecificPatternHandle sphandle);

// This function searches the database for all specific patterns that match a
// given general pattern.  The general pattern is specified by a value and a
// mask.  Each bit of the mask determines whether the bit position is specified
// or is a wildcard:  A 1 in a mask bit indicates that the value of that bit is
// specified by the general pattern; a 0 indicates that the value of that bit
// is a wildcard.  If a mask bit is 1, then the corresponding bit in the value
// field indicates the specified value of that bit.  Value and mask fields are
// passed as arrays of bytes.
//
// For each specific pattern in the database that matches the supplied general
// pattern, a client-supplied callback function is called with the
// SpecificPatternHandle of the matching specific pattern.  This callback
// function is also passed a context (as a void pointer) that is supplied by
// the client in the call to the scan routine.
//
// Invokes func once for every stored specific pattern that matches the general
// pattern given by value and mask.
//   phtable - the table to scan
//   value   - specified bit values, as an array of bytes
//   mask    - 1 bits mark specified positions, 0 bits mark wildcards
//   context - client value passed to func on every call
//   func    - client callback, called with context and the matching handle
void scanPatHashTable(PatHashTable *phtable,
                      char *value,
                      char *mask,
                      void *context,
                      ScanCallback func);

// To get the client-supplied reference value from a SpecificPatternHandle, the
// following macro should be used.  The client should not make assumptions
// about the details of the PHTableEntry structure, nor should it even assume
// that the SpecificPatternHandle is a pointer to a PHTableEntry.
// A second macro likewise yields a pointer to the stored key (pattern value).
//
// Accessors for a SpecificPatternHandle.
//
// GetReferenceFromSpecificPatternHandle(sphandle) yields the client-supplied
// reference field of the entry; the result is an lvalue.
// GetKeyPtrFromSpecificPatternHandle(sphandle) yields the entry's value
// array, i.e. the start of the stored key bytes.
//
// Each replacement list is wrapped in parentheses, in addition to the
// parameter, so that the macro always expands to a single primary expression
// regardless of the surrounding operators.
//
#define GetReferenceFromSpecificPatternHandle(sphandle) ((sphandle)->reference)
#define GetKeyPtrFromSpecificPatternHandle(sphandle) ((sphandle)->value)

// As described above in the comments on the constructor, if the allocation
// histeresis is non-zero, then the groups will not be deallocated as soon as
// they can be.  Similarly, if max free list size is non-zero, then entries
// will not be deallocated as soon as they can be.  Thus, unused pieces of
// memory may accumulate, up to a limit.  If the client wishes to force the
// pat-hash table to release all of the memory that it currently can, then it
// should call the flush routine, which will deallocate all unneeded groups
// and entries.
//
// Forces the table to deallocate all groups and entries it does not currently
// need.
void flushPatHashTable(PatHashTable *phtable);

#ifdef __cplusplus
}
#endif

#endif	/* _INC_PATHASH */