//+---------------------------------------------------------------------------
//
//  Microsoft Windows
//  Copyright (C) Microsoft Corporation, 1991 - 2000.
//
//  File:       NOISE.CXX
//
//  Contents:   Noise list
//
//  Classes:    CNoiseList, CNoiseListInit, CNoiseListEmpty
//              CLString, CStringList, CStringTable
//
//  History:    11-Jul-91   BartoszM    Created
//
//----------------------------------------------------------------------------

#include <pch.cxx>
#pragma hdrstop

#include <noise.hxx>

//+---------------------------------------------------------------------------
//
//  Member:     CLString::CLString, public
//
//  Synopsis:   Initializes and links a string list element
//
//  Arguments:  [cb] -- length
//              [buf] -- string
//              [next] -- next link in the chain
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
CLString::CLString ( UINT cb, const BYTE* buf, CLString* next )
{
    //
    // The recorded length is always the caller's [cb]. Debug builds copy
    // one byte more than that; CLString::operator new reserves the matching
    // extra byte. NOTE(review): presumably a terminator used when dumping
    // the string -- confirm against the class declaration.
    //
#if CIDBG == 1
    const UINT cbCopy = cb + 1;
#else
    const UINT cbCopy = cb;
#endif

    _next = next;
    _cb = cb;
    memcpy ( _buf, buf, cbCopy );
}

//+---------------------------------------------------------------------------
//
//  Member:     CLString::operator new, public
//
//  Synopsis:   Allocates a string list element
//
//  Arguments:  [n]    -- size of class instance
//              [cb]   -- length of string buffer needed
//
//  History:    10 Apr 96   AlanW     Created.
//
//----------------------------------------------------------------------------

void *
CLString::operator new ( size_t n, UINT cb )
{
#if CIDBG == 1
    cb++;
#endif
    return new BYTE [n+cb];
}


//+---------------------------------------------------------------------------
//
//  Member:     CStringList::~CStringList, public
//
//  Synopsis:   Free linked list
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
CStringList::~CStringList()
{
    //
    // Pop elements off the front one at a time. The successor is stored
    // in _head before the current element is freed, so the list is empty
    // (_head == 0) when the loop ends.
    //
    CLString* pDoomed;
    while ( (pDoomed = _head) != 0 )
    {
        _head = pDoomed->Next();
        delete pDoomed;
    }
}

//+---------------------------------------------------------------------------
//
//  Member:     CStringList::Add, public
//
//  Synopsis:   Adds a string to list
//
//  Arguments:  [cb] -- length
//              [str] -- string
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
void CStringList::Add ( UINT cb, const BYTE * str )
{
    // Push on the front: the new element links to the current head and
    // then becomes the head. [cb] is passed both to operator new (to size
    // the allocation) and to the constructor (to copy the string).
    CLString * pOldHead = _head;
    _head = new (cb) CLString ( cb, str, pOldHead );
}

//+---------------------------------------------------------------------------
//
//  Member:     CStringList::Find, public
//
//  Synopsis:   Returns TRUE if string found in the list, FALSE otherwise
//
//  Arguments:  [cb] -- length
//              [str] -- string
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
BOOL CStringList::Find ( UINT cb, const BYTE* str ) const
{
    // Linear scan of the chain; stop at the first element that compares
    // equal to the given (length, bytes) pair.
    for ( CLString* pCur = _head; pCur != 0; pCur = pCur->Next() )
    {
        if ( pCur->Equal ( cb, str ) )
            return TRUE;
    }

    // Reached the end of the chain without a match.
    return FALSE;
}

#if CIDBG == 1

void CStringList::Dump () const
{
    CLString * p = _head;
    while ( p )
    {
        p->Dump();
        p = p->Next();
    }
    ciDebugOut (( DEB_ITRACE, "\n" ));
}

#endif // CIDBG == 1

//+---------------------------------------------------------------------------
//
//  Member:     CStringTable::CStringTable, public
//
//  Synopsis:   Create hash table of given size
//
//  Arguments:  [size] -- size
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
CStringTable::CStringTable( UINT size )
{
    // Remember the bucket count, then allocate one default-constructed
    // string list per bucket.
    _size = size;

    CStringList * aBuckets = new CStringList[size];
    _bucket = aBuckets;
}

//+---------------------------------------------------------------------------
//
//  Member:     CStringTable::~CStringTable, public
//
//  Synopsis:   Free linked lists
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
CStringTable::~CStringTable()
{
    // Array delete runs ~CStringList on every bucket, which in turn frees
    // each bucket's chain of strings.
    delete [] _bucket;
}

//+---------------------------------------------------------------------------
//
//  Member:     CStringTable::Add, public
//
//  Synopsis:   Add a string to hash table
//
//  Arguments:  [cb] -- size
//              [str] -- string
//              [hash] -- precomputed hash value
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
void CStringTable::Add ( UINT cb, const BYTE* str, UINT hash )
{
    // Map the precomputed hash to a bucket, then add the string to that
    // bucket's list.
    CStringList & list = _bucket[_index(hash)];
    list.Add ( cb, str );
}

#if CIDBG == 1

void CStringTable::Dump () const
{
    for ( unsigned i = 0; i < _size; i++ )
    {
        if ( !_bucket[i].IsEmpty() )
        {
            ciDebugOut (( DEB_ITRACE, "%3d: ", i ));
            _bucket[i].Dump();
        }
    }
}

#endif // CIDBG == 1

//+---------------------------------------------------------------------------
//
//  Member:     CNoiseList::CNoiseList
//
//  Synopsis:   constructor for noise list
//
//  Effects:    gets buffers from key repository
//
//  Arguments:  [table] -- string table searched for noise words
//              [krep] -- key repository to give words to.
//
//  History:    05-June-91   t-WadeR     Created.
//
//----------------------------------------------------------------------------
CNoiseList::CNoiseList( const CStringTable& table, PKeyRepository& krep )
    : _krep(krep),
      _table(table),
      _cNoiseWordsSkipped(0),
      _cNonNoiseAltWords(0),
      _fFoundNoise( FALSE )
{
    // Obtain the repository's size pointer, data buffer and occurrence
    // pointer; words are checked in place in these buffers.
    krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );

    // Capture the size reported at construction so GetBuffers can restore
    // it before each hand-out.
    _cbMaxOutBuf = *_pcbOutBuf;
}


//+---------------------------------------------------------------------------
//
//  Member:     CNoiseList::GetBuffers
//
//  Synopsis:   Returns address of normalizer's input buffers
//
//  Arguments:  [ppcbInBuf] -- pointer to pointer to size of input buffer
//              [ppbInBuf] -- pointer to pointer to receive address of buffer
//
//  History:    05-June-91   t-WadeR     Created.
//
//----------------------------------------------------------------------------
void    CNoiseList::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    //
    // There is no separate input buffer: the caller is handed the buffers
    // obtained from the key repository in the constructor.
    //

    // Restore the size to the value captured at construction.
    *_pcbOutBuf = _cbMaxOutBuf;

    *ppcbInBuf = _pcbOutBuf;
    *ppbInBuf = _pbOutBuf;
}


//+---------------------------------------------------------------------------
//
//  Member:     CNoiseList::GetFlags
//
//  Synopsis:   Returns address of ranking and range flags
//
//  Arguments:  [ppRange] -- range flag
//              [ppRank] -- rank flag
//
//  History:    11-Feb-92   BartoszM     Created.
//
//----------------------------------------------------------------------------
void CNoiseList::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
{
    // The noise list keeps no flags of its own; forward the request to
    // the key repository.
    _krep.GetFlags ( ppRange, ppRank );
}


//+---------------------------------------------------------------------------
//
//  Member:     CNoiseList::PutWord
//
//  Synopsis:   If word isn't a noise word, passes it to the key repository
//
//  Effects:    calls _krep.PutKey
//
//  Arguments:  [hash] -- precomputed hash value
//
//  History:    05-June-91   t-WadeR     Created stub.
//
//----------------------------------------------------------------------------
void    CNoiseList::PutWord ( UINT hash )
{
    // Look the current buffer contents up in the noise table.
    if ( !_table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) )
    {
        //
        // Not a noise word: output it to the key repository. The count of
        //   noise words skipped refers to noise words at previous
        //   occurrences only, and starts over once it has been reported.
        //
        _krep.PutKey( _cNoiseWordsSkipped );
        _cNoiseWordsSkipped = 0;
    }
    else
    {
        _fFoundNoise = TRUE;

        //
        // If every alternate word at the current occurrence was also a
        //   noise word, the whole occurrence is equivalent to one noise
        //   word, so it is counted as skipped.
        //
        if ( 0 == _cNonNoiseAltWords )
            ++_cNoiseWordsSkipped;
    }

    // This word closes the current occurrence: reset the count of non-noise
    // alternates and advance the occurrence.
    _cNonNoiseAltWords = 0;
    ++(*_pocc);
}


//+---------------------------------------------------------------------------
//
//  Member:     CNoiseList::PutAltWord
//
//  Synopsis:   If word isn't a noise word, passes it to the key repository
//
//  Effects:    calls _krep.PutKey
//
//  Arguments:  [hash] -- precomputed hash value
//
//  History:    03-May-95   SitaramR     Created
//
//----------------------------------------------------------------------------
void    CNoiseList::PutAltWord ( UINT hash )
{
    // Noise words are only noted, never forwarded.
    if ( _table.Find ( *_pcbOutBuf, _pbOutBuf, hash ) )
    {
        _fFoundNoise = TRUE;
        return;
    }

    //
    // Since this is not the last of a sequence of alternate words, record
    //   that the current occurrence has a non-noise alternate.
    //
    ++_cNonNoiseAltWords;

    //
    // Output the word to the key repository. The count of noise words
    //   skipped refers to noise words at previous occurrences only.
    //
    _krep.PutKey( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}


//+---------------------------------------------------------------------------
//
//  Member:     CNoiseList::StartAltPhrase
//
//  History:    29-Nov-94   SitaramR     Created
//
//----------------------------------------------------------------------------

void CNoiseList::StartAltPhrase()
{
    // Hand the pending count of skipped noise words to the key repository
    // along with the notification, then start counting afresh.
    _krep.StartAltPhrase( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}



//+---------------------------------------------------------------------------
//
//  Member:     CNoiseList::EndAltPhrase
//
//  History:    29-Nov-94   SitaramR     Created
//
//----------------------------------------------------------------------------

void CNoiseList::EndAltPhrase()
{
    // Hand the pending count of skipped noise words to the key repository
    // along with the notification, then start counting afresh.
    _krep.EndAltPhrase( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}



//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListInit::CNoiseListInit
//
//  Synopsis:   Creates a hash table to be filled
//
//  Arguments:  [size] -- size of the hash table (possibly prime #)
//
//  History:    15-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------

CNoiseListInit::CNoiseListInit ( UINT size )
{
    // Allocate the string table that PutWord adds keys to.
    _table = new CStringTable ( size );

    // NOTE(review): END_CONSTRUCTION is a project macro defined outside
    // this file; presumably it marks the object as fully constructed for
    // the project's exception handling -- confirm.
    END_CONSTRUCTION( CNoiseListInit );
}

//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListInit::GetBuffers
//
//  Synopsis:   Returns address of repository's input buffers
//
//  Arguments:  [ppcbInBuf] -- pointer to pointer to size of input buffer
//              [ppbInBuf] -- pointer to pointer to receive address of buffer
//
//  History:    15-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------

void  CNoiseListInit::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    // Set the key's count to MAXKEYSIZE, then hand out the address of that
    // count and the key's writable buffer so the caller can fill in _key
    // directly.
    _key.SetCount(MAXKEYSIZE);
    *ppcbInBuf = _key.GetCountAddress();
    *ppbInBuf = _key.GetWritableBuf();
}

//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListInit::PutWord
//
//  Synopsis:   Puts a key into the hash table
//
//  Arguments:  [hash] -- hash value
//
//  History:    15-Jul-91   BartoszM     Created
//
//----------------------------------------------------------------------------
void CNoiseListInit::PutWord ( UINT hash )
{
    // Add the current contents of _key (count and bytes) to the table
    // under the given precomputed hash.
    _table->Add ( _key.Count(), _key.GetBuf(), hash );
}


//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListInit::PutAltWord
//
//  Synopsis:   Puts a key into the hash table
//
//  Arguments:  [hash] -- hash value
//
//  History:    03-May-95     SitaramR    Created
//
//----------------------------------------------------------------------------
void CNoiseListInit::PutAltWord ( unsigned hash )
{
    // While the table is being built, alternate words are handled exactly
    // like ordinary words.
    PutWord( hash );
}



//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListEmpty::CNoiseListEmpty
//
//  Synopsis:   constructor for a default empty noise list
//
//  Effects:    gets buffers from key repository
//
//  Arguments:  [krep] -- key repository to give words to.
//              [ulFuzzy] -- Fuzziness of query
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
CNoiseListEmpty::CNoiseListEmpty( PKeyRepository& krep, ULONG ulFuzzy )
    : _krep(krep),
      _ulGenerateMethod(ulFuzzy),
      _cNoiseWordsSkipped(0),
      _cNonNoiseAltWords(0),
      _fFoundNoise( FALSE )
{
    // Obtain the repository's size pointer, data buffer and occurrence
    // pointer; words are checked in place in these buffers.
    krep.GetBuffers( &_pcbOutBuf, &_pbOutBuf, &_pocc );

    // Capture the size reported at construction so GetBuffers can restore
    // it before each hand-out.
    _cbMaxOutBuf = *_pcbOutBuf;
}


//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListEmpty::GetBuffers
//
//  Synopsis:   Returns address of normalizer's input buffers
//
//  Arguments:  [ppcbInBuf] -- pointer to pointer to size of input buffer
//              [ppbInBuf] -- pointer to pointer to receive address of buffer
//
//  History:    16-Jul-91   BartoszM     Created.
//
//----------------------------------------------------------------------------
void    CNoiseListEmpty::GetBuffers( UINT** ppcbInBuf, BYTE** ppbInBuf )
{
    //
    // There is no separate input buffer: the caller is handed the buffers
    // obtained from the key repository in the constructor.
    //

    // Restore the size to the value captured at construction.
    *_pcbOutBuf = _cbMaxOutBuf;

    *ppcbInBuf = _pcbOutBuf;
    *ppbInBuf = _pbOutBuf;
}

//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListEmpty::GetFlags
//
//  Synopsis:   Returns address of ranking and range flags
//
//  Arguments:  [ppRange] -- range flag
//              [ppRank] -- rank flag
//
//  History:    11-Feb-92   BartoszM     Created.
//
//----------------------------------------------------------------------------
void CNoiseListEmpty::GetFlags ( BOOL** ppRange, CI_RANK** ppRank )
{
    // The noise list keeps no flags of its own; forward the request to
    // the key repository.
    _krep.GetFlags ( ppRange, ppRank );
}



//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListEmpty::PutWord
//
//  Synopsis:   If word isn't a noise word, passes it to the key repository
//
//  Effects:    calls _krep.PutKey
//
//  Arguments:  [hash] -- hash value (ignored)
//
//  History:    16-Jul-91   BartoszM     Created
//
//  Notes:      Filters out one letter words, unless it is a prefix (*) query
//
//----------------------------------------------------------------------------
void    CNoiseListEmpty::PutWord ( UINT )
{
    //
    // Even though the noise list is empty, we are modeling PutBreak()
    // by a skip of appropriate number of noise words, and we are counting
    // 1 letter words as noise words. Note that the length is in bytes and
    // there is a 1 byte prefix. Prefix queries are never filtered.
    //
    if ( _ulGenerateMethod == GENERATE_METHOD_PREFIX || *_pcbOutBuf > NOISE_WORD_LENGTH )
    {
        //
        // Not treated as noise: output the word to the key repository. The
        //   count of noise words skipped refers to noise words at previous
        //   occurrences only, and starts over once it has been reported.
        //
        _krep.PutKey( _cNoiseWordsSkipped );
        _cNoiseWordsSkipped = 0;
    }
    else
    {
        _fFoundNoise = TRUE;

        //
        // If every alternate word at the current occurrence was also a
        //   noise word, the whole occurrence is equivalent to one noise
        //   word, so it is counted as skipped.
        //
        if ( 0 == _cNonNoiseAltWords )
            ++_cNoiseWordsSkipped;
    }

    // This word closes the current occurrence: reset the count of non-noise
    // alternates and advance the occurrence.
    _cNonNoiseAltWords = 0;
    ++(*_pocc);
}




//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListEmpty::PutAltWord
//
//  Synopsis:   If word isn't a noise word, passes it to the key repository
//
//  Effects:    calls _krep.PutKey
//
//  Arguments:  [hash] -- precomputed hash value (ignored)
//
//  History:    03-May-95   SitaramR     Created
//
//  Notes:      Filters out one letter words, unless it is a prefix (*) query
//
//----------------------------------------------------------------------------
void    CNoiseListEmpty::PutAltWord ( UINT hash )
{
    //
    // Even though the noise list is empty, we are modeling PutBreak()
    // by a skip of appropriate number of noise words, and we are counting
    // 1 letter words as noise words. Note that the length is in bytes and
    // there is a 1 byte prefix. Prefix queries are never filtered.
    //
    if ( _ulGenerateMethod != GENERATE_METHOD_PREFIX && *_pcbOutBuf <= NOISE_WORD_LENGTH )
    {
        // Treated as noise: note it and forward nothing.
        _fFoundNoise = TRUE;
        return;
    }

    //
    // Since this is not the last of a sequence of alternate words, record
    //   that the current occurrence has a non-noise alternate.
    //
    ++_cNonNoiseAltWords;

    //
    // Output the word to the key repository. The count of noise words
    //   skipped refers to noise words at previous occurrences only.
    //
    _krep.PutKey( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}




//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListEmpty::StartAltPhrase
//
//  Synopsis:   Pass on StartAltPhrase to key repository
//
//  History:    20-Feb-95   SitaramR     Created
//
//----------------------------------------------------------------------------

void CNoiseListEmpty::StartAltPhrase()
{
    // Hand the pending count of skipped noise words to the key repository
    // along with the notification, then start counting afresh.
    _krep.StartAltPhrase( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}



//+---------------------------------------------------------------------------
//
//  Member:     CNoiseListEmpty::EndAltPhrase
//
//  Synopsis:   Pass on EndAltPhrase to key repository
//
//  History:    20-Feb-95   SitaramR     Created
//
//----------------------------------------------------------------------------

void CNoiseListEmpty::EndAltPhrase()
{
    // Hand the pending count of skipped noise words to the key repository
    // along with the notification, then start counting afresh.
    _krep.EndAltPhrase( _cNoiseWordsSkipped );
    _cNoiseWordsSkipped = 0;
}