//+---------------------------------------------------------------------------
//
//  Microsoft Windows
//  Copyright (C) Microsoft Corporation, 1991 - 1995.
//
//  File:       stemmer.cxx
//
//  Contents:   NLG's FarEast Stemmer
//
//  History:    01-July-1996   PatHal       Created.
//
//----------------------------------------------------------------------------

#include "pch.cxx"
#pragma hdrstop

#include "stemmer.hxx"

extern long gulcInstances;

//+---------------------------------------------------------------------------
//
//  Member:     CStemmer::CStemmer
//
//  Synopsis:   Constructor for the CStemmer class.
//
//  Arguments:  [lcid] -- locale id
//
//----------------------------------------------------------------------------

CStemmer::CStemmer( LCID lcid )
        : _cRefs(1)
{
   InterlockedIncrement( &gulcInstances );
}


//+---------------------------------------------------------------------------
//
//  Member:     CStemmer::~CStemmer
//
//  Synopsis:   Destructor for the CStemmer class.
//
//  Notes:      All termination/deallocation is done by embedded smart pointers
//
//----------------------------------------------------------------------------

CStemmer::~CStemmer()
{
   InterlockedDecrement( &gulcInstances );
}

//+-------------------------------------------------------------------------
//
//  Method:     CStemmer::QueryInterface
//
//  Synopsis:   Rebind to other interface
//
//  Arguments:  [riid]      -- IID of new interface
//              [ppvObject] -- New interface * returned here
//
//  Returns:    S_OK if bind succeeded, E_NOINTERFACE if bind failed
//
//--------------------------------------------------------------------------

SCODE STDMETHODCALLTYPE
CStemmer::QueryInterface( REFIID riid, void  ** ppvObject)
{
    IUnknown *pUnkTemp;
    SCODE sc = S_OK;

    switch( riid.Data1 & 0x000000FF )
    {
    case 0x00:
        if ( riid == IID_IUnknown )
            pUnkTemp = (IUnknown *)this;
        else
            sc = E_NOINTERFACE;
        break;

    case 0x40:
        if ( riid == IID_IStemmer )
            pUnkTemp = (IUnknown *)(IStemmer *)this;
        else
            sc = E_NOINTERFACE;
        break;

    default:
        pUnkTemp = 0;
        sc = E_NOINTERFACE;
        break;
    }

    if( 0 != pUnkTemp )
    {
        *ppvObject = (void  * )pUnkTemp;
        pUnkTemp->AddRef();
    }
    else
       *ppvObject = 0;
    return(sc);
}


//+-------------------------------------------------------------------------
//
//  Method:     CStemmer::AddRef
//
//  Synopsis:   Increments refcount
//
//--------------------------------------------------------------------------

ULONG STDMETHODCALLTYPE
CStemmer::AddRef()
{
    return InterlockedIncrement( &_cRefs );
}

//+-------------------------------------------------------------------------
//
//  Method:     CStemmer::Release
//
//  Synopsis:   Decrement refcount.  Delete if necessary.
//
//--------------------------------------------------------------------------

ULONG STDMETHODCALLTYPE
CStemmer::Release()
{
    unsigned long uTmp = InterlockedDecrement( &_cRefs );

    if ( 0 == uTmp )
        delete this;

    return(uTmp);
}

//+-------------------------------------------------------------------------
//
//  Method:     CStemmer::Init
//
//  Synopsis:   Initialize stemmer
//
//  Arguments:  [ulMaxTokenSize] -- Maximum size token stored by caller
//              [pfLicense]      -- Set to true if use restricted
//
//  Returns:    Status code
//
//--------------------------------------------------------------------------

SCODE STDMETHODCALLTYPE
CStemmer::Init(
    ULONG ulMaxTokenSize,
    BOOL *pfLicense )
{
    if (IsBadWritePtr(pfLicense, sizeof(DWORD))) {
        return E_FAIL;
    }

    *pfLicense = TRUE;
    _ulMaxTokenSize = ulMaxTokenSize;

    return S_OK;
}

//+---------------------------------------------------------------------------
//
//  Member:     CStemmer::GetLicenseToUse
//
//  Synopsis:   Returns a pointer to vendors license information
//
//  Arguments:  [ppwcsLicense] -- ptr to ptr to which license info is returned
//
//----------------------------------------------------------------------------
SCODE STDMETHODCALLTYPE
CStemmer::GetLicenseToUse( const WCHAR **ppwcsLicense )
{
    static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1995";

    if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) {
        return ( E_FAIL );
    }

    *ppwcsLicense = wcsCopyright;
    return( S_OK );
}

//+---------------------------------------------------------------------------
//
//  Member:     CStemmer::StemWord
//
//  Synopsis:   Stem a word into its inflected forms, eg swim to swims and swimming
//
//  Arguments:  [pwcInBuf] -- input Unicode word
//              [cwc] -- count of characters in word
//              [pStemSink] -- sink to collect inflected forms
//
//----------------------------------------------------------------------------

SCODE STDMETHODCALLTYPE
CStemmer::StemWord(
    WCHAR const *pwc,
    ULONG cwc,
    IStemSink *pStemSink )
{
    SCODE sc = S_OK;

    if ( 0 == pStemSink || 0 == pwc ) {
        return E_FAIL;
    }

    if ( 0 == cwc) {
        return S_OK;
    }

    CONST WCHAR *pwcStem;
    DWORD i;
    BYTE ct;
    BOOL fRomanWord = FALSE;

    __try {

        for ( i=1; i< ( cwc - 1 ); i++, pwc++) {
            ct = GetCharType(*pwc);

            if (ct == CH) {
                if (!fRomanWord) {
                    pwcStem = pwc;
                    fRomanWord = TRUE;
                }
            }
            else {
                if (fRomanWord) {
                    (pStemSink->PutWord)( pwcStem, pwc - pwcStem );
                    fRomanWord = FALSE;
                }
                else {
                    switch (ct) {
                    case PS:
                    case WS:
                        break;
                    default:
                        (pStemSink->PutWord)( pwc, 2 );
                        break;
                    }
                }
            }
        }

        // put the last English word
        if (fRomanWord) {
            (pStemSink->PutWord)( pwcStem, pwc - pwcStem );
            fRomanWord = FALSE;
        }

        // output inflected words to stemmer sink in EnumInflections callback
    } __except (1) {

        sc = E_UNEXPECTED;
    }

    return sc;
}