#include <windows.h>
#include <assert.h>
#include "PropNoun.H"

//  qsort/bsearch comparator for CharProb records.
//
//  Orders two records by their dwUnicode field only.
//  Returns 1 if item1 sorts after item2, -1 if it sorts before, 0 if the
//  dwUnicode values are equal.
int __cdecl CharCompare(
    const void *item1,
    const void *item2)
{
    PCharProb pLeft  = (PCharProb) item1;
    PCharProb pRight = (PCharProb) item2;

    //  (a > b) - (a < b) yields exactly 1, -1 or 0 without branching
    return (pLeft->dwUnicode > pRight->dwUnicode) -
           (pLeft->dwUnicode < pRight->dwUnicode);
}

//  qsort/bsearch comparator for NUL-terminated wide-character strings.
//
//  The ordering is the byte-wise (memcmp) order of the WCHAR storage; it is
//  only meaningful as a consistent total order shared by the sort and the
//  search, not as a linguistic or code-point order.
//
//  Returns <0, 0 or >0 as item1 sorts before, equal to, or after item2.
int __cdecl UnicodeCompare(
    const void *item1,
    const void *item2)
{
    LPCWSTR pwsz1 = (LPCWSTR) item1;
    LPCWSTR pwsz2 = (LPCWSTR) item2;

    int nLen1 = lstrlenW(pwsz1);
    int nLen2 = lstrlenW(pwsz2);
    int nShorter = nLen1 < nLen2 ? nLen1 : nLen2;

    //  Compare the shorter length plus its terminator.  Strings that differ
    //  must differ no later than the shorter string's terminating NUL, so the
    //  result matches a comparison over the longer length, but memcmp is
    //  never asked to read past the end of the shorter buffer.
    return memcmp(pwsz1, pwsz2, (nShorter + 1) * sizeof(WCHAR));
}

//  qsort/bsearch comparator for EngName records.
//
//  Records are ordered by wPrevUnicode first; wNextUnicode breaks ties.
//  Returns 1, -1 or 0 as item1 sorts after, before, or equal to item2.
int __cdecl EngNameCompare(
    const void *item1,
    const void *item2)
{
    PEngName pLeft  = (PEngName) item1;
    PEngName pRight = (PEngName) item2;

    //  Primary key
    if (pLeft->wPrevUnicode != pRight->wPrevUnicode) {
        return (pLeft->wPrevUnicode > pRight->wPrevUnicode) ? 1 : -1;
    }

    //  Secondary key
    if (pLeft->wNextUnicode != pRight->wNextUnicode) {
        return (pLeft->wNextUnicode > pRight->wNextUnicode) ? 1 : -1;
    }

    return 0;
}

//  Constructs an uninitialised detector.
//
//  Only records hInstance and puts every other member into its empty state
//  (NULL pointers, zero counts, default threshold).  No resources are
//  touched here; InitData() performs the actual loading.
CProperNoun::CProperNoun(HINSTANCE hInstance)
    : m_dProperNameThreshold(FL_PROPER_NAME_THRESHOLD)
    , m_pCharProb(NULL)
    , m_dwTotalCharProbNum(0)
    , m_pEngNameData(NULL)
    , m_hProcessHeap(0)
    , m_hInstance(hInstance)
{
}

//  Destructor.  Intentionally empty: nothing in the visible code allocates
//  memory that this object would have to free (InitData only stores the
//  process heap handle and a pointer returned by LockResource).
CProperNoun::~CProperNoun()
{
}

//  Loads the data the detector needs.
//
//  Steps:
//    1. Remember the process heap handle.
//    2. Find, load and lock the "CNAME"/"BIN" resource of m_hInstance and
//       treat it as an array of CharProb records.
//    3. Sort the static surname table with UnicodeCompare so that it can be
//       searched with bsearch.
//
//  Returns TRUE on success.  Returns FALSE as soon as any resource call
//  fails; in that case the surname table is left unsorted.
BOOL CProperNoun::InitData()
{
    HRSRC hResource;
    HGLOBAL hGlobal;

    m_hProcessHeap = GetProcessHeap();

    //  Locate the character-probability resource
    hResource = FindResource(m_hInstance, TEXT("CNAME"), TEXT("BIN"));
    if (hResource == NULL) {
        return FALSE;
    }

    //  Bring it into memory
    hGlobal = LoadResource(m_hInstance, hResource);
    if (hGlobal == NULL) {
        return FALSE;
    }

    //  Obtain a pointer to the raw records
    m_pCharProb = (PCharProb) LockResource(hGlobal);
    if (m_pCharProb == NULL) {
        return FALSE;
    }

    //  Record count is derived from the resource size
    m_dwTotalCharProbNum = SizeofResource(m_hInstance, hResource) / sizeof(CharProb);

    //  The English-name ("ENAME") loader below is disabled, so
    //  m_pEngNameData keeps the value it had before this call.
/*
    //  Find resource
    hResource = FindResource(m_hInstance, TEXT("ENAME"),
        TEXT("BIN"));
    if (!hResource) { goto _exit; }

    //  Load resource
    hGlobal = LoadResource(m_hInstance, hResource);
    if (!hGlobal) { goto _exit; }

    m_pEngNameData = (PEngNameData) LockResource(hGlobal);
    m_pEngNameData->pwUnicode = (PWORD) ((PBYTE) m_pEngNameData +
        sizeof(m_pEngNameData->dwTotalEngUnicodeNum) +
        sizeof(m_pEngNameData->dwTotalEngNamePairNum));
    m_pEngNameData->pEngNamePair = (PEngName) ((PBYTE) m_pEngNameData +
        sizeof(m_pEngNameData->dwTotalEngUnicodeNum) +
        sizeof(m_pEngNameData->dwTotalEngNamePairNum) +
        sizeof(m_pEngNameData->pwUnicode[0]) * m_pEngNameData->dwTotalEngUnicodeNum);

//    m_pEngName = (PEngName) LockResource(hGlobal);
//    m_dwTotalEngNameNum = SizeofResource(m_hInstance, hResource) / sizeof(EngName);
*/

    //  Put the surname table into the order bsearch expects
    qsort(m_pwszSurname, m_dwTotalSurnameNum, sizeof(m_pwszSurname[0]), UnicodeCompare);

    return TRUE;
}

//  Tells whether the uCount characters at lpwszChar form a proper noun.
//
//  The Chinese-name test runs first; the English-name test is consulted
//  only when the Chinese-name test fails.
//  Returns TRUE if either test accepts the text, FALSE otherwise.
BOOL CProperNoun::IsAProperNoun(
    LPWSTR lpwszChar,
    UINT uCount)
{
    if (IsAChineseName(lpwszChar, uCount)) {
        return TRUE;
    }

    return IsAEnglishName(lpwszChar, uCount) ? TRUE : FALSE;
}

//  Tells whether the uCount characters at lpcwszChar look like a Chinese
//  personal name.
//
//  The first character must be found in the surname table (which InitData
//  must have sorted beforehand).  Each following character contributes its
//  probability from the m_pCharProb table, or FL_DEFAULT_CHAR_PROBABILITY
//  when the character is not listed; the product must reach
//  m_dProperNameThreshold.
//
//  lpcwszChar  Text to test; only the first uCount characters are read.
//  uCount      Number of characters to test.
//
//  Returns TRUE when the text is accepted, FALSE otherwise (including for
//  a NULL pointer or an empty span).
BOOL CProperNoun::IsAChineseName(
    LPCWSTR lpcwszChar,
    UINT    uCount)
{
    //  An empty span has no surname character to look at
    if (lpcwszChar == NULL || uCount == 0) {
        return FALSE;
    }

    //  Search key: the first character as a NUL-terminated string with the
    //  same shape as a table entry.  Kept on the stack (not static) so that
    //  concurrent or re-entrant calls cannot overwrite each other's key.
    WCHAR wszSurname[3] = { lpcwszChar[0], L'\0', L'\0' };

    //  Find surname
    if (bsearch(wszSurname, m_pwszSurname, m_dwTotalSurnameNum,
            sizeof(m_pwszSurname[0]), UnicodeCompare) == NULL) {
        return FALSE;
    }

    //  Calculate probability to be a proper noun
    FLOAT flProbability = 1;
    CharProb Key;

    for (UINT i = 1; i < uCount; ++i) {
        //  CharCompare only inspects dwUnicode, so that is all the key needs
        Key.dwUnicode = lpcwszChar[i];

        PCharProb pCharProb = (PCharProb) bsearch(&Key, m_pCharProb,
            m_dwTotalCharProbNum, sizeof(m_pCharProb[0]), CharCompare);

        if (pCharProb != NULL) {
            flProbability *= pCharProb->flProbability;
        } else {
            flProbability *= (FLOAT) FL_DEFAULT_CHAR_PROBABILITY;
        }
    }

    return (flProbability >= m_dProperNameThreshold) ? TRUE : FALSE;
}

//  Tells whether the uCount characters at lpwszChar look like an English
//  (transliterated) name.
//
//  The pair (first character, last character) is looked up in the sorted
//  EngName pair table of m_pEngNameData.
//
//  lpwszChar  Text to test; only the first and last of the uCount
//             characters are read.
//  uCount     Number of characters to test.
//
//  Returns TRUE when the pair is found.  Returns FALSE when it is not
//  found, when the span is NULL or empty, or when no English-name data has
//  been loaded.
BOOL CProperNoun::IsAEnglishName(
    LPCWSTR lpwszChar,
    UINT uCount)
{
    //  m_pEngNameData stays NULL unless some loader has filled it in (the
    //  loader in InitData is currently disabled), and uCount - 1 would wrap
    //  around for an empty span - neither case can match anything.
    if (m_pEngNameData == NULL || lpwszChar == NULL || uCount == 0) {
        return FALSE;
    }

    //  Search key; EngNameCompare reads only these two fields.  Kept on the
    //  stack (not static) so that concurrent calls cannot clobber it.
    EngName Name;
    Name.wPrevUnicode = lpwszChar[0];
    Name.wNextUnicode = lpwszChar[uCount - 1];

    //  The element count of pEngNamePair is dwTotalEngNamePairNum;
    //  dwTotalEngUnicodeNum is the length of the separate pwUnicode array
    //  (see the data layout in the disabled loader in InitData).
    if (bsearch(&Name, m_pEngNameData->pEngNamePair,
            m_pEngNameData->dwTotalEngNamePairNum, sizeof(EngName),
            EngNameCompare) != NULL) {
        return TRUE;
    }

    return FALSE;
}

//  Surname table used by IsAChineseName.
//
//  Each entry is a fixed-size WCHAR[3] holding a NUL-terminated string.
//  The order written here does not matter: InitData() sorts the table with
//  UnicodeCompare before IsAChineseName() searches it with bsearch.
//
//  NOTE(review): the literals contain non-ASCII bytes in the source file's
//  native multi-byte encoding (presumably a Traditional Chinese code page -
//  confirm), so the file must be compiled with the matching code page.
//  Entries ending in a doubled backslash presumably have a character whose
//  second byte is 0x5C, escaped so it is not read as the start of an escape
//  sequence - confirm before editing.
WCHAR CProperNoun::m_pwszSurname[][3] = {
    L"�B",
    L"�R",
    L"�_",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�C",
    L"�K",
    L"�T",
    L"�]",
    L"�q",
    L"�v",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�V",
    L"�w",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�E",
    L"�d",
    L"�f",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�H",
    L"�L",
    L"�f",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�P",
    L"�s",
    L"�u",
    L"�x",
    L"�}",
    L"��",
    L"��",
    L"��",
    L"�L",
    L"�Z",
    L"�k",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�J",
    L"�\\",
    L"��",
    L"��",
    L"��",
    L"�I",
    L"�R",
    L"�_",
    L"�d",
    L"�h",
    L"�q",
    L"�x",
    L"��",
    L"�J",
    L"�S",
    L"�]",
    L"�p",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�L",
    L"�V",
    L"�]",
    L"�c",
    L"�u",
    L"�}",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�Z",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�K",
    L"�q",
    L"�|",
    L"�}",
    L"��",
    L"��",
    L"�O",
    L"�Z",
    L"�d",
    L"�h",
    L"�i",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�\\",
    L"�s",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�^",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�J",
    L"�q",
    L"�{",
    L"��",
    L"��",
    L"��",
    L"�O",
    L"�P",
    L"�R",
    L"�d",
    L"�k",
    L"�s",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�q",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�Q",
    L"�l",
    L"�p",
    L"��",
    L"��",
    L"�a",
    L"��",
    L"��",
    L"��",
    L"�p",
    L"�u",
    L"��",
    L"��",
    L"��",
    L"�B",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�G",
    L"�H",
    L"�|",
    L"��",
    L"��",
    L"��",
    L"�P",
    L"�c",
    L"�p",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�F",
    L"�N",
    L"�R",
    L"�d",
    L"�j",
    L"�s",
    L"��",
    L"��",
    L"��",
    L"�t",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"��",
    L"£",
    L"²",
    L"¿",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�C",
    L"�Q",
    L"�e",
    L"ù",
    L"��",
    L"��",
    L"��",
    L"��",
    L"�Y",
    L"�u",
    L"ĩ",
    L"Ī",
    L"Ĭ",
    L"��",
    L"��",
    L"�U",
    L"��",
    L"��",
    L"�e",
    L"�s",
    L"м",
    L"�\\",
    L"�k"
};

//  Number of entries in m_pwszSurname, derived from the array's size so it
//  follows the table automatically when entries are added or removed.
DWORD CProperNoun::m_dwTotalSurnameNum = sizeof(m_pwszSurname) / sizeof(m_pwszSurname[0]);