2025-04-27 07:49:33 -04:00

281 lines
9.0 KiB
C++

#include "stdafx.h"
#include "HanjaLex.h"
#include "LexHeader.h"
// Inclusive UTF-16 code unit bounds of the two Unicode blocks that fIsHanja()
// accepts: CJK Unified Ideographs and CJK Compatibility Ideographs.
#define UNICODE_CJK_UNIFIED_IDEOGRAPHS_START 0x4E00
#define UNICODE_CJK_UNIFIED_IDEOGRAPHS_END 0x9FFF
#define UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_START 0xF900
#define UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_END 0xFAFF
// Tells whether a UTF-16 code unit is treated as a Hanja character.
//   wcCh - code unit to classify.
// Returns TRUE when wcCh falls inside the CJK Unified Ideographs range or the
// CJK Compatibility Ideographs range (both bounds inclusive), FALSE otherwise.
inline
BOOL fIsHanja(WCHAR wcCh)
{
    if (wcCh >= UNICODE_CJK_UNIFIED_IDEOGRAPHS_START &&
        wcCh <= UNICODE_CJK_UNIFIED_IDEOGRAPHS_END)
    {
        return TRUE;
    }

    if (wcCh >= UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_START &&
        wcCh <= UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_END)
    {
        return TRUE;
    }

    return FALSE;
}
// Number of entries stored under each Hangul reading, indexed in the same
// order in which records are fed to Append(). Append() uses
// wNumOfHanjaMap[i] both as the allocation size of
// HangulToHanjaTables[i].pHanjaEntry and as the count that triggers the move
// to the next Hangul slot, so these values must match the input data exactly.
// This data was extracted from the Hanja MDB.
static
WORD wNumOfHanjaMap[TOTAL_NUMBER_OF_HANGUL_MAPPING] =
{
// The first 18 counts add up to 987; per the original note they are symbols
// (presumably symbol mappings rather than Hanja readings - TODO confirm).
38, 27, 23, 48, 83, 94, 75, 68, 86, 56,
66, 82, 30, 18, 51, 42, 52, 48, // 987 symbols
////////////////////////////////////
// Remaining counts: one value per Hangul reading.
44, 17, 35, 16, 35, 7, 41, 27, 2, 7,
1, 31, 18, 7, 9, 5, 3, 13, 20, 9,
15, 65, 36, 68, 12, 24, 7, 28, 1, 25,
9, 32, 10, 24, 7, 16, 1, 8, 44, 103,
10, 9, 8, 7, 16, 5, 20, 6, 32, 9,
1, 11, 25, 1, 19, 13, 6, 100, 1, 6,
1, 1, 20, 8, 13, 2, 11, 5, 8, 11,
1, 1, 1, 4, 4, 6, 24, 6, 1, 11,
7, 7, 10, 1, 3, 4, 1, 2, 1, 6,
8, 2, 2, 3, 29, 10, 29, 5, 28, 22,
1, 2, 59, 12, 13, 4, 33, 18, 8, 1,
1, 17, 18, 11, 16, 4, 12, 4, 14, 6,
1, 3, 19, 29, 13, 19, 9, 8, 3, 31,
7, 32, 14, 1, 14, 19, 24, 2, 20, 19,
4, 7, 6, 3, 3, 4, 8, 40, 21, 6,
5, 14, 8, 28, 9, 18, 23, 8, 10, 4,
16, 4, 16, 1, 39, 9, 2, 13, 16, 27,
3, 19, 3, 33, 23, 5, 25, 33, 16, 41,
25, 9, 15, 6, 12, 2, 17, 16, 6, 25,
23, 26, 1, 1, 22, 70, 1, 38, 13, 9,
86, 27, 8, 79, 8, 24, 5, 11, 11, 40,
4, 8, 7, 40, 22, 48, 21, 15, 11, 20,
13, 60, 11, 8, 3, 11, 8, 2, 82, 19,
36, 5, 4, 3, 4, 8, 12, 45, 18, 34,
6, 16, 4, 1, 1, 29, 25, 14, 12, 19,
4, 12, 31, 11, 8, 12, 17, 42, 14, 6,
13, 3, 12, 4, 2, 1, 25, 17, 58, 11,
27, 8, 52, 43, 55, 5, 17, 3, 18, 17,
26, 1, 8, 5, 14, 54, 8, 34, 52, 10,
19, 3, 2, 41, 5, 39, 87, 8, 15, 7,
6, 20, 2, 11, 6, 4, 29, 49, 11, 35,
11, 15, 6, 5, 52, 21, 8, 9, 6, 56,
23, 8, 46, 32, 81, 11, 20, 9, 74, 40,
80, 5, 3, 3, 27, 9, 1, 62, 2, 34,
2, 5, 3, 2, 5, 14, 49, 6, 44, 20,
3, 9, 7, 23, 10, 23, 7, 26, 41, 14,
10, 9, 24, 37, 20, 18, 15, 12, 21, 56,
12, 5, 15, 1, 11, 51, 17, 3, 4, 9,
9, 17, 7, 1, 42, 4, 5, 3, 16, 1,
2, 3, 27, 25, 15, 3, 7, 6, 9, 21,
3, 3, 1, 4, 3, 10, 7, 9, 1, 3,
1, 30, 10, 5, 16, 8, 1, 19, 3, 11,
14, 40, 6, 27, 3, 7, 11, 1, 22, 3,
22, 11, 23, 3, 17, 14, 22, 29, 4, 6,
10, 5, 7, 1, 5, 8, 33, 6, 1, 19,
25, 17, 59, 3, 11, 4, 15, 21, 10, 29,
6, 31, 29, 3, 6, 21, 24, 16, 1, 1,
7, 7, 10, 10, 4, 7, 1, 10, 9, 4,
8, 1, 31, 6
};
// Forward table: one slot per Hangul reading; Append() fills it and SaveLex()
// serializes it.
static HanjaPronouncEntry HangulToHanjaTables[TOTAL_NUMBER_OF_HANGUL_MAPPING];
// Reverse table: one slot per character accepted by fIsHanja(); Append()
// stores the Hanja/Hangul pair, SaveLex() fills in the offset and sorts it.
static HanjaToHangulIndex HanjaToHangulTables[TOTAL_NUMBER_OF_HANJA];
// Next free slot in HanjaToHangulTables (advanced by Append()).
static int s_iCurHanjaToHangul = 0;
// Index into HangulToHanjaTables of the Hangul slot Append() is filling.
static int s_iCurHangulIndex = 0;
// Next free position inside the current slot's pHanjaEntry array.
static int s_iCurHanjaindex = 0;
// File header that SaveLex() populates and writes at the start of the lexicon.
static _DictHeader s_DictHeader;
// Creates an empty entry: no Hanja code and an all-zero sense string.
HanjaEntry::HanjaEntry()
{
    wUnicode = 0;
    // memset's second argument is an int fill byte, not a pointer, so pass 0
    // rather than the pointer constant NULL.
    memset(szSense, 0, sizeof(szSense));
}
// Creates an empty Hangul slot: no Hangul code, zero K0/K1 counts and no
// Hanja entry array attached yet.
HanjaPronouncEntry::HanjaPronouncEntry()
{
    pHanjaEntry = NULL;
    wUniHangul  = 0;
    iNumOfK1    = 0;
    iNumOfK0    = 0;
}
void InitHanjaEntryTable()
{
s_iCurHangulIndex = s_iCurHanjaindex = 0;
for (int i=0; i<TOTAL_NUMBER_OF_HANGUL_MAPPING; i++)
HangulToHanjaTables[i].pHanjaEntry = NULL;
}
// Adds one record to the in-memory lexicon tables.
//
//   iKCode      - _K_K0 or _K_K1; selects which per-Hangul counter is bumped.
//   wUniHanja   - code of the character being added.
//   pszPronounc - pronunciation string; only its first character (the Hangul
//                 reading) is used.
//   pszSense    - sense text stored with the entry.
//
// Records must arrive grouped by Hangul reading and in the order described by
// wNumOfHanjaMap: once wNumOfHanjaMap[n] records have been stored for slot n,
// the next record opens slot n+1.
//
// Returns TRUE when the record was stored. Returns FALSE, without changing any
// table, when an argument is invalid or when a table is already full.
BOOL Append(int iKCode, WCHAR wUniHanja, LPTSTR pszPronounc, LPTSTR pszSense)
{
    static WCHAR wcPrevHangul;
    const BOOL   fHanja = fIsHanja(wUniHanja);

    // Validate everything before touching any state so that a rejected
    // record cannot leave the tables half-updated.
    if (pszPronounc == NULL || pszSense == NULL)
    {
        ASSERT(0);
        return FALSE;
    }
    if (iKCode != _K_K0 && iKCode != _K_K1)
    {
        ASSERT(0);
        return FALSE;
    }
    if (fHanja && s_iCurHanjaToHangul >= TOTAL_NUMBER_OF_HANJA)
    {
        // Reverse index is full; writing would overrun HanjaToHangulTables.
        ASSERT(0);
        return FALSE;
    }

    // if first run
    if (s_iCurHangulIndex == 0 && s_iCurHanjaindex == 0)
    {
        HangulToHanjaTables[0].pHanjaEntry = new HanjaEntry[wNumOfHanjaMap[s_iCurHangulIndex]];
    }

    // Check if Next Hangul
    if (s_iCurHanjaindex == wNumOfHanjaMap[s_iCurHangulIndex])
    {
        // Enforce the bound in release builds too, before advancing.
        if (s_iCurHangulIndex + 1 >= TOTAL_NUMBER_OF_HANGUL_MAPPING)
        {
            ASSERT(0);
            return FALSE;
        }
        s_iCurHangulIndex++;
        s_iCurHanjaindex = 0;
        HangulToHanjaTables[s_iCurHangulIndex].pHanjaEntry = new HanjaEntry[wNumOfHanjaMap[s_iCurHangulIndex]];
    }

    HanjaPronouncEntry *pHangul = &HangulToHanjaTables[s_iCurHangulIndex];
    HanjaEntry         *pHanja  = &pHangul->pHanjaEntry[s_iCurHanjaindex];

    if (s_iCurHanjaindex)
        ASSERT(wcPrevHangul == pszPronounc[0]);   // still inside the same reading
    else
        pHangul->wUniHangul = pszPronounc[0];     // first record names the slot

    // Add one Hangul to Hanja info
    pHanja->wUnicode = wUniHanja;
    // Check the incoming text (the destination is still empty here) and bound
    // the copy by the real array size so an oversized sense cannot overflow.
    ASSERT(lstrlen(pszSense) < MAX_SENSE);
    lstrcpyn(pHanja->szSense, pszSense,
             (int)(sizeof(pHanja->szSense) / sizeof(pHanja->szSense[0])));
    s_iCurHanjaindex++;

    // Increase K0 or K1
    if (iKCode == _K_K0)
        pHangul->iNumOfK0++;
    else
        pHangul->iNumOfK1++;

    // Characters accepted by fIsHanja() also go into the reverse index;
    // SaveLex() later fills in their offsets in this same order.
    if (fHanja)
    {
        HanjaToHangulTables[s_iCurHanjaToHangul].wchHanja  = wUniHanja;
        HanjaToHangulTables[s_iCurHanjaToHangul].wchHangul = pszPronounc[0];
        s_iCurHanjaToHangul++;
    }

    wcPrevHangul = pszPronounc[0];
    return TRUE;
}
int compare( const void *arg1, const void *arg2 )
{
const HanjaToHangulIndex* pHangulToHanjaEntry1, *pHangulToHanjaEntry2;
pHangulToHanjaEntry1 = (HanjaToHangulIndex*)arg1;
pHangulToHanjaEntry2 = (HanjaToHangulIndex*)arg2;
if (pHangulToHanjaEntry1->wchHanja < pHangulToHanjaEntry2->wchHanja)
return -1;
else
if (pHangulToHanjaEntry1->wchHanja == pHangulToHanjaEntry2->wchHanja)
return 0;
else
return 1;
}
const int ENTRY_BUFFER_SIZE = 1024*200;
void SaveLex()
{
HANDLE hMainDict;
DWORD writtenBytes, dwCurBuf;
char *lpbuf = new char[DICT_HEADER_SIZE];
_LexIndex *pIndexTbl = new _LexIndex[TOTAL_NUMBER_OF_HANGUL_MAPPING];
HGLOBAL hEntryBuffer;
LPBYTE lpEntryBuffer;
INT iCurHanjaToHangul = 0;
memset(lpbuf, 0, DICT_HEADER_SIZE);
memset(pIndexTbl, 0, TOTAL_NUMBER_OF_HANGUL_MAPPING*sizeof(_LexIndex));
hEntryBuffer = GlobalAlloc(GHND, ENTRY_BUFFER_SIZE);
lpEntryBuffer = (LPBYTE)GlobalLock(hEntryBuffer);
////////////////////////////////
// LEX internal version
s_DictHeader.Version = LEX_VERSION; //0x1000 - set on 97/06/27
////////////////////////////////
s_DictHeader.NumOfHangulEntry = TOTAL_NUMBER_OF_HANGUL_MAPPING;
s_DictHeader.MaxNumOfHanja = MAX_NUMBER_OF_HANJA_SAME_PRONUNC;
s_DictHeader.Headersize = DICT_HEADER_SIZE; // it will be used as OpenDict offset parameter
s_DictHeader.iBufferStart = DICT_HEADER_SIZE +
TOTAL_NUMBER_OF_HANGUL_MAPPING*sizeof(_LexIndex) +
TOTAL_NUMBER_OF_HANJA*sizeof(HanjaToHangulIndex);
// !!! Hanja number hard-coded now !!!
s_DictHeader.uiNumofHanja = TOTAL_NUMBER_OF_HANJA;
s_DictHeader. iHanjaToHangulIndex = DICT_HEADER_SIZE + TOTAL_NUMBER_OF_HANGUL_MAPPING*sizeof(_LexIndex);
hMainDict = CreateFile(LEX_FILE_NAME, GENERIC_WRITE, 0, 0,
CREATE_ALWAYS, FILE_FLAG_SEQUENTIAL_SCAN, 0);
memcpy(lpbuf, &s_DictHeader, sizeof(_DictHeader));
WriteFile(hMainDict, lpbuf, DICT_HEADER_SIZE, &writtenBytes, NULL);
_ASSERT(DICT_HEADER_SIZE==writtenBytes);
dwCurBuf = 0;
//
for (int i=0; i<TOTAL_NUMBER_OF_HANGUL_MAPPING; i++) {
BYTE iSenseLen;
pIndexTbl[i].wcHangul = HangulToHanjaTables[i].wUniHangul;
pIndexTbl[i].wNumOfK0 = HangulToHanjaTables[i].iNumOfK0;
pIndexTbl[i].wNumOfK1 = HangulToHanjaTables[i].iNumOfK1;
pIndexTbl[i].iOffset = dwCurBuf;
for (int iCurIndex=0;
iCurIndex<HangulToHanjaTables[i].iNumOfK0 + HangulToHanjaTables[i].iNumOfK1;
iCurIndex++)
{
// Set Hanja to Hangul table offset
if (fIsHanja(HangulToHanjaTables[i].pHanjaEntry[iCurIndex].wUnicode))
HanjaToHangulTables[iCurHanjaToHangul++].iOffset = dwCurBuf;
*(WCHAR*)(lpEntryBuffer+dwCurBuf) = HangulToHanjaTables[i].pHanjaEntry[iCurIndex].wUnicode;
dwCurBuf += 2;
// number of byte !!
iSenseLen = lstrlen(HangulToHanjaTables[i].pHanjaEntry[iCurIndex].szSense)*2;
*(BYTE*)(lpEntryBuffer+dwCurBuf) = iSenseLen;
dwCurBuf++;
if (iSenseLen)
{
lstrcpy((LPTSTR)(lpEntryBuffer+dwCurBuf), HangulToHanjaTables[i].pHanjaEntry[iCurIndex].szSense);
dwCurBuf += iSenseLen;
}
}
}
ASSERT(iCurHanjaToHangul == TOTAL_NUMBER_OF_HANJA);
WriteFile(hMainDict, pIndexTbl, TOTAL_NUMBER_OF_HANGUL_MAPPING*sizeof(_LexIndex),
&writtenBytes, NULL);
qsort( (void *)HanjaToHangulTables, (size_t)iCurHanjaToHangul, sizeof(HanjaToHangulIndex), compare );
WriteFile(hMainDict, HanjaToHangulTables, TOTAL_NUMBER_OF_HANJA*sizeof(HanjaToHangulIndex),
&writtenBytes, NULL);
WriteFile(hMainDict, lpEntryBuffer, dwCurBuf, &writtenBytes, NULL);
/*
WriteFile(hMainDict, lpSilsaBuffer, SilsaDictSize, &writtenBytes, NULL);
_ASSERT(SilsaDictSize==writtenBytes);
WriteFile(hMainDict, lpHeosaBuffer, HeosaDictSize, &writtenBytes, NULL);
_ASSERT(HeosaDictSize==writtenBytes);
WriteFile(hMainDict, lpOyongBuffer, OyongDictSize, &writtenBytes, NULL);
_ASSERT(OyongDictSize==writtenBytes);
*/
CloseHandle(hMainDict);
GlobalFree(hEntryBuffer);
delete [] lpbuf;
delete [] pIndexTbl;
}
// Frees the Hanja entry array owned by every Hangul slot and clears the
// pointers so the tables can be safely reinitialized or freed again.
void DeleteAllLexTable()
{
    for (int i = 0; i < TOTAL_NUMBER_OF_HANGUL_MAPPING; i++)
    {
        // Index by the loop counter: indexing by s_iCurHangulIndex freed the
        // same slot on every pass and leaked all the others.
        delete [] HangulToHanjaTables[i].pHanjaEntry;
        HangulToHanjaTables[i].pHanjaEntry = 0;
    }
}