#include "stdafx.h" #include "HanjaLex.h" #include "LexHeader.h" #define UNICODE_CJK_UNIFIED_IDEOGRAPHS_START 0x4E00 #define UNICODE_CJK_UNIFIED_IDEOGRAPHS_END 0x9FFF #define UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_START 0xF900 #define UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_END 0xFAFF inline BOOL fIsHanja(WCHAR wcCh) { return (wcCh >= UNICODE_CJK_UNIFIED_IDEOGRAPHS_START && wcCh <= UNICODE_CJK_UNIFIED_IDEOGRAPHS_END) || (wcCh >= UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_START && wcCh <= UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_END); } // This data extracted from Hanja MDB static WORD wNumOfHanjaMap[TOTAL_NUMBER_OF_HANGUL_MAPPING] = { 38, 27, 23, 48, 83, 94, 75, 68, 86, 56, 66, 82, 30, 18, 51, 42, 52, 48, // 987 symbols //////////////////////////////////// 44, 17, 35, 16, 35, 7, 41, 27, 2, 7, 1, 31, 18, 7, 9, 5, 3, 13, 20, 9, 15, 65, 36, 68, 12, 24, 7, 28, 1, 25, 9, 32, 10, 24, 7, 16, 1, 8, 44, 103, 10, 9, 8, 7, 16, 5, 20, 6, 32, 9, 1, 11, 25, 1, 19, 13, 6, 100, 1, 6, 1, 1, 20, 8, 13, 2, 11, 5, 8, 11, 1, 1, 1, 4, 4, 6, 24, 6, 1, 11, 7, 7, 10, 1, 3, 4, 1, 2, 1, 6, 8, 2, 2, 3, 29, 10, 29, 5, 28, 22, 1, 2, 59, 12, 13, 4, 33, 18, 8, 1, 1, 17, 18, 11, 16, 4, 12, 4, 14, 6, 1, 3, 19, 29, 13, 19, 9, 8, 3, 31, 7, 32, 14, 1, 14, 19, 24, 2, 20, 19, 4, 7, 6, 3, 3, 4, 8, 40, 21, 6, 5, 14, 8, 28, 9, 18, 23, 8, 10, 4, 16, 4, 16, 1, 39, 9, 2, 13, 16, 27, 3, 19, 3, 33, 23, 5, 25, 33, 16, 41, 25, 9, 15, 6, 12, 2, 17, 16, 6, 25, 23, 26, 1, 1, 22, 70, 1, 38, 13, 9, 86, 27, 8, 79, 8, 24, 5, 11, 11, 40, 4, 8, 7, 40, 22, 48, 21, 15, 11, 20, 13, 60, 11, 8, 3, 11, 8, 2, 82, 19, 36, 5, 4, 3, 4, 8, 12, 45, 18, 34, 6, 16, 4, 1, 1, 29, 25, 14, 12, 19, 4, 12, 31, 11, 8, 12, 17, 42, 14, 6, 13, 3, 12, 4, 2, 1, 25, 17, 58, 11, 27, 8, 52, 43, 55, 5, 17, 3, 18, 17, 26, 1, 8, 5, 14, 54, 8, 34, 52, 10, 19, 3, 2, 41, 5, 39, 87, 8, 15, 7, 6, 20, 2, 11, 6, 4, 29, 49, 11, 35, 11, 15, 6, 5, 52, 21, 8, 9, 6, 56, 23, 8, 46, 32, 81, 11, 20, 9, 74, 40, 80, 5, 3, 3, 27, 9, 1, 62, 2, 34, 2, 5, 3, 2, 5, 14, 49, 6, 44, 20, 3, 9, 7, 23, 10, 23, 7, 26, 41, 14, 10, 9, 24, 37, 20, 18, 15, 12, 21, 56, 12, 5, 15, 1, 11, 51, 17, 3, 4, 9, 9, 17, 7, 1, 42, 4, 5, 3, 16, 1, 2, 3, 27, 25, 15, 3, 7, 6, 9, 21, 3, 3, 1, 4, 3, 10, 7, 9, 1, 3, 1, 30, 10, 5, 16, 8, 1, 19, 3, 11, 14, 40, 6, 27, 3, 7, 11, 1, 22, 3, 22, 11, 23, 3, 17, 14, 22, 29, 4, 6, 10, 5, 7, 1, 5, 8, 33, 6, 1, 19, 25, 17, 59, 3, 11, 4, 15, 21, 10, 29, 6, 31, 29, 3, 6, 21, 24, 16, 1, 1, 7, 7, 10, 10, 4, 7, 1, 10, 9, 4, 8, 1, 31, 6 }; static HanjaPronouncEntry HangulToHanjaTables[TOTAL_NUMBER_OF_HANGUL_MAPPING]; static HanjaToHangulIndex HanjaToHangulTables[TOTAL_NUMBER_OF_HANJA]; static int s_iCurHanjaToHangul = 0; static int s_iCurHangulIndex = 0; static int s_iCurHanjaindex = 0; static _DictHeader s_DictHeader; HanjaEntry::HanjaEntry() { wUnicode = 0; memset(szSense, NULL, sizeof(szSense)); } HanjaPronouncEntry::HanjaPronouncEntry() { iNumOfK0 = iNumOfK1 = 0; wUniHangul = 0; pHanjaEntry = NULL; } void InitHanjaEntryTable() { s_iCurHangulIndex = s_iCurHanjaindex = 0; for (int i=0; iwchHanja < pHangulToHanjaEntry2->wchHanja) return -1; else if (pHangulToHanjaEntry1->wchHanja == pHangulToHanjaEntry2->wchHanja) return 0; else return 1; } const int ENTRY_BUFFER_SIZE = 1024*200; void SaveLex() { HANDLE hMainDict; DWORD writtenBytes, dwCurBuf; char *lpbuf = new char[DICT_HEADER_SIZE]; _LexIndex *pIndexTbl = new _LexIndex[TOTAL_NUMBER_OF_HANGUL_MAPPING]; HGLOBAL hEntryBuffer; LPBYTE lpEntryBuffer; INT iCurHanjaToHangul = 0; memset(lpbuf, 0, DICT_HEADER_SIZE); memset(pIndexTbl, 0, 
           TOTAL_NUMBER_OF_HANGUL_MAPPING*sizeof(_LexIndex));

    hEntryBuffer = GlobalAlloc(GHND, ENTRY_BUFFER_SIZE);
    lpEntryBuffer = (LPBYTE)GlobalLock(hEntryBuffer);

    ////////////////////////////////
    // LEX internal version
    s_DictHeader.Version = LEX_VERSION;     //0x1000 - set on 97/06/27
    ////////////////////////////////
    s_DictHeader.NumOfHangulEntry = TOTAL_NUMBER_OF_HANGUL_MAPPING;
    s_DictHeader.MaxNumOfHanja = MAX_NUMBER_OF_HANJA_SAME_PRONUNC;
    s_DictHeader.Headersize = DICT_HEADER_SIZE;

    // it will be used as OpenDict offset parameter
    s_DictHeader.iBufferStart = DICT_HEADER_SIZE +
                                TOTAL_NUMBER_OF_HANGUL_MAPPING*sizeof(_LexIndex) +
                                TOTAL_NUMBER_OF_HANJA*sizeof(HanjaToHangulIndex);

    // !!! Hanja number hard-coded now !!!
    s_DictHeader.uiNumofHanja = TOTAL_NUMBER_OF_HANJA;
    s_DictHeader.iHanjaToHangulIndex = DICT_HEADER_SIZE +
                                       TOTAL_NUMBER_OF_HANGUL_MAPPING*sizeof(_LexIndex);

    hMainDict = CreateFile(LEX_FILE_NAME, GENERIC_WRITE, 0, 0, CREATE_ALWAYS,
                           FILE_FLAG_SEQUENTIAL_SCAN, 0);

    memcpy(lpbuf, &s_DictHeader, sizeof(_DictHeader));
    WriteFile(hMainDict, lpbuf, DICT_HEADER_SIZE, &writtenBytes, NULL);
    _ASSERT(DICT_HEADER_SIZE == writtenBytes);

    dwCurBuf = 0;
    // for (int i=0; i