985 lines
29 KiB
C
985 lines
29 KiB
C
#include <mvopsys.h>
|
|
#include <mem.h>
|
|
#include <memory.h>
|
|
#include <time.h>
|
|
#include "mvsearch.h"
|
|
#include "common.h"
|
|
#include "index.h"
|
|
|
|
#ifdef _DEBUG
|
|
static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
|
|
#endif
|
|
|
|
|
|
#define FILEBUF_SIZE 0xff00
|
|
typedef struct HUGEARRAY
|
|
{
|
|
HANDLE hMem;
|
|
LPB FAR *hrgStrArray;
|
|
DWORD dwCount;
|
|
DWORD dwSize;
|
|
DWORD dwBufCount;
|
|
LPVOID pBlockMgr;
|
|
FCOMPARE lpfnCompare;
|
|
LPVOID lpSortParm;
|
|
STATUS_FUNC PrintStatusFunc;
|
|
INTERRUPT_FUNC lpfnInterrupt;
|
|
LPVOID lpInterruptParm;
|
|
ESI esi;
|
|
} HUGEARRAY, FAR *PHUGEARRAY;
|
|
|
|
PUBLIC int PASCAL FAR CompareLine(LPB, LPB, LPVOID);
|
|
|
|
/*************************************************************************
|
|
*
|
|
* INTERNAL PRIVATE FUNCTIONS
|
|
* All of them should be declared near
|
|
*************************************************************************/
|
|
|
|
PRIVATE HRESULT PASCAL FAR GetLine(LPFBI, LPB, LPV);
|
|
PRIVATE HRESULT PASCAL NEAR PrioritySortList (HFPB, PHUGEARRAY, LPVOID);
|
|
PUBLIC HRESULT PASCAL FAR QueueCompare (LPESB, LPESB, WORD);
|
|
PRIVATE DWORD PASCAL NEAR EsbBlockFill (LPESI lpesi, LPESB lpesb,
|
|
PHRESULT phr);
|
|
PRIVATE VOID PASCAL NEAR PriorityQueueUp (LPESB FAR *lrgPriorityQueue,
|
|
FCOMPARE fCompare, LPVOID lpParm, WORD index);
|
|
PRIVATE HRESULT PASCAL NEAR HugeArrayAddWord(HUGEARRAY FAR *phugeArray,
|
|
LPB pLineBuf);
|
|
PRIVATE HRESULT NEAR PASCAL HugeArrayFlush (PHUGEARRAY pHugeArray);
|
|
PRIVATE HRESULT PASCAL NEAR FlushISI (PHUGEARRAY pHugeArray);
|
|
PRIVATE VOID PASCAL NEAR PriorityQueueFree (LPESI lpesi);
|
|
|
|
/*************************************************************************
|
|
* @doc EXTERNAL API
|
|
* @func HRESULT PASCAL FAR | FileSort |
|
|
* Given a text file, this function will sort all the lines into
|
|
* specified order. The text file will be overwritten and replaced
|
|
* by the new sorted file
|
|
* @parm LPB | Filename |
|
|
* File to be sorted. It will be overwritten/replaced with the new
|
|
* sorted file
|
|
* @parm STATUS_FUNC | PrintStatusFunc |
|
|
* Callback messaging function to display status message
|
|
* @parm INTERRUPT_FUNC | lpfnInterrupt |
|
|
* Callback interrupt function to stop the file sort.
|
|
* @parm LPVOID | lpInteruptParm |
|
|
* Parameter to interrupt function
|
|
* @parm FNSORT | fnSort |
|
|
* Sorting function to be used for this file. If not specified
|
|
* the default sort function will be used, which consists of case
|
|
* sesitive comparison
|
|
* @parm LPVOID | lpSortParm |
|
|
* Any extra information that may be needed by the sorting function
|
|
* @parm BOOL | fUseSortFunc |
|
|
* Flag to denote to use user sorting function if set
|
|
* @rdesc S_OK or other errors
|
|
*************************************************************************/
|
|
HRESULT EXPORT_API PASCAL FAR FileSort (HFPB hfpb, LPB Filename,
|
|
STATUS_FUNC PrintStatusFunc, INTERRUPT_FUNC lpfnInterrupt,
|
|
LPV lpInterruptParm, FNSORT fnSort, LPVOID lpSortParm,
|
|
FNSCAN fnScan, LPVOID lpScanParam)
|
|
{
|
|
HRESULT fRet = S_OK;
|
|
BYTE OutputBuffer[cbMAX_PATH + 50];
|
|
BYTE count = 0;
|
|
HFPB hfpbIn;
|
|
LPFBI lpfbi;
|
|
HANDLE hBuf;
|
|
LPB pLineBuf;
|
|
HUGEARRAY hugeArray;
|
|
FNSCAN fnScanInternal = GetLine;
|
|
|
|
// Clear out all fields in hugeArray
|
|
MEMSET (&hugeArray, 0, sizeof (HUGEARRAY));
|
|
|
|
// Allocate a huge buffer to store the strings
|
|
if ((hugeArray.hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
|
|
200000 * sizeof (LPB))) == NULL)
|
|
return(E_OUTOFMEMORY);
|
|
|
|
hugeArray.hrgStrArray = (LPB FAR *)_GLOBALLOCK (hugeArray.hMem);
|
|
hugeArray.dwBufCount = hugeArray.dwCount = 0;
|
|
hugeArray.dwSize = 200000;
|
|
|
|
if ((hugeArray.pBlockMgr = BlockInitiate (FILEBUF_SIZE, 0, 0, 0)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
exit0:
|
|
_GLOBALUNLOCK(hugeArray.hMem);
|
|
_GLOBALFREE(hugeArray.hMem);
|
|
return(fRet);
|
|
}
|
|
|
|
/* Set the comparison fucntion */
|
|
if (fnSort)
|
|
{
|
|
hugeArray.lpfnCompare = (FCOMPARE) fnSort;
|
|
hugeArray.lpSortParm = (LPVOID) lpSortParm;
|
|
}
|
|
else
|
|
{
|
|
hugeArray.lpfnCompare = CompareLine;
|
|
hugeArray.lpSortParm = NULL;
|
|
}
|
|
hugeArray.PrintStatusFunc = PrintStatusFunc;
|
|
hugeArray.lpfnInterrupt = lpfnInterrupt;
|
|
hugeArray.lpInterruptParm = lpInterruptParm;
|
|
|
|
/* Open the file */
|
|
if ((hfpbIn = FileOpen (hfpb, Filename,
|
|
hfpb == NULL ? REGULAR_FILE : FS_SUBFILE, READ, NULL)) == 0)
|
|
{
|
|
fRet = E_NOTEXIST;
|
|
exit1:
|
|
BlockFree (hugeArray.pBlockMgr);
|
|
goto exit0;
|
|
}
|
|
|
|
/* Allocate a file buffer associated with the input file */
|
|
if ((lpfbi = FileBufAlloc (hfpbIn, FILEBUF_SIZE)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
|
|
exit2:
|
|
FileClose (hfpbIn);
|
|
goto exit1;
|
|
}
|
|
|
|
if ((hBuf = _GLOBALALLOC (DLLGMEM_ZEROINIT, 0xffff)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
exit3:
|
|
FileBufFree (lpfbi);
|
|
goto exit2;
|
|
}
|
|
|
|
if (PrintStatusFunc)
|
|
{
|
|
wsprintf (OutputBuffer, "Sort file: %s", Filename);
|
|
(*PrintStatusFunc)(OutputBuffer);
|
|
}
|
|
|
|
pLineBuf = (LPB)_GLOBALLOCK (hBuf);
|
|
|
|
/* Set the scan fucntion */
|
|
if (fnScan)
|
|
fnScanInternal = fnScan;
|
|
|
|
for (;;)
|
|
{
|
|
if ((++count & 0x7f) == 0)
|
|
{
|
|
if (lpfnInterrupt)
|
|
(*lpfnInterrupt)(lpInterruptParm);
|
|
}
|
|
if ((fRet = (*fnScanInternal)(lpfbi, pLineBuf, NULL)) != S_OK)
|
|
{
|
|
exit4:
|
|
_GLOBALUNLOCK(hBuf);
|
|
_GLOBALFREE(hBuf);
|
|
goto exit3;
|
|
}
|
|
if (*(LPW)pLineBuf == 0) /* EOF */
|
|
break;
|
|
|
|
if ((fRet = HugeArrayAddWord(&hugeArray, pLineBuf)) != S_OK)
|
|
goto exit4;
|
|
}
|
|
|
|
// Sort and flush any records in the tree to disk
|
|
fRet = HugeArrayFlush (&hugeArray);
|
|
|
|
FileBufFree(hugeArray.esi.lpfbiTemp);
|
|
FileClose (hugeArray.esi.hfpb);
|
|
|
|
// Free all memory blocks.
|
|
BlockFree(hugeArray.pBlockMgr);
|
|
FileBufFree (lpfbi);
|
|
_GLOBALUNLOCK(hugeArray.hMem);
|
|
_GLOBALFREE(hugeArray.hMem);
|
|
_GLOBALUNLOCK(hBuf);
|
|
_GLOBALFREE(hBuf);
|
|
|
|
/* Close the input file */
|
|
FileClose (hfpbIn);
|
|
|
|
if (fRet != S_OK)
|
|
return fRet;
|
|
|
|
/* Now perform external sort */
|
|
if (PrintStatusFunc)
|
|
(*PrintStatusFunc)("Merge Sort Phase");
|
|
|
|
if ((hfpbIn = FileOpen (hfpb, Filename,
|
|
hfpb == NULL ? REGULAR_FILE : FS_SUBFILE, READ_WRITE, NULL)) == 0)
|
|
{
|
|
fRet = E_ASSERT;
|
|
goto exit0;
|
|
}
|
|
|
|
fRet = PrioritySortList (hfpbIn, &hugeArray, lpSortParm);
|
|
FileClose (hfpbIn);
|
|
return fRet;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
* @doc INTERNAL PRIVATE
|
|
* @func HRESULT PASCAL FAR | GetLine |
|
|
* The function will extract a line from a text file. The output line
|
|
* will have the following format
|
|
* - 2 bytes: word length
|
|
* - The word it self
|
|
* - 0 terminated
|
|
* @parm LPFBI | lpfbi |
|
|
* File buffer for flushing out the word
|
|
* @parm LPB lpbOut |
|
|
* Buffer to contain the word. The following assumptions are made:
|
|
* - The size of the buffer is large enough to handle any line.
|
|
* There is no checking for buffer overflow
|
|
* @parm LPV | Not used
|
|
* @rdesc S_OK or other errors
|
|
*************************************************************************/
|
|
PRIVATE HRESULT PASCAL FAR GetLine(LPFBI lpfbi, LPB lpbOut, LPV lpv)
|
|
{
|
|
LPB lpbLimit = lpfbi->lrgbBuf + lpfbi->cbBuf;
|
|
LPB lpbIn = lpfbi->lrgbBuf + lpfbi->ibBuf;
|
|
LPB lpbSaved = lpbOut;
|
|
ERRB errb;
|
|
|
|
*(LPW)lpbOut = 0;
|
|
lpbOut += sizeof(WORD);
|
|
|
|
for (;; lpbIn++)
|
|
{
|
|
if (lpbIn >= lpbLimit)
|
|
{
|
|
lpfbi->ibBuf = (WORD)(lpbIn - lpfbi->lrgbBuf);
|
|
if (FileBufFill (lpfbi, &errb) == cbIO_ERROR)
|
|
return errb;
|
|
|
|
lpfbi->ibBuf = 0;
|
|
lpbIn = lpfbi->lrgbBuf;
|
|
lpbLimit = lpfbi->lrgbBuf + lpfbi->cbBuf;
|
|
|
|
/* EOF */
|
|
if (lpfbi->ibBuf == lpfbi->cbBuf)
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Update the buffer */
|
|
if ((*lpbOut++ = *lpbIn) == '\n')
|
|
{
|
|
lpbIn++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
*lpbOut = 0;
|
|
*(LPW)lpbSaved = (WORD)(lpbOut - lpbSaved - sizeof(WORD));
|
|
lpfbi->ibBuf = (WORD)(lpbIn - lpfbi->lrgbBuf);
|
|
return S_OK;
|
|
}
|
|
|
|
PRIVATE HRESULT PASCAL NEAR PrioritySortList (HFPB hfpb, PHUGEARRAY pHugeArray,
|
|
LPVOID lpParm)
|
|
{
|
|
LPFBI lpfbiOut;
|
|
HRESULT fRet;
|
|
LPESB lpesb; // Pointer to the current queue element.
|
|
LST lstWord;
|
|
LPB lpbCurPtr;
|
|
LPB lpbBufLimit;
|
|
LPB lpbBufStart;
|
|
LPESI lpesi;
|
|
LPESB FAR *lrgPriorityQueue;
|
|
WORD wLen;
|
|
|
|
HANDLE hQueue; // Handle to the word-key queue, which
|
|
// is allocated in global memory.
|
|
|
|
/* Get the pointer to external sort info block */
|
|
lpesi = &pHugeArray->esi;
|
|
|
|
/* Open the internal sort file */
|
|
if ((lpesi->hfpb = FileOpen (NULL, pHugeArray->esi.aszTempName,
|
|
REGULAR_FILE, READ, NULL)) == 0)
|
|
return(E_INVALIDARG);
|
|
|
|
|
|
if ((lpfbiOut = FileBufAlloc (hfpb, FILEBUF_SIZE)) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
exit0:
|
|
FileClose (lpesi->hfpb);
|
|
FileUnlink (NULL, pHugeArray->esi.aszTempName, REGULAR_FILE);
|
|
PriorityQueueFree(lpesi);
|
|
return fRet;
|
|
}
|
|
|
|
/*
|
|
* Allocate a priority queue array. The size of the array
|
|
* is the number of external sort info blocks plus 1, since
|
|
* location 0 is not used
|
|
*/
|
|
if ((lpesi->hPriorityQueue = hQueue = _GLOBALALLOC (DLLGMEM_ZEROINIT,
|
|
(DWORD)(lpesi->cesb + 1) * sizeof(LPB))) == NULL)
|
|
{
|
|
fRet = E_OUTOFMEMORY;
|
|
exit1:
|
|
FileBufFree (lpfbiOut);
|
|
goto exit0;
|
|
}
|
|
|
|
lrgPriorityQueue = lpesi->lrgPriorityQueue =
|
|
(LPESB FAR *)_GLOBALLOCK(hQueue);
|
|
|
|
/* Queue initialization. */
|
|
|
|
if ((fRet = PriorityQueueCreate (lpesi, (FCOMPARE)pHugeArray->lpfnCompare,
|
|
lpParm)) != S_OK)
|
|
{
|
|
exit03:
|
|
if (hQueue != NULL)
|
|
{
|
|
_GLOBALUNLOCK(hQueue);
|
|
_GLOBALFREE(hQueue);
|
|
}
|
|
lpesi->hPriorityQueue = NULL;
|
|
goto exit1;
|
|
}
|
|
|
|
/* Get the pointer to the I/O buffer. I am allocating 256 bytes
|
|
* for data overflow, which should be more than enough to handle
|
|
* a word. The usage of lpbBufLimit will decrease the number of
|
|
* buffer overflow check
|
|
*/
|
|
|
|
lpbBufLimit = (lpbCurPtr = lpfbiOut->lrgbBuf) + lpfbiOut->cbBufSize;// - 256;
|
|
lpbBufStart = lpfbiOut->lrgbBuf;
|
|
|
|
for (;lpesi->uiQueueSize;)
|
|
{
|
|
|
|
/*
|
|
* If here, the queue has something in it. Grab
|
|
* the head of the queue and process. Remember that
|
|
* the first byte is only the record length
|
|
*/
|
|
lpesb = (LPESB)lrgPriorityQueue[1];
|
|
lstWord = (LST)lpesb->lrgbMem + lpesb->ibBuf;
|
|
|
|
/* The first2 bytes are the word's length. */
|
|
wLen = *(LPW)lstWord;
|
|
|
|
if (lpbCurPtr + wLen >= lpbBufLimit)
|
|
{
|
|
lpfbiOut->ibBuf = (WORD) (lpbCurPtr - lpbBufStart);
|
|
if ((fRet = FileBufFlush (lpfbiOut)) != S_OK)
|
|
return fRet;
|
|
lpbCurPtr = lpfbiOut->lrgbBuf;
|
|
}
|
|
MEMCPY (lpbCurPtr, lstWord + sizeof(WORD), wLen);
|
|
lpbCurPtr += wLen;
|
|
|
|
/*
|
|
* Get a new occurence to replace the one I pulled from
|
|
* the head of the queue.
|
|
*/
|
|
if ((fRet = PriorityQueueRemove(lpesi,
|
|
(FCOMPARE)pHugeArray->lpfnCompare, lpParm)) != S_OK)
|
|
return fRet;
|
|
}
|
|
|
|
/* Update the offset, for flushing */
|
|
lpfbiOut->ibBuf = (WORD)(lpbCurPtr - lpfbiOut->lrgbBuf);
|
|
fRet = FileBufFlush (lpfbiOut);
|
|
goto exit03;
|
|
}
|
|
|
|
PUBLIC int PASCAL FAR CompareLine (LPB lpb1, LPB lpb2, LPVOID lpUnused)
|
|
{
|
|
|
|
register int diff;
|
|
|
|
/* Skip word length */
|
|
lpb1 += sizeof(WORD);
|
|
lpb2 += sizeof(WORD);
|
|
|
|
while (*lpb1 && *lpb2)
|
|
{
|
|
if (diff = *lpb1 - *lpb2 )
|
|
{
|
|
return diff;
|
|
}
|
|
lpb1++;
|
|
lpb2++;
|
|
}
|
|
return *lpb1 - *lpb2;
|
|
}
|
|
|
|
PUBLIC HRESULT PASCAL FAR PriorityQueueCreate (LPESI lpesi, FCOMPARE fCompare,
|
|
LPVOID lpParm)
|
|
{
|
|
|
|
WORD uiQueueSize; /* Final queue size. */
|
|
LPESB lpesb; /* Scratch pointer */
|
|
HRESULT fRet = S_OK;
|
|
LPESB FAR * lrgPriorityQueue = lpesi->lrgPriorityQueue;
|
|
ERRB errb;
|
|
|
|
|
|
/*
|
|
* Initialize all ESB buffers.
|
|
*/
|
|
uiQueueSize = 0;
|
|
for (lpesb = lpesi->lpesbRoot; lpesb != NULL;
|
|
lpesb = lpesb->lpesbNext)
|
|
{
|
|
|
|
/* Allocate the buffer */
|
|
|
|
if ((lpesb->hMem = _GLOBALALLOC (DLLGMEM, FILEBUF_SIZE)) == NULL)
|
|
{
|
|
PriorityQueueFree(lpesi);
|
|
return SetErrCode (&errb, fRet = E_OUTOFMEMORY);
|
|
}
|
|
|
|
lpesb->lrgbMem = (LPB)_GLOBALLOCK(lpesb->hMem);
|
|
|
|
/* Mimic an out-of-data to force reading */
|
|
lpesb->ibBuf = lpesb->dwEsbSize = FILEBUF_SIZE;
|
|
|
|
/* Read the data, lpesb->lfo, lpesb->ibBuf will be updated
|
|
* by the call
|
|
*/
|
|
if (EsbBlockFill (lpesi, lpesb, &errb) == cbIO_ERROR)
|
|
{
|
|
PriorityQueueFree(lpesi);
|
|
return errb;
|
|
}
|
|
|
|
/* Sanity check, make sure that we didn't read pass the
|
|
* block.
|
|
*/
|
|
if (FoCompare(lpesb->lfo,lpesb->lfoMax)>0)
|
|
{
|
|
PriorityQueueFree(lpesi);
|
|
return SetErrCode (&errb, E_ASSERT);
|
|
}
|
|
|
|
/* Update the priority queue */
|
|
uiQueueSize ++; // Grow queue.
|
|
|
|
/* Add to the priority queue */
|
|
lrgPriorityQueue[uiQueueSize] = lpesb;
|
|
|
|
PriorityQueueUp (lrgPriorityQueue, fCompare, lpParm, uiQueueSize);
|
|
}
|
|
|
|
lpesi->uiQueueSize = uiQueueSize;
|
|
return S_OK;
|
|
}
|
|
|
|
PRIVATE VOID PASCAL NEAR PriorityQueueFree (LPESI lpesi)
|
|
{
|
|
LPESB lpesb; /* Scratch pointer */
|
|
LPESB lpesbNext;
|
|
|
|
for (lpesb = lpesi->lpesbRoot; lpesb != NULL; lpesb = lpesbNext)
|
|
{
|
|
lpesbNext = lpesb->lpesbNext;
|
|
_GLOBALUNLOCK(lpesb->hMem);
|
|
_GLOBALFREE(lpesb->hMem);
|
|
GlobalLockedStructMemFree (lpesb);
|
|
}
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
*
|
|
* @doc INTERNAL
|
|
*
|
|
* @func VOID | PriorityQueueUp |
|
|
* The function restore the heap condition of a PQ, ie. the parent
|
|
* node must be less than the children. When a node is inserted
|
|
* at the bottom, the heap condition may be violated, if the node
|
|
* is smaller than its parent. In this case the nodes have to
|
|
* be switched
|
|
*
|
|
* @parm LPESB FAR * | lrgPriorityQueue |
|
|
* PQ array
|
|
*
|
|
* @parm FCOMPARE | fCompare |
|
|
* Function used to compare the node
|
|
*
|
|
* @parm LPVOID | lpParm |
|
|
* Parameter to be used with fCompare
|
|
*
|
|
* @parm WORD | index |
|
|
* Index of the inserted node
|
|
*
|
|
*************************************************************************/
|
|
|
|
PRIVATE VOID PASCAL NEAR PriorityQueueUp (LPESB FAR *lrgPriorityQueue,
|
|
FCOMPARE fCompare, LPVOID lpParm, WORD index)
|
|
{
|
|
LPESB lpesbTemp; /* Pointer to the inserted node */
|
|
LPESB lpesbHalf; /* Pointer to the parent node */
|
|
WORD uiHalf; /* Index of the parent's node */
|
|
|
|
lpesbTemp = lrgPriorityQueue [index];
|
|
|
|
uiHalf = index/2;
|
|
lpesbHalf = lrgPriorityQueue [uiHalf];
|
|
|
|
/* If the parent node is greated than the child, then exchange the
|
|
* nodes, The condition uiHalf != index makes sure that we stop
|
|
* at node 0 (top node)
|
|
*/
|
|
while (uiHalf && (*fCompare)(lpesbHalf->lrgbMem + lpesbHalf->ibBuf,
|
|
lpesbTemp->lrgbMem + lpesbTemp->ibBuf, lpParm) > 0)
|
|
{
|
|
lrgPriorityQueue [index] = lpesbHalf;
|
|
index = uiHalf;
|
|
uiHalf = index/2;
|
|
lpesbHalf = lrgPriorityQueue [uiHalf];
|
|
}
|
|
lrgPriorityQueue[index] = lpesbTemp;
|
|
}
|
|
|
|
/*************************************************************************
|
|
*
|
|
* @doc INTERNAL
|
|
*
|
|
* @func VOID | PriorityQueueDown |
|
|
* The function restore the heap condition of a PQ, ie. the parent
|
|
* node must be less than the children. When the top node is removed
|
|
* the heap condition may be violated, if the resulting node
|
|
* is greater than its children. In this case the nodes have to
|
|
* be switched
|
|
*
|
|
* @parm LPESI | lpesi |
|
|
* Pointer to external sort info, which contains all info
|
|
*
|
|
* @parm FCOMPARE | fCompare |
|
|
* Function used to compare the node
|
|
*
|
|
* @parm LPVOID | lpParm |
|
|
* Parameter to be used with fCompare
|
|
*
|
|
* @parm WORD | index |
|
|
* Index of the inserted node
|
|
*
|
|
*************************************************************************/
|
|
|
|
PRIVATE VOID PriorityQueueDown (LPESI lpesi, FCOMPARE fCompare, LPVOID lpParm)
|
|
{
|
|
LPESB FAR *lrgPriorityQueue;
|
|
int CurIndex;
|
|
int ChildIndex;
|
|
int MaxCurIndex;
|
|
int MaxChildIndex;
|
|
LPESB lpesbSaved;
|
|
LPESB lpesbTemp;
|
|
LPESB lpesbChild;
|
|
|
|
lrgPriorityQueue = lpesi->lrgPriorityQueue;
|
|
lpesbSaved = lrgPriorityQueue[1];
|
|
MaxCurIndex = (MaxChildIndex = lpesi->uiQueueSize) / 2;
|
|
|
|
for (CurIndex = 1; CurIndex <= MaxCurIndex; CurIndex = ChildIndex)
|
|
{
|
|
|
|
/* Get child index */
|
|
ChildIndex = CurIndex * 2;
|
|
|
|
/* Find the minimum of the two children */
|
|
if (ChildIndex < MaxChildIndex)
|
|
{
|
|
if ((lpesbTemp = lrgPriorityQueue[ChildIndex + 1]) != NULL)
|
|
{
|
|
|
|
lpesbChild = lrgPriorityQueue[ChildIndex];
|
|
|
|
/* The two children exist. Take the smallest */
|
|
|
|
if ((*fCompare)(lpesbChild->lrgbMem + lpesbChild->ibBuf,
|
|
lpesbTemp->lrgbMem + lpesbTemp->ibBuf, lpParm) >= 0)
|
|
ChildIndex++;
|
|
}
|
|
}
|
|
|
|
if (ChildIndex > MaxChildIndex)
|
|
break;
|
|
|
|
/* If the parent's node is less than the child, then break
|
|
* (heap condition met)
|
|
*/
|
|
|
|
lpesbTemp = lrgPriorityQueue [ChildIndex];
|
|
|
|
if ((*fCompare)(lpesbSaved->lrgbMem + lpesbSaved->ibBuf,
|
|
lpesbTemp->lrgbMem + lpesbTemp->ibBuf, lpParm) < 0)
|
|
break;
|
|
|
|
/* Replace the node */
|
|
lrgPriorityQueue [CurIndex] = lpesbTemp;
|
|
}
|
|
|
|
lrgPriorityQueue [CurIndex] = lpesbSaved;
|
|
}
|
|
|
|
PUBLIC HRESULT PASCAL FAR PriorityQueueRemove (LPESI lpesi, FCOMPARE fCompare,
|
|
LPVOID lpParm)
|
|
{
|
|
LPESB lpesb;
|
|
LPB lpbCurPtr;
|
|
SHORT reclen;
|
|
DWORD index;
|
|
LPESB FAR *lrgPriorityQueue;
|
|
ERRB errb;
|
|
|
|
/* Get all appropriate pointers */
|
|
lpesb = (lrgPriorityQueue = lpesi->lrgPriorityQueue)[1];
|
|
lpbCurPtr = lpesb->lrgbMem + (index = lpesb->ibBuf);
|
|
|
|
reclen = *(LPW)lpbCurPtr + sizeof(WORD); /* Current record's length */
|
|
index += reclen; /* Should point to location of next record */
|
|
lpesb->ibBuf = index;
|
|
|
|
#ifdef _DEBUG
|
|
/* Make sure that we did not pass the buffer's limit */
|
|
if (index > lpesb->dwEsbSize)
|
|
return SetErrCode (&errb, E_ASSERT);
|
|
#endif
|
|
|
|
/* Skip this record */
|
|
lpbCurPtr += reclen;
|
|
|
|
if (index < lpesb->dwEsbSize)
|
|
{
|
|
/* We may have some data left in the buffer, so check
|
|
* the next record, make sure that it is complete. Refill the
|
|
* buffer if necessary
|
|
*/
|
|
|
|
if (index + *(LPW)lpbCurPtr + sizeof(WORD) > lpesb->dwEsbSize)
|
|
{
|
|
|
|
/* Fill the data buffer */
|
|
if (EsbBlockFill (lpesi, lpesb, &errb) == cbIO_ERROR)
|
|
return errb;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Fill the data buffer */
|
|
if (EsbBlockFill (lpesi, lpesb, &errb) == cbIO_ERROR)
|
|
return errb;
|
|
}
|
|
|
|
/* Check for data */
|
|
if (lpesb->ibBuf >= lpesb->dwEsbSize)
|
|
{
|
|
|
|
DWORD size;
|
|
|
|
/* This block runs out of data, just replace it with the
|
|
* last block in the array
|
|
*/
|
|
lrgPriorityQueue[1] = lrgPriorityQueue [size = lpesi->uiQueueSize];
|
|
lrgPriorityQueue [size--] = NULL;
|
|
lpesi->uiQueueSize = size ;
|
|
}
|
|
|
|
/* Now fix the heap condition */
|
|
PriorityQueueDown (lpesi, fCompare, lpParm);
|
|
return S_OK;
|
|
}
|
|
|
|
PRIVATE DWORD PASCAL NEAR EsbBlockFill (LPESI lpesi, LPESB lpesb,
|
|
PHRESULT phr)
|
|
{
|
|
LPB lpbStart;
|
|
LPB lpbCurPtr;
|
|
DWORD cbByteRead;
|
|
DWORD cbByteLeft;
|
|
|
|
lpbStart = lpesb->lrgbMem;
|
|
lpbCurPtr = lpesb->lrgbMem + lpesb->ibBuf;
|
|
|
|
/* Check to see how many bytes we have left */
|
|
if (cbByteLeft = lpesb->dwEsbSize - lpesb->ibBuf)
|
|
{
|
|
/* We have some leftover data. Just copied them to the
|
|
* beginning of the buffer
|
|
*/
|
|
MEMCPY (lpbStart, lpbCurPtr, cbByteLeft);
|
|
lpbStart += cbByteLeft;
|
|
}
|
|
|
|
/* Update the current index */
|
|
lpesb->ibBuf = 0;
|
|
|
|
/* Calculate how many bytes to be read in */
|
|
|
|
cbByteRead = lpesb->dwEsbSize - cbByteLeft;
|
|
if (FoCompare(FoSubFo(lpesb->lfoMax,lpesb->lfo),MakeFo(cbByteRead,0)) < 0)
|
|
cbByteRead = (WORD)(DwSubFo(lpesb->lfoMax,lpesb->lfo));
|
|
|
|
|
|
/* Update the size of the buffer */
|
|
lpesb->dwEsbSize = (WORD)cbByteLeft;
|
|
|
|
/* Read in new data */
|
|
if (cbByteRead > 0)
|
|
{
|
|
if ((cbByteRead = FileSeekRead (lpesi->hfpb, lpbStart, lpesb->lfo,
|
|
cbByteRead, phr)) == cbIO_ERROR)
|
|
return cbIO_ERROR;
|
|
|
|
/* Update the pointer */
|
|
lpesb->lfo = FoAddDw(lpesb->lfo, cbByteRead);
|
|
lpesb->dwEsbSize += (WORD)cbByteRead;
|
|
}
|
|
return cbByteRead;
|
|
}
|
|
|
|
PRIVATE HRESULT PASCAL NEAR HugeArrayAddWord(HUGEARRAY FAR *pHugeArray,
|
|
LPB pLineBuf)
|
|
{
|
|
HRESULT fRet;
|
|
|
|
if (pHugeArray->dwCount >= pHugeArray->dwSize)
|
|
{
|
|
if ((fRet = HugeArrayFlush (pHugeArray)) != S_OK)
|
|
return(fRet);
|
|
}
|
|
|
|
// The +1 is for the extra 0 at the end of the line
|
|
|
|
if ((pHugeArray->hrgStrArray[pHugeArray->dwCount] =
|
|
(LPB)BlockCopy (pHugeArray->pBlockMgr, pLineBuf,
|
|
*(LPW)pLineBuf + sizeof(WORD) + 1, 0)) == NULL)
|
|
{
|
|
if ((fRet = HugeArrayFlush (pHugeArray)) != S_OK)
|
|
return(fRet);
|
|
|
|
if ((pHugeArray->hrgStrArray[pHugeArray->dwCount] =
|
|
(LPB)BlockCopy (pHugeArray->pBlockMgr, pLineBuf,
|
|
*(LPW)pLineBuf + sizeof(WORD), 0)) == NULL)
|
|
{
|
|
return(E_ASSERT);
|
|
}
|
|
}
|
|
pHugeArray->dwCount++;
|
|
return(S_OK);
|
|
}
|
|
/*************************************************************************
|
|
*
|
|
* @doc INTERNAL INDEXING
|
|
*
|
|
* @func HRESULT PASCAL FAR | HugeArrayFlush |
|
|
* This function will perform an internal sort on the huge
|
|
* array associated with pHugeArray, and then flush out the
|
|
* result onto the disk. An external sort element is created
|
|
* to record the position of the block in the disk
|
|
*
|
|
* @parm PHUGEARRAY | pHugeArray |
|
|
* Pointer to huge array struct
|
|
*
|
|
* @rdesc S_OK, or errors if failed
|
|
*
|
|
*************************************************************************/
|
|
|
|
PRIVATE HRESULT NEAR PASCAL HugeArrayFlush (PHUGEARRAY pHugeArray)
|
|
{
|
|
LPESB lpesb; /* Pointer to a newly created external sort block */
|
|
LPESI lpesi; /* Pointer to external sort info struct */
|
|
HRESULT fRet; /* Function return value */
|
|
char TmpBuf[50];
|
|
ERRB errb;
|
|
|
|
/* If there is no data, then just return */
|
|
if (pHugeArray->dwCount == 0)
|
|
return S_OK;
|
|
|
|
/* Print the status */
|
|
if (pHugeArray->PrintStatusFunc)
|
|
{
|
|
pHugeArray->dwBufCount++;
|
|
wsprintf (TmpBuf, "QuickSort %ld strings (set # %ld)",
|
|
pHugeArray->dwCount, pHugeArray->dwBufCount);
|
|
pHugeArray->PrintStatusFunc ((LPSTR)TmpBuf);
|
|
}
|
|
|
|
/* Sort the huge array */
|
|
if ((fRet = HugeDataSort((LPVOID)pHugeArray->hrgStrArray,
|
|
pHugeArray->dwCount, pHugeArray->lpfnCompare,
|
|
pHugeArray->lpSortParm, pHugeArray->lpfnInterrupt,
|
|
pHugeArray->lpInterruptParm)) != S_OK)
|
|
return fRet;
|
|
|
|
/* Get pointer to external sort info block */
|
|
lpesi = &pHugeArray->esi;
|
|
|
|
/*
|
|
* Create external sort temporary file, if the file doesn't
|
|
* already exist. This is to store all the internal sort results
|
|
*/
|
|
|
|
if (lpesi->hfpb == NULL)
|
|
{
|
|
|
|
/* Create the external sort temp file */
|
|
|
|
(void)GETTEMPFILENAME((char)0, (LPB)"iso", (WORD)0,
|
|
lpesi->aszTempName);
|
|
|
|
if ((lpesi->hfpb = FileOpen (NULL, lpesi->aszTempName,
|
|
REGULAR_FILE, READ_WRITE, &errb)) == NULL)
|
|
return errb;
|
|
|
|
/* Allocate a temporary I/O file buffer info */
|
|
if ((lpesi->lpfbiTemp = FileBufAlloc(lpesi->hfpb, FILEBUF_SIZE)) == NULL)
|
|
return SetErrCode (&errb, E_OUTOFMEMORY);
|
|
|
|
SetFCallBack (lpesi->hfpb, pHugeArray->lpfnInterrupt,
|
|
pHugeArray->lpInterruptParm);
|
|
|
|
}
|
|
|
|
/*
|
|
* Make a new ESB (external sort block) record. All fields are 0's
|
|
*/
|
|
if ((lpesb = GlobalLockedStructMemAlloc (sizeof(ESB))) == NULL)
|
|
{
|
|
SetErrCode (&errb, E_OUTOFMEMORY);
|
|
return E_OUTOFMEMORY;
|
|
}
|
|
|
|
/* Add to the BEGINNING of the linked list */
|
|
lpesb->lpesbNext = lpesi->lpesbRoot;
|
|
lpesi->lpesbRoot = lpesb;
|
|
|
|
/* Update the number of external sort blocks */
|
|
lpesi->cesb++;
|
|
|
|
/* Print the status */
|
|
if (pHugeArray->PrintStatusFunc)
|
|
{
|
|
wsprintf (TmpBuf, "Write sorted strings");
|
|
pHugeArray->PrintStatusFunc ((LPSTR)TmpBuf);
|
|
}
|
|
|
|
/* Do the flush. In case of error, we will just return, not worrying
|
|
* about the allocated structure since it will be released by
|
|
* IndexDispose()
|
|
*/
|
|
fRet = FlushISI (pHugeArray);
|
|
return fRet;
|
|
}
|
|
|
|
|
|
/*************************************************************************
|
|
* @doc INTERNAL INDEXING
|
|
*
|
|
* @func HRESULT PASCAL NEAR | FlushISI |
|
|
* Write out the result of the internal sort to the disk. To save
|
|
* disk space, the data are semi-compacted. The external sort info
|
|
* structure's length associated with the data is updated.
|
|
*
|
|
* @parm PHUGEARRAY | pHugeArray |
|
|
* Pointer to index parameter block
|
|
*
|
|
* @rdesc S_OK, or errors if failed
|
|
*************************************************************************/
|
|
|
|
PRIVATE HRESULT PASCAL NEAR FlushISI (PHUGEARRAY pHugeArray)
|
|
{
|
|
LPFBI lpfbiTemp; /* Temporary file I/O buffer */
|
|
LPESI lpesi; /* Pointer to external sort info */
|
|
LPESB lpesb; /* Pointer to external sort block */
|
|
LST lstWord; /* Pointer to word-occurence buffer */
|
|
DWORD i; /* Scratch variable */
|
|
WORD wLen; /* Length of the current word */
|
|
LPB lpbBuf; /* Short cut of lpfbi->lpbCurPtr */
|
|
DWORD lcByteWritten; /* How many bytes have been written */
|
|
LPB lpbBufLimit; /* Buffer safety limit */
|
|
LPB HUGE *hplpbRec; /* Pointer to key */
|
|
HRESULT fRet;
|
|
|
|
/* Initialize the variables */
|
|
lpesi = &pHugeArray->esi;
|
|
lpesb = lpesi->lpesbRoot;
|
|
lcByteWritten = 0;
|
|
|
|
/* Remember the offset in the external sort temporary file at
|
|
* which data for this ESB starts.
|
|
*/
|
|
lpesb->lfo = lpesi->lfoTempOffset;
|
|
|
|
/* Get pointer to output buffer */
|
|
lpfbiTemp = lpesi->lpfbiTemp;
|
|
|
|
/* Set the buffer limit. Leave some room for overflow */
|
|
lpbBufLimit = (lpbBuf = lpfbiTemp->lrgbBuf) + FILEBUF_SIZE - sizeof(DWORD);
|
|
|
|
/*
|
|
* Write out the sorted records to the temporary file space
|
|
* managed by the ESB.
|
|
*/
|
|
|
|
for (i = pHugeArray->dwCount, hplpbRec = pHugeArray->hrgStrArray; i > 0;
|
|
i--, hplpbRec ++)
|
|
{
|
|
|
|
/* Get pointer to word data */
|
|
lstWord = (LST)*hplpbRec;
|
|
|
|
/* Get the word length */
|
|
wLen = *(LPW)lstWord;
|
|
|
|
if (lpbBuf + wLen >= lpbBufLimit)
|
|
{
|
|
// Buffer overflow. Record teh number of bytes written and
|
|
// flush it.
|
|
|
|
lcByteWritten += (lpfbiTemp->ibBuf = (WORD)(lpbBuf - lpfbiTemp->lrgbBuf));
|
|
|
|
/* Flush the buffer */
|
|
|
|
if ((fRet = FileBufFlush(lpfbiTemp)) != S_OK)
|
|
return fRet;
|
|
|
|
/* Reset lpbBuf */
|
|
lpbBuf = lpfbiTemp->lrgbBuf;
|
|
}
|
|
|
|
MEMCPY(lpbBuf, (LPB)lstWord, wLen + sizeof(WORD));
|
|
lpbBuf += wLen + sizeof(WORD);
|
|
}
|
|
|
|
/* Record the number of bytes emitted, and update ibBuf for flushing */
|
|
lcByteWritten += (lpfbiTemp->ibBuf = (WORD)(lpbBuf - lpfbiTemp->lrgbBuf));
|
|
|
|
/* Flush the buffer */
|
|
if ((fRet = FileBufFlush(lpfbiTemp)) != S_OK)
|
|
{
|
|
return SetErrCode (NULL, fRet);
|
|
}
|
|
|
|
/* Update the fields of the external sort info */
|
|
lpesb->lfoMax = (lpesi->lfoTempOffset =
|
|
FoAddDw(lpesi->lfoTempOffset, lcByteWritten));
|
|
|
|
/* Reset all the variables */
|
|
pHugeArray->dwCount = 0; /* Index of the huge array */
|
|
BlockReset(pHugeArray->pBlockMgr); /* Reset data space */
|
|
|
|
return S_OK;
|
|
}
|