windows-nt/Source/XPSP1/NT/enduser/stuff/itircl/fts/search/stopwd.c

904 lines
28 KiB
C
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*************************************************************************
* *
* STOP.C *
* *
* Copyright (C) Microsoft Corporation 1990-1994 *
* All Rights reserved. *
* *
**************************************************************************
* *
* Module Intent *
* Stop list indexing and retrieval *
* *
**************************************************************************
* *
* Written By : Binh Nguyen *
* Current Owner: Binh Nguyen *
* *
*************************************************************************/
#include <mvopsys.h>
#include <orkin.h>
#include <mem.h>
#include <memory.h>
#include <io.h>
#include <mvsearch.h>
#include "common.h"
#ifdef _DEBUG
static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
#endif
#define cbSTOP_BUF ((CB)512) // Number of bytes read at a time
// from the stop-word file.
/*************************************************************************
*
* API FUNCTIONS
* Those functions should be exported in a .DEF file
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListAddWord(LPSIPB, LST);
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListIndexLoad (HFPB, LPSIPB, LSZ);
PUBLIC LPSIPB EXPORT_API FAR PASCAL MVStopListInitiate(WORD, PHRESULT);
PUBLIC void EXPORT_API FAR PASCAL MVStopListDispose(LPSIPB);
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListLoad(HFPB, LPSIPB, LSZ,
BREAKER_FUNC, LPV);
PUBLIC HRESULT EXPORT_API PASCAL FAR MVStopFileBuild (HFPB, LPSIPB, LSZ);
PUBLIC LPCHAIN EXPORT_API FAR PASCAL MVStopListFind(_LPSIPB lpsipb, LST lstWord);
/*************************************************************************
*
* INTERNAL PRIVATE FUNCTIONS
* All of them should be declared near
*************************************************************************/
PRIVATE WORD NEAR PASCAL GetHashKey (WORD, LST);
/*************************************************************************
*
* INTERNAL PUBLIC FUNCTIONS
* All of them should be declared far, and included in some include file
*************************************************************************/
PUBLIC HRESULT FAR PASCAL FStopCallback(LST, LST, LFO, LPV);
/*************************************************************************
* @doc API RETRIEVAL
*
* @func LPSIPB FAR PASCAL | MVStopListInitiate |
* Create and initiate a stop-word information structure
*
* @parm PHRESULT | phr |
* Pointer to error buffer.
*
* @parm WORD | wTabSize |
* Number of entries in the hash table. Values smaller than HASH_SIZE
* are raised to HASH_SIZE. The process of stop word checking will
* be faster with larger values of wTabSize.
*
* @rdesc the pointer to the stop-list structure if succeeded,
* NULL if failed. The error buffer will contain descriptions about
* the cause of the failure
*************************************************************************/
PUBLIC LPSIPB EXPORT_API FAR PASCAL MVStopListInitiate(WORD wTabSize,
PHRESULT phr)
{
    _LPSIPB lpStopInfo;

    /* Never build a hash table smaller than HASH_SIZE entries. */
    wTabSize = (wTabSize < HASH_SIZE) ? HASH_SIZE : wTabSize;

    /* One allocation holds the SIPB header followed by the hash table
     * (wTabSize pointer-sized slots).
     */
    lpStopInfo = (_LPSIPB)GLOBALLOCKEDSTRUCTMEMALLOC(sizeof(SIPB) +
        wTabSize * sizeof(LPB));

    if (lpStopInfo != NULL)
    {
        /* The hash table starts right after the SIPB header. */
        lpStopInfo->HashTab = (LPCHAIN FAR *)((LPB)lpStopInfo + sizeof(SIPB));

        /* Block manager that stores the chain nodes and word text. */
        lpStopInfo->lpBlkMgr = BlockInitiate (WORDBUF_SIZE, 0, 0, 0);
        if (lpStopInfo->lpBlkMgr != NULL)
        {
            lpStopInfo->wTabSize = wTabSize;    /* Size of hash table */
            lpStopInfo->lpfnStopListLookup = MVStopListLookup;
            return (LPSIPB)lpStopInfo;
        }

        /* Word buffer could not be created: undo the first allocation. */
        GlobalLockedStructMemFree((LPV)lpStopInfo);
    }

    /* Both failure paths report out-of-memory through phr. */
    SetErrCode(phr, E_OUTOFMEMORY);
    return NULL;
}
/*************************************************************************
* @doc API RETRIEVAL
*
* @func HRESULT FAR PASCAL | MVStopListAddWord |
* Add a word to a stop list
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word information structure
*
* @parm LST | lstWord |
* Pointer to 2-byte length preceded Pascal word to be added
* into the stop-word list
*
* @rdesc S_OK if succeeded
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListAddWord(_LPSIPB lpsipb, LST lstWord)
{
    WORD wHash;
    LPCHAIN lpChain;
    WORD wByteUsed;

    // Sanity check
    if (lpsipb == NULL || lstWord == NULL)
        return(E_INVALIDARG);

    /* Look for the word. If it is already in the list, only bump its
     * occurrence counter; no new node is created.
     */
    if ((lpChain = MVStopListFind (lpsipb, lstWord)) != NULL)
    {
        lpChain->dwCount++;
        return S_OK;
    }

    /* Bytes occupied by the word: 2-byte length prefix plus the text. */
    wByteUsed = *(LPUW)lstWord + 2;

#ifndef _32BIT
    if (lpsipb->cbTextUsed + wByteUsed > MAX_STOPWORD_BUFSIZE) {
        /* There are too many stop words */
        return ERR_TOOMANYSTOPS;
    }
#endif

    /* Copy the word into the word buffer block. The last argument asks
     * BlockCopy for sizeof(CHAIN) - 1 extra bytes; presumably this is the
     * chain node header placed in front of the word (confirm against
     * the CHAIN definition).
     */
    if ((lpChain = (LPCHAIN)BlockCopy (lpsipb->lpBlkMgr, lstWord, wByteUsed,
        sizeof(CHAIN) - 1)) == NULL)
        return E_OUTOFMEMORY;

    /* Account for the text only once the copy has succeeded, so that a
     * failed insertion does not inflate cbTextUsed (the value is later
     * written to the stop-file header by MVStopFileBuild).
     */
    lpsipb->cbTextUsed += wByteUsed;
    lpChain->dwCount = 0;

    /* Compute hash key */
    wHash = GetHashKey(lpsipb->wTabSize, lstWord);

    /* Link the new node at the head of its hash chain */
    CH_NEXT(lpChain) = lpsipb->HashTab[wHash];
    lpsipb->HashTab[wHash] = lpChain;
    return S_OK;        // Function worked.
}
/*************************************************************************
* @doc API RETRIEVAL
*
* @func void FAR PASCAL | MVStopListDispose |
* Frees memory associated with a stop list.
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word information structure
*************************************************************************/
PUBLIC void EXPORT_API FAR PASCAL MVStopListDispose (_LPSIPB lpsipb)
{
    /* A NULL stop list is accepted and ignored. */
    if (lpsipb != NULL)
    {
        /* Release the word buffer first: it is reached through the
         * structure that is freed next.
         */
        BlockFree(lpsipb->lpBlkMgr);

        /* Release the stop info structure itself */
        GlobalLockedStructMemFree((LPV)lpsipb);
    }
}
/*************************************************************************
* @doc API RETRIEVAL
*
* @func HRESULT FAR PASCAL | MVStopListIndexLoad |
* Read a stop-word list stored in the subfile/dos file.
*
* @parm HFPB | hfpb |
* Handle to input file. Can be mvfs subfile or separate dos file.
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word information structure
*
* @parm LSZ | lszStopFile |
* Name of the stop-word file to open when hfpb is not already
* an opened subfile
*
*
* @rdesc S_OK if succeeded, other errors if failed.
*************************************************************************/
/*
The strings are stored in the file in a sequence of pascal strings
*/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListIndexLoad (HFPB hfpbSysFile,
_LPSIPB lpsipb, LSZ lszStopFile)
{
    BYTE argbInBuf[CB_STOP_BUF];    /* I/O buffer, also holds the header */
    FILEOFFSET lfo;                 /* Offset of the next read */
    FILEOFFSET foStart;             /* Offset at which our data begins */
    HFPB hfpbSubFile;
    BOOL fOpenedFile;               /* TRUE if we opened (and must close) the file */
    HRESULT fRet = S_OK;
    WORD cbRead;
    int fLast = FALSE;              /* TRUE once the terminating NULL word is seen.
                                     * Must start FALSE: it is tested after every
                                     * buffer, including those with no terminator. */
    LPSTOP lpStopHdr;
    LPB lpWord;
    WORD wOffsetInBuf;              /* Bytes of a partial word carried over */
    WORD wLen;
    ERRB errb;

    /* Sanity check */
    if (lpsipb == NULL)
        return SetErrCode (NULL, E_INVALIDARG);

    /* Use the caller's handle directly when it is already a subfile,
     * otherwise open lszStopFile ourselves.
     */
    if ((fOpenedFile =
        FsTypeFromHfpb(hfpbSubFile = hfpbSysFile) != FS_SUBFILE) &&
        (hfpbSubFile = FileOpen
        (hfpbSysFile, lszStopFile, hfpbSysFile ? FS_SUBFILE : REGULAR_FILE,
        READ, &errb)) == NULL)
    {
        return errb;
    }

    // If we didn't open the file, we need to find out where the file seek
    // pointer is initially so that we only seek relative to that starting
    // position (i.e. the caller owns the part of the file that comes before).
    foStart = (fOpenedFile ? MakeFo(0,0) :
        FileSeek (hfpbSubFile, MakeFo (0, 0), wFSSeekCur, &fRet));
    if (FAILED(fRet))
        goto exit01;

    /* Read and check the file validity */
    cbRead = (WORD)FileSeekRead
        (hfpbSubFile, (LPV)(lpStopHdr = (LPSTOP)argbInBuf),
        FoAddFo(foStart, MakeFo(0, 0)), sizeof(STOP_HDR), &fRet);
    if (cbRead != sizeof(STOP_HDR))
    {
        /* A short header read must never be reported as success, even
         * if the read routine left fRet untouched.
         */
        if (SUCCEEDED(fRet))
            SetErrCode(&errb, fRet = E_FILEREAD);
        goto exit01;
    }

    /* MAC codes. They will be eliminated through optimization */
    lpStopHdr->FileStamp = SWAPWORD(lpStopHdr->FileStamp);
    lpStopHdr->version = SWAPWORD(lpStopHdr->version);
    lpStopHdr->dwFileSize = SWAPLONG(lpStopHdr->dwFileSize);

    if (lpStopHdr->FileStamp != STOP_STAMP ||
        lpStopHdr->version != VERCURRENT)
    {
        fRet = SetErrCode(&errb, E_BADVERSION);
        goto exit01;
    }

    /* Start at the beginning of the buffer */
    wOffsetInBuf = 0;

    for (lfo = FoAddFo(foStart, MakeFo(STOP_HDR_SIZE, 0));;)
    {
        LPB lpbCur;
        WORD cbReadOurs = 0;

        /* Append fresh data after any partial word kept from last pass */
        if ((cbRead = (WORD)FileSeekRead(hfpbSubFile,
            lpbCur = ((LPB)argbInBuf + wOffsetInBuf), lfo,
            CB_STOP_BUF - wOffsetInBuf, &errb)) == cbIO_ERROR)
        {
            SetErrCode(&errb, fRet = E_FILEREAD);
            goto exit01;
        }

        /* Nothing more could be read: either the file ended without a
         * terminating NULL word, or a single entry fills the whole
         * buffer. No further progress is possible, so stop instead of
         * looping forever; any partial entry left over is dropped.
         */
        if (cbRead == 0)
            break;

        lfo = FoAddDw(lfo, (DWORD)cbRead);

        /* Scan the new bytes for the NULL word that ends the list */
        while (cbRead - cbReadOurs++ >= sizeof(WORD))
        {
            if (*((WORD UNALIGNED * UNALIGNED)lpbCur) == 0)
            {
                FILEOFFSET foCur;

                // Get our current seek position.
                foCur = FileSeek (hfpbSubFile, MakeFo (0, 0), wFSSeekCur, &fRet);

                // We already advanced cbReadOurs by one in the loop
                // condition; advance it by one more to account for
                // the second byte of the NULL word. Then we move
                // the seek pointer back by the difference so that we
                // don't leave it past the end of our data.
                FileSeek (hfpbSubFile,
                    FoSubFo(foCur, MakeFo(cbRead - ++cbReadOurs, 0)),
                    wFSSeekSet, &fRet);
                ITASSERT(SUCCEEDED(fRet));

                /* Only the bytes up to and including the terminator
                 * belong to us; this also ends the scan loop.
                 */
                cbRead = cbReadOurs;
                fLast = TRUE;
            }
            else
                lpbCur++;
        }

        cbRead += wOffsetInBuf;     // Catch what's left from previous scan
        wOffsetInBuf = 0;

        /* Add the words into the stop word list.
         * NOTE(review): when the terminator was found it is part of the
         * cbRead bytes walked here, so it is handed to MVStopListAddWord
         * as a zero-length word - confirm this is intended.
         */
        for (lpWord = argbInBuf; cbRead > 0;)
        {
            /* If the whole word has been read in, just add it to the
               stop list, else we have to "reconstruct" it
            */
            // erinfox: we have to byte-swap on Mac
            *(WORD UNALIGNED * UNALIGNED)lpWord = SWAPWORD(*(WORD UNALIGNED * UNALIGNED)lpWord);
            wLen = *(LPUW)(lpWord) + 2;

            if (wLen <= cbRead)
            {
                /* Everything fits */
                if ((fRet = MVStopListAddWord(lpsipb, lpWord)) != S_OK)
                    goto exit01;
                cbRead -= wLen;
                lpWord += wLen;     /* Move to next word */
            }
            else
            {
                /* Copy the partial word to the beginning of the buffer
                 * so the next read can complete it
                 */
                MEMCPY(argbInBuf, lpWord, cbRead);
                wOffsetInBuf = cbRead;
                break;
            }
        }

        if (fLast)
            break;
    }

    fRet = S_OK;    // Succeeded

exit01:
    // Close the file only if we were the ones who opened it.
    if (fOpenedFile)
        (void)FileClose(hfpbSubFile);   // Return value not checked
                                        // because the file is open
                                        // for read-only.
    return fRet;
}
/*************************************************************************
* @doc API INDEX RETRIEVAL
*
* @func HRESULT FAR PASCAL | MVStopListLoad |
* Read a stop-word list from an external file. The file must have
* only one stop word per line, or else there is potential loss
* of stop words.
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word information structure
*
* @parm LSZ | lszStopFile |
* Stop word filename. This is a simple ASCII text file
*
* @parm BREAKER_FUNC | lpfnBreakFunc |
* Word breaker to be used
*
* @parm LPCHARTAB | lpCharTab |
* Character table handed to the word breaker
*
* @rdesc S_OK if succeeded, other errors failed.
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListLoad(HFPB hfpbIn, _LPSIPB lpsipb,
LSZ lszStopFile, BREAKER_FUNC lpfnBreakFunc,
LPCHARTAB lpCharTab)
{
    BYTE argbInBuf[cbSTOP_BUF];     // IO buffer
    HFPB hfpb;                      // File handle
    BOOL fOpenedFile;               // TRUE if we opened (and must close) hfpb
    _LPIBI lpibi;                   // Pointer to internal breaker info
    HANDLE hbi;                     // Handle to internal breaker info
    HRESULT fRet;                   // Returned value
    BRK_PARMS brkParms;             // Breaker parameters structure
    LPB lpStart;                    // Beginning of string to be parsed
    LPB lpEnd;                      // Current scan position
    WORD wStrLength;                // Bytes in the current string
    WORD iCopy;                     // Index used to move a partial string
    CB cbTobeRead;                  // Bytes to be read
    CB cbRead;                      // Bytes actually read / left to scan
    int fLast;                      // TRUE if this is the last read

    /* Sanity check */
    if (lpsipb == NULL || (lszStopFile == NULL && hfpbIn == NULL)
        || lpfnBreakFunc == NULL)
        return E_INVALIDARG;

    /* Use the caller's handle directly when it is already a subfile,
     * otherwise open lszStopFile ourselves.
     */
    if ((fOpenedFile = FsTypeFromHfpb(hfpb = hfpbIn) != FS_SUBFILE) &&
        (hfpb = (HANDLE)FileOpen
        (hfpbIn, lszStopFile, hfpbIn ? FS_SUBFILE : REGULAR_FILE,
        READ, &fRet)) == 0)
    {
        return (fRet);
    }

    /* Allocate a breaker info block */
    if ((hbi = _GLOBALALLOC(DLLGMEM_ZEROINIT, (LCB)sizeof(IBI))) == NULL)
    {
        /* Do not leak the file we just opened */
        if (fOpenedFile)
            FileClose(hfpb);
        return E_OUTOFMEMORY;
    }
    lpibi = (_LPIBI)_GLOBALLOCK(hbi);

    /* Initialize variables */
    brkParms.lcbBufOffset = 0L;
    brkParms.lpInternalBreakInfo = lpibi;
    brkParms.lpvUser = lpsipb;
    brkParms.lpfnOutWord = (FWORDCB)FStopCallback;
    brkParms.lpStopInfoBlock = NULL;
    brkParms.lpCharTab = lpCharTab;

    cbTobeRead = cbSTOP_BUF;            // Read in a buffer whole
    lpStart = lpEnd = (LPB)argbInBuf;   // Start & End of string
    wStrLength = 0;

    /* The idea is to break the file into a sequence of lines, and pass
     * each line to the word breaker. The assumption made is that we
     * should only have one word per line, since various types of
     * breakers can only handle one word at a time.
     */
    for (;;)
    {
        cbRead = (WORD)FileRead(hfpb, lpEnd, cbTobeRead, &fRet);
        if (FAILED(fRet))
            goto exit01;

        /* A short read means the end of the file has been reached */
        fLast = (cbRead != cbTobeRead);

        /* Rescan from the start of the buffer, including any partial
         * string carried over from the previous pass.
         */
        lpEnd = lpStart;
        cbRead += wStrLength;       // Get what is left in the buffer
        wStrLength = 0;

        /* The loop runs once more with cbRead == 0 so that a string
         * ending exactly at the end of the data can be handled.
         */
        while (cbRead != (CB)-1)
        {
            /* Data exhausted in the middle of a string and more data is
             * still to come: keep the partial string so that the next
             * read can complete it, instead of breaking the word in two.
             * A string that already fills the whole buffer cannot be
             * carried over and is flushed as it is.
             */
            if (!cbRead && !fLast && wStrLength != 0 &&
                wStrLength < cbSTOP_BUF)
                break;

            /* Break the buffer into lines. cbRead is tested first so
             * that *lpEnd is never read past the end of the valid data.
             */
            if (!cbRead || *lpEnd == '\r' || *lpEnd == '\n')
            {
                if (wStrLength)
                {
                    /* Process the word we got */
                    brkParms.lpbBuf = lpStart;
                    brkParms.cbBufCount = wStrLength;
                    if ((fRet = (*lpfnBreakFunc)((LPBRK_PARMS)&brkParms))
                        != S_OK)
                        goto exit01;

                    /* Flush the breaker buffer */
                    brkParms.lpbBuf = NULL;
                    brkParms.cbBufCount = 0;
                    if ((fRet = (*lpfnBreakFunc)((LPBRK_PARMS)&brkParms))
                        != S_OK)
                        goto exit01;
                    wStrLength = 0;
                }
            }
            else
            {
                /* Update the pointer to the new word */
                if (wStrLength == 0)
                    lpStart = lpEnd;
                wStrLength++;       // Increase string's length
            }
            cbRead--;
            lpEnd++;
        }

        if (fLast)
            break;

        /* Move the partial string to the beginning of the buffer. Source
         * and destination may overlap; the destination always precedes
         * the source, so a forward byte copy is safe.
         */
        for (iCopy = 0; iCopy < wStrLength; iCopy++)
            argbInBuf[iCopy] = lpStart[iCopy];
        lpEnd = (lpStart = argbInBuf) + wStrLength;
        cbTobeRead = cbSTOP_BUF - wStrLength;   // Fill the rest of the buffer
    }

    /* On the last pass every pending string has been handed to the
     * breaker and flushed inside the scan loop.
     */
    fRet = S_OK;    // Succeeded

exit01:
    /* Free breaker info block */
    _GLOBALUNLOCK(hbi);
    _GLOBALFREE(hbi);

    /* Close the file only if we opened it */
    if (fOpenedFile)
        FileClose(hfpb);
    return fRet;
}
/*************************************************************************
* @doc INTERNAL
*
* @func WORD NEAR PASCAL | GetHashKey |
* Compute the hash key of a string. This key is used for indexing
* into the stop word hash table
*
* @parm LST | lstWord |
* Pointer to a 2-byte length preceded Pascal-type string
*
* @rdesc
* Return the index into the stop words hash table
*************************************************************************/
PRIVATE WORD NEAR PASCAL GetHashKey (WORD hashSize, LST lstWord)
{
    /* The string starts with its 2-byte length; the text follows. */
    unsigned int cbLeft = *(LPUW)lstWord;
    unsigned int uKey = 0;
    LST lpbChar = lstWord + sizeof(WORD);

    while (cbLeft-- != 0)
    {
        /* Shift-and-fold step (a 16-bit rotate when unsigned int is
         * 16 bits wide), then mix in the next byte of the word.
         */
        uKey = (uKey << 1) | (uKey >> 15);
        uKey ^= *lpbChar++;
    }

    /* Reduce the key to a valid index into the hash table */
    return ((WORD)(uKey % hashSize));
}
/*************************************************************************
* @doc API RETRIEVAL INDEX
*
* @func LPCHAIN FAR PASCAL | MVStopListFind |
* This looks for a word (lstWord) in a stop-word (lpsipb)
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word list structure
*
* @parm LST | lstWord |
* Pointer to string to be looked for
*
* @rdesc Pointer to the node if found, NULL otherwise
*************************************************************************/
PUBLIC LPCHAIN EXPORT_API FAR PASCAL MVStopListFind(_LPSIPB lpsipb, LST lstWord)
{
    WORD iBucket;       // Index of the hash chain to search
    LPCHAIN lpNode;     // Node currently examined

    // Nothing can be found without both a list and a word
    if (lpsipb == NULL || lstWord == NULL)
        return(NULL);

    /* Pick the chain the word hashes to */
    iBucket = GetHashKey(lpsipb->wTabSize, lstWord);

    /* Walk the chain; StringDiff2 returning zero means the words match */
    for (lpNode = lpsipb->HashTab[iBucket]; lpNode;
        lpNode = CH_NEXT(lpNode))
    {
        if (!StringDiff2 (&CH_WORD(lpNode), lstWord))
            return (lpNode);
    }
    return (NULL);
}
/*************************************************************************
* @doc API RETRIEVAL INDEX
*
* @func HRESULT FAR PASCAL | MVStopListLookup |
* This looks for a word (lstWord) in a stop-word (lpsipb)
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word list structure
*
* @parm LST | lstWord |
* Pointer to string to be looked for
*
* @rdesc S_OK if found, E_FAIL if not, or other errors
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListLookup(_LPSIPB lpsipb, LST lstWord)
{
    HRESULT hrFound = E_FAIL;   // Result reported when no node matches
    LPCHAIN lpNode;             // Node currently examined

    // Both the list and the word are required
    if (lpsipb == NULL || lstWord == NULL)
        return(E_INVALIDARG);

    /* Start at the head of the chain the word hashes to */
    lpNode = lpsipb->HashTab[GetHashKey(lpsipb->wTabSize, lstWord)];

    /* Walk the chain until a node holds the same word */
    while (lpNode)
    {
        if (!StringDiff2 (&CH_WORD(lpNode), lstWord))
        {
            hrFound = S_OK;
            break;
        }
        lpNode = CH_NEXT(lpNode);
    }
    return (hrFound);
}
/*************************************************************************
* @doc API INDEX
*
* @func HRESULT PASCAL FAR | MVStopFileBuild |
* Incorporate the stop word list into the system file
*
* @parm HFPB | hpfbSysFile |
* If non-zero, handle to an opened system file.
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word information structure
*
* @parm LSZ | lszFilename |
* If hpfbSysFile is non-zero, this is the name of the stop's subfile
* else this is a regular DOS file
*
* @rdesc S_OK if succeeded, E_FAIL if there is nothing to build
* or other errors
*************************************************************************/
PUBLIC HRESULT EXPORT_API PASCAL FAR MVStopFileBuild (HFPB hfpbSysFile,
_LPSIPB lpsipb, LSZ lszFilename)
{
    HFPB hfpbOut;                   // File receiving the stop list
    HRESULT hrResult = S_OK;
    STOP_HDR hdr;                   // Header written at the start of our data
    BOOL fOwnFile;                  // TRUE if we created (and must close) hfpbOut
    BYTE abZero[STOP_HDR_SIZE];     // Zero filler for the header area
    int iBucket;
    LPCHAIN lpNode;
    LST lstEntry;
    WORD cbEntry;
    CB cbBuf;                       // Receives the requested buffer size
    GHANDLE hBuf;
    LPB lpbCur;                     // Next free byte in the output buffer
    LPB lpbBase;                    // Start of the output buffer
    LPB lpbHighWater;               // Flush once lpbCur reaches this point
    ERRB errb;
    FILEOFFSET foData;              // Offset of the first word in the file
    FILEOFFSET foBase;              // Offset at which our data begins

    /* Sanity check */
    if (lpsipb == NULL || (lszFilename == NULL && hfpbSysFile == NULL))
        return E_INVALIDARG;

    if (lpsipb->cbTextUsed == 0)
        return E_FAIL;  /* Nothing to build */

    /* Write straight into the caller's handle when it is already a
     * subfile, otherwise create lszFilename ourselves.
     */
    hfpbOut = hfpbSysFile;
    fOwnFile = (FsTypeFromHfpb(hfpbOut) != FS_SUBFILE);
    if (fOwnFile)
    {
        hfpbOut = FileCreate(hfpbSysFile, lszFilename,
            hfpbSysFile ? FS_SUBFILE: REGULAR_FILE, &errb);
        if (hfpbOut == 0)
            return errb;
    }

    // If we didn't create the file, we need to find out where the file seek
    // pointer is initially so that we only seek relative to that starting
    // position (i.e. the caller owns the part of the file that comes before).
    if (fOwnFile)
        foBase = MakeFo(0,0);
    else
        foBase = FileSeek (hfpbOut, MakeFo (0, 0), wFSSeekCur, &hrResult);
    if (FAILED(hrResult))
        goto close_file;

    /* Prepare the stop file header */
    hdr.FileStamp = STOP_STAMP;
    hdr.version = VERCURRENT;
    hdr.dwFileSize = lpsipb->cbTextUsed;

    /* Write all zeroes to the header area, which is larger than the
     * STOP_HDR structure, then put the real header on top of it.
     */
    MEMSET(abZero, 0, STOP_HDR_SIZE);
    if (FileSeekWrite (hfpbOut, abZero, FoAddFo(foBase, MakeFo (0, 0)),
        STOP_HDR_SIZE, &errb) != STOP_HDR_SIZE)
    {
        hrResult = errb;
        goto close_file;
    }

    if (FileSeekWrite (hfpbOut, &hdr, FoAddFo(foBase, MakeFo (0, 0)),
        sizeof (STOP_HDR), &errb) != sizeof (STOP_HDR))
    {
        hrResult = errb;
        goto close_file;
    }

    /* Allocate a buffer in which the words are gathered before writing */
    if ((hBuf = _GLOBALALLOC (DLLGMEM, cbBuf = CB_HUGE_BUF)) == NULL)
    {
        SetErrCode (&errb, hrResult = E_OUTOFMEMORY);
        goto close_file;
    }
    lpbCur = lpbBase = (LPB)_GLOBALLOCK(hBuf);
    lpbHighWater = lpbBase + CB_HUGE_BUF - CB_MAX_WORD_LEN;

    /* Position the file just past the header area */
    foData = FoAddFo(foBase, MakeFo (STOP_HDR_SIZE, 0));
    if (!FoEquals (FileSeek (hfpbOut, foData, 0, &errb), foData))
    {
        hrResult = E_FILESEEK;
        goto free_buffer;
    }

    /* Emit every word of every hash chain, from the last bucket down
     * to the first.
     */
    for (iBucket = lpsipb->wTabSize - 1; iBucket >= 0; --iBucket)
    {
        lpNode = lpsipb->HashTab[iBucket];
        while (lpNode)
        {
            /* Copy the word together with its 2-byte length prefix */
            lstEntry = &CH_WORD (lpNode);
            cbEntry = *(WORD FAR *)lstEntry + 2;
            MEMCPY (lpbCur, lstEntry, cbEntry);
            lpbCur += cbEntry;

            if (lpbCur >= lpbHighWater)
            {
                /* No more room, just flush the buffer */
                FileWrite(hfpbOut, lpbBase, (DWORD)(lpbCur - lpbBase), &errb);
                hrResult = errb;
                if (hrResult != S_OK)
                    goto free_buffer;
                lpbCur = lpbBase;
            }
            lpNode = CH_NEXT(lpNode);
        }
    }

    /* Flush whatever is still in the buffer */
    FileWrite (hfpbOut, lpbBase, (DWORD)(lpbCur - lpbBase), &errb);
    hrResult = errb;
    if (hrResult == S_OK)
    {
        /* Write a trailing 0 word (i.e. a NULL st) to mark
         * the end of the word list.
         */
        *((WORD *)lpbBase) = 0;
        FileWrite (hfpbOut, lpbBase, sizeof(WORD), &errb);
        hrResult = errb;
    }

free_buffer:
    _GLOBALUNLOCK(hBuf);
    _GLOBALFREE(hBuf);

close_file:
    /* Close the file only if we created it */
    if (fOwnFile)
        FileClose (hfpbOut);
    return(hrResult);
}
/* Word-breaker output callback: adds the normalized form of each word
 * to the stop list. The raw word and its offset are not used.
 */
PUBLIC HRESULT FAR PASCAL FStopCallback(
LST lstRawWord,
LST lstNormWord,
LFO lfoWordOffset,
_LPSIPB lpsipb)
{
    HRESULT hrAdd;

    hrAdd = MVStopListAddWord(lpsipb, lstNormWord);
    return hrAdd;
}
/*************************************************************************
* @doc API RETRIEVAL
*
* @func HRESULT FAR PASCAL | MVStopListEnumWords |
* Enumerate the words in a stop list, getting a pointer to each.
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word information structure
*
* @parm LST* | plstWord |
* Indirect Pointer to 2-byte length preceded Pascal word that is
* the next word identified by *plWordInfo and *ppvWordInfo.
*
* @parm LONG* | plWordInfo |
* Pointer to information used to determine what the next word is
* in the stop word list. Passing -1 along with NULL for *ppvWordInfo
* means start at the beginning. On exit, this contains an appropriate
* value that can be passed in again to get the next word, provided
* that no intervening calls have been made to MVStopListAddWord.
*
* @parm LPVOID* | ppvWordInfo |
* Indirect pointer to information used to determine what the next word is
* in the stop word list. Passing NULL along with -1 for *plWordInfo
* means start at the beginning. On exit, this contains an appropriate
* value that can be passed in again to get the next word, provided
* that no intervening calls have been made to MVStopListAddWord.
*
* @rdesc S_OK if succeeded
* @rdesc E_OUTOFRANGE if there are no more words in the stop list.
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListEnumWords(_LPSIPB lpsipb,
LST *plstWord, LONG *plWordInfo, LPVOID *ppvWordInfo)
{
    LPCHAIN lpNode;         // Node to report
    LONG iBucket;           // Hash chain the node belongs to
    LONG iLastBucket;       // Index of the last hash chain

    if (!(lpsipb && plstWord && plWordInfo && ppvWordInfo))
        return (SetErrReturn(E_POINTER));

    iLastBucket = lpsipb->wTabSize - 1;

    // Resume from the node handed out by the previous call, if any:
    // its successor (possibly NULL) is the next candidate.
    lpNode = (LPCHAIN) *ppvWordInfo;
    if (lpNode != NULL)
        lpNode = CH_NEXT(lpNode);

    // With no candidate (first call, or the previous chain is exhausted)
    // move forward through the hash table until a non-empty chain is
    // found or the table ends.
    for (iBucket = *plWordInfo; lpNode == NULL && iBucket < iLastBucket; )
        lpNode = lpsipb->HashTab[++iBucket];

    // Still nothing: the search above can only stop on a NULL node once
    // the last chain has been visited, so the enumeration is over.
    if (lpNode == NULL)
        return (SetErrReturn(E_OUTOFRANGE));

    // Report the word and hand back the state needed for the next call
    *plstWord = &CH_WORD(lpNode);
    *ppvWordInfo = (LPVOID)lpNode;
    *plWordInfo = iBucket;
    return (S_OK);
}
/*************************************************************************
* @doc API RETRIEVAL
*
* @func HRESULT FAR PASCAL | MVStopListFindWordPtr |
* Find a word in the stop list and return a pointer to it.
*
* @parm LPSIPB | lpsipb |
* Pointer to stop-word information structure
*
* @parm LST | lstWord |
* Pointer to a 2-byte length preceded Pascal
* string containing the word to find.
*
* @parm LST* | plstWordInList |
* On exit, indirect pointer to 2-byte length preceded Pascal
* string for the word that was found.
*
* @rdesc S_OK if succeeded
* @rdesc E_NOTFOUND if the word isn't in the stop list
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVStopListFindWordPtr(_LPSIPB lpsipb,
LST lstWord, LST *plstWordInList)
{
    LPCHAIN lpNode = MVStopListFind(lpsipb, lstWord);

    /* No node means the word is not in the stop list; the output
     * pointer is left untouched in that case.
     */
    if (lpNode == NULL)
        return (E_NOTFOUND);

    /* Hand back a pointer to the word stored inside the list */
    *(LST UNALIGNED * UNALIGNED)plstWordInList = &CH_WORD(lpNode);
    return (S_OK);
}