2390 lines
77 KiB
C
2390 lines
77 KiB
C
|
/*************************************************************************
|
||
|
* *
|
||
|
* SEARCH.C *
|
||
|
* *
|
||
|
* Copyright (C) Microsoft Corporation 1990-1994 *
|
||
|
* All Rights reserved. *
|
||
|
* *
|
||
|
**************************************************************************
|
||
|
* *
|
||
|
* Module Intent *
|
||
|
* Search Core Engine
|
||
|
* *
|
||
|
**************************************************************************
|
||
|
* *
|
||
|
* Current Owner: BinhN *
|
||
|
* *
|
||
|
**************************************************************************/
|
||
|
#include <verstamp.h>
|
||
|
SETVERSIONSTAMP(MVSR);
|
||
|
|
||
|
#include <mvopsys.h>
|
||
|
#include <mem.h>
|
||
|
#include <memory.h>
|
||
|
#ifdef DOS_ONLY
|
||
|
#include <stdio.h>
|
||
|
#include <assert.h>
|
||
|
#endif
|
||
|
#include <mvsearch.h>
|
||
|
#include <groups.h>
|
||
|
#include "common.h"
|
||
|
#include "search.h"
|
||
|
|
||
|
#ifdef _DEBUG
|
||
|
static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
|
||
|
#endif
|
||
|
|
||
|
#if 0
|
||
|
#define KEEP_SEARCHING ((int)-1)
|
||
|
#define STRING_MATCH 0
|
||
|
#define NOT_FOUND 1
|
||
|
#endif
|
||
|
|
||
|
#define KEEP_OCC TRUE
|
||
|
#define RESET_OCC_FLAG TRUE
|
||
|
|
||
|
typedef struct
|
||
|
{
|
||
|
unsigned char b1;
|
||
|
unsigned char b2;
|
||
|
} TWOBYTE;
|
||
|
|
||
|
#ifdef _BIG_E
|
||
|
#define BYTE1(p) (((TWOBYTE FAR *)&p)->b1)
|
||
|
#define BYTE2(p) (((TWOBYTE FAR *)&p)->b2)
|
||
|
#else
|
||
|
#define BYTE1(p) (((TWOBYTE FAR *)&p)->b2)
|
||
|
#define BYTE2(p) (((TWOBYTE FAR *)&p)->b1)
|
||
|
#endif
|
||
|
|
||
|
typedef HRESULT (PASCAL FAR *FDECODE) (PNODEINFO, CKEY, LPDW);
|
||
|
|
||
|
/*************************************************************************
|
||
|
* EXTERNAL VARIABLES
|
||
|
* All those variables must be read only
|
||
|
*************************************************************************/
|
||
|
|
||
|
extern OPSYM OperatorArray[];
|
||
|
extern FNHANDLER HandlerFuncTable[];
|
||
|
extern FDECODE DecodeTable[];
|
||
|
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* API FUNCTIONS
|
||
|
* Those functions should be exported in a .DEF file
|
||
|
*************************************************************************/
|
||
|
PUBLIC LPIDX EXPORT_API FAR PASCAL MVIndexOpen (HFPB, LSZ, PHRESULT);
|
||
|
PUBLIC void EXPORT_API FAR PASCAL MVIndexClose (LPIDX);
|
||
|
PUBLIC LPHL EXPORT_API FAR PASCAL MVIndexSearch (LPIDX, LPQT,
|
||
|
PSRCHINFO, LPGROUP, PHRESULT);
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* INTERNAL GLOBAL FUNCTIONS
|
||
|
* All of them should be declared far, unless they are known to be called
|
||
|
* in the same segment
|
||
|
*************************************************************************/
|
||
|
VOID PASCAL FAR CleanMarkedOccList (LPITOPIC);
|
||
|
VOID PASCAL FAR TopicWeightCalc(LPITOPIC);
|
||
|
BOOL NEAR PASCAL FGroupLookup(LPGROUP, DWORD);
|
||
|
LPB PASCAL FAR NextChar (LPB pStr, BYTE prgbLeadByteTable[]);
|
||
|
__inline BOOL PASCAL FAR CompareChar (LPB pStr1, LPB pStr2, BYTE prgbLeadByteTable[]);
|
||
|
|
||
|
/*************************************************************************
|
||
|
*
|
||
|
* INTERNAL PRIVATE FUNCTIONS
|
||
|
* All of them should be declared near
|
||
|
*************************************************************************/
|
||
|
|
||
|
#ifndef SIMILARITY
|
||
|
PUBLIC int PASCAL FAR CompareTerm(_LPQTNODE lpQtNode, LST lstTermWord,
|
||
|
LST lstBtreeWord, DWORD dwBtreeFieldId, BYTE prgbLeadByteTable[]);
|
||
|
#else
|
||
|
PRIVATE int PASCAL NEAR CompareTerm(_LPQTNODE lpQtNode, LST lstTermWord,
|
||
|
LST lstBtreeWord, DWORD dwBtreeFieldId, BYTE prgbLeadByteTable[]);
|
||
|
#endif
|
||
|
|
||
|
|
||
|
#ifndef SIMILARITY
|
||
|
PUBLIC HRESULT PASCAL FAR SkipOccList(_LPQT lpqt, PNODEINFO pNodeInfo, DWORD dwOccs);
|
||
|
#else
|
||
|
PRIVATE HRESULT PASCAL NEAR SkipOccList(_LPQT lpqt, PNODEINFO pNodeInfo, DWORD dwOccs);
|
||
|
#endif
|
||
|
|
||
|
PRIVATE HRESULT PASCAL NEAR FCaptureOccList(_LPIDX, LPRETV, PNODEINFO, DWORD, int,
|
||
|
_LPQTNODE, int);
|
||
|
PRIVATE HRESULT PASCAL NEAR LoadNode (_LPQT, int, _LPQTNODE, _LPQTNODE,
|
||
|
LPRETV, int, int);
|
||
|
PRIVATE int PASCAL NEAR WildCardCompare (LPB, LPB, BYTE []);
|
||
|
PRIVATE HRESULT PASCAL NEAR GetWordDataLocation (_LPQT, LPRETV,
|
||
|
_LPQTNODE);
|
||
|
PRIVATE HRESULT PASCAL NEAR GetWordData (_LPQT, LPRETV,
|
||
|
int, _LPQTNODE, _LPQTNODE, int, int);
|
||
|
|
||
|
|
||
|
#define FGetDword(a,b,c) (*DecodeTable[b.cschScheme])(a, b, c)
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc EXTERNAL API RETRIEVAL
|
||
|
*
|
||
|
* @func LPIDX FAR PASCAL | MVIndexOpen |
|
||
|
* Open an index file
|
||
|
*
|
||
|
* @parm HANDLE | hfpbSysFile |
|
||
|
* If non-zero, this is the handle of an already opened system file
|
||
|
*
|
||
|
* @parm LSZ | lszFilename |
|
||
|
* If hpfbSysFile is non-zero, this is the index subfile filename.
|
||
|
* If it is 0, it is the filename of a regular DOS file
|
||
|
*
|
||
|
* @parm PHRESULT | phr |
|
||
|
* Pointer to error buffer. This error buffer will be used for all
|
||
|
* subsequential index retrieval related calls
|
||
|
*
|
||
|
* @rdesc If succeeded, the function will return a pointer to index structure.
|
||
|
* If failed, it will return NULL, and the error buffer will contain the
|
||
|
* description of the error
|
||
|
*************************************************************************/
|
||
|
|
||
|
PUBLIC LPIDX EXPORT_API FAR PASCAL MVIndexOpen (HFPB hfpbSysFile,
|
||
|
LSZ lszFilename, PHRESULT phr)
|
||
|
{
|
||
|
_LPIDX lpidx; // Index information.
|
||
|
HIDX hidx; // Handle to "lpidx".
|
||
|
HRESULT fRet;
|
||
|
HANDLE handle;
|
||
|
LANGID langidFull;
|
||
|
LANGID langidPrimary;
|
||
|
|
||
|
/* Allocate an IDX structure */
|
||
|
if ((hidx = _GLOBALALLOC(GMEM_MOVEABLE | GMEM_ZEROINIT,
|
||
|
sizeof(IDX))) == NULL)
|
||
|
{
|
||
|
SetErrCode(phr, E_OUTOFMEMORY);
|
||
|
return NULL;
|
||
|
}
|
||
|
lpidx = (_LPIDX)_GLOBALLOCK(hidx);
|
||
|
lpidx->hStruct = hidx;
|
||
|
|
||
|
#if 0
|
||
|
lpidx->lpfnfInterCb = lpfnfInterCb;
|
||
|
lpidx->lpvCbParams = lpvCbParams;
|
||
|
#endif
|
||
|
|
||
|
lpidx->lperrb = phr;
|
||
|
|
||
|
/* Regular DOS file */
|
||
|
if ((lpidx->hfpbIdxSubFile = (HFPB)FileOpen (hfpbSysFile, lszFilename,
|
||
|
hfpbSysFile ? FS_SUBFILE : REGULAR_FILE, READ, phr)) == 0)
|
||
|
{
|
||
|
exit0:
|
||
|
FreeHandle(hidx);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
if ((fRet = ReadIndexHeader(lpidx->hfpbIdxSubFile, &lpidx->ih)) != S_OK)
|
||
|
{
|
||
|
exit01:
|
||
|
SetErrCode (phr, fRet);
|
||
|
IndexCloseFile(lpidx);
|
||
|
goto exit0;
|
||
|
}
|
||
|
if (lpidx->ih.version != VERCURRENT || lpidx->ih.FileStamp != INDEX_STAMP)
|
||
|
{
|
||
|
fRet = E_BADVERSION;
|
||
|
goto exit01;
|
||
|
}
|
||
|
|
||
|
/* Set the slack size */
|
||
|
lpidx->wSlackSize = LEAF_SLACK;
|
||
|
|
||
|
langidPrimary = PRIMARYLANGID(langidFull = LANGIDFROMLCID(lpidx->ih.lcid));
|
||
|
|
||
|
/* Build the Lead-Byte Table */
|
||
|
if (langidPrimary == LANG_JAPANESE
|
||
|
|| langidPrimary == LANG_CHINESE
|
||
|
|| langidPrimary == LANG_KOREAN)
|
||
|
{
|
||
|
if (NULL == (handle = _GLOBALALLOC
|
||
|
(GMEM_MOVEABLE | GMEM_ZEROINIT, 256)))
|
||
|
{
|
||
|
SetErrCode (phr, E_OUTOFMEMORY);
|
||
|
fRet = E_OUTOFMEMORY;
|
||
|
goto exit01;
|
||
|
}
|
||
|
lpidx->pLeadByteTable = (LPBYTE)_GLOBALLOCK (handle);
|
||
|
lpidx->hLeadByteTable = handle;
|
||
|
switch (langidPrimary)
|
||
|
{
|
||
|
case LANG_JAPANESE:
|
||
|
MEMSET (lpidx->pLeadByteTable + 0x81, '\1', 0x1F);
|
||
|
MEMSET (lpidx->pLeadByteTable + 0xE0, '\1', 0x1D);
|
||
|
break;
|
||
|
|
||
|
case LANG_CHINESE:
|
||
|
switch (SUBLANGID(langidFull))
|
||
|
{
|
||
|
case SUBLANG_CHINESE_TRADITIONAL:
|
||
|
MEMSET (lpidx->pLeadByteTable + 0x81, '\1', 0x7E);
|
||
|
break;
|
||
|
|
||
|
case SUBLANG_CHINESE_SIMPLIFIED:
|
||
|
default:
|
||
|
// Simplified Chinese and Korean have the same lead-bytes
|
||
|
MEMSET (lpidx->pLeadByteTable + 0xA1, '\1', 0x5E);
|
||
|
break;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case LANG_KOREAN:
|
||
|
// Simplified Chinese and Korean have the same lead-bytes
|
||
|
MEMSET (lpidx->pLeadByteTable + 0xA1, '\1', 0x5E);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if ((fRet = TopNodeRead(lpidx)) != S_OK)
|
||
|
{
|
||
|
if (lpidx->pLeadByteTable)
|
||
|
{
|
||
|
_GLOBALUNLOCK (lpidx->hLeadByteTable);
|
||
|
_GLOBALFREE (lpidx->hLeadByteTable);
|
||
|
}
|
||
|
goto exit01;
|
||
|
}
|
||
|
|
||
|
/* The the callback key */
|
||
|
lpidx->dwKey = CALLBACKKEY;
|
||
|
return (LPIDX)lpidx;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc EXTERNAL API RETRIEVAL
|
||
|
*
|
||
|
* @func void FAR PASCAL | MVIndexClose |
|
||
|
* Close an index file, and release all allocated memory associated with
|
||
|
* the index
|
||
|
*
|
||
|
* @parm LPIDX | lpidx |
|
||
|
* Pointer to index information structure (got from IndexOpen())
|
||
|
*************************************************************************/
|
||
|
// Shuts down an index.
|
||
|
|
||
|
PUBLIC void EXPORT_API FAR PASCAL MVIndexClose(_LPIDX lpidx)
|
||
|
{
|
||
|
if (lpidx == NULL)
|
||
|
return;
|
||
|
TopNodePurge(lpidx);
|
||
|
IndexCloseFile(lpidx);
|
||
|
if (lpidx->pLeadByteTable)
|
||
|
{
|
||
|
_GLOBALUNLOCK (lpidx->hLeadByteTable);
|
||
|
_GLOBALFREE (lpidx->hLeadByteTable);
|
||
|
}
|
||
|
FreeHandle(lpidx->hStruct);
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc EXTERNAL API RETRIEVAL
|
||
|
*
|
||
|
* @func void FAR PASCAL | MVGetIndexInfoLpidx |
|
||
|
* Fills in an INDEXINFO struct given an LPIDX. All members of the
|
||
|
* INDEXINFO struct are filled in except for dwMemSize.
|
||
|
*
|
||
|
* @parm LPIDX | lpidx |
|
||
|
* Pointer to index information structure (got from IndexOpen())
|
||
|
* @parm INDEXINFO* | lpindexinfo |
|
||
|
* Pointer to public index information structure.
|
||
|
*************************************************************************/
|
||
|
PUBLIC void EXPORT_API PASCAL FAR MVGetIndexInfoLpidx(LPIDX lpidx,
|
||
|
INDEXINFO *lpindexinfo)
|
||
|
{
|
||
|
_LPIDX _lpidx;
|
||
|
|
||
|
if (lpidx == NULL || lpindexinfo == NULL)
|
||
|
return;
|
||
|
|
||
|
_lpidx = (_LPIDX) lpidx;
|
||
|
|
||
|
lpindexinfo->dwBlockSize = _lpidx->ih.dwBlockSize;
|
||
|
lpindexinfo->Occf = _lpidx->ih.occf;
|
||
|
lpindexinfo->Idxf = _lpidx->ih.idxf;
|
||
|
lpindexinfo->dwCodePageID = _lpidx->ih.dwCodePageID;
|
||
|
lpindexinfo->lcid = _lpidx->ih.lcid;
|
||
|
lpindexinfo->dwBreakerInstID = _lpidx->ih.dwBreakerInstID;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc EXTERNAL API RETRIEVAL
|
||
|
*
|
||
|
* @func void FAR PASCAL | MVStopSearch |
|
||
|
* This function will stop the search process. Typically it can be
|
||
|
* only used in a multithreaded environment, where another thread
|
||
|
* will use the query structure, which is currently accessed by the
|
||
|
* the current search, to tell the search process to stop.
|
||
|
*
|
||
|
* @parm LPQT | lpqt |
|
||
|
* Pointer to the query structure used by MVIndexSearch()
|
||
|
*************************************************************************/
|
||
|
PUBLIC VOID EXPORT_API FAR PASCAL MVStopSearch (_LPQT lpqt)
|
||
|
{
|
||
|
lpqt->fInterrupt = (BYTE)E_INTERRUPT;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc EXTERNAL API RETRIEVAL
|
||
|
*
|
||
|
* @func void FAR PASCAL | MVSearchSetCallback |
|
||
|
* Set appropriate user's call back function to be called during the search.
|
||
|
* The user's function will be polled at interval. It should return
|
||
|
* S_OK if there is nothing to process, E_INTERRUPT to abort the
|
||
|
* search and dispose the search result, or ERR_TOOMANYDOCS to abort the
|
||
|
* search, but keep the partial result
|
||
|
* @parm LPQT | lpqt |
|
||
|
* Pointer to query structure returned by MVQueryParse().
|
||
|
* @parm PFCALLBACK_MSG | pfCallBackMsg |
|
||
|
* Pointer to call back structure
|
||
|
* @rdesc Return S_OK if successful, or E_INVALIDARG if any parameter
|
||
|
* is NULL
|
||
|
*************************************************************************/
|
||
|
PUBLIC HRESULT EXPORT_API FAR PASCAL MVSearchSetCallback (_LPQT lpqt,
|
||
|
PFCALLBACK_MSG pfCallBackMsg)
|
||
|
{
|
||
|
if (lpqt == NULL || pfCallBackMsg == NULL)
|
||
|
return(E_INVALIDARG);
|
||
|
lpqt->cStruct.Callback = *pfCallBackMsg;
|
||
|
return(S_OK);
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc EXTERNAL API RETRIEVAL
|
||
|
*
|
||
|
* @func LPHL FAR PASCAL | MVIndexSearch |
|
||
|
* Carry the search
|
||
|
*
|
||
|
* @parm LPIDX | lpidx |
|
||
|
* Pointer to index information.
|
||
|
*
|
||
|
* @parm LPQT | lpqt |
|
||
|
* Pointer to query tree (returned by MVQueryParse())
|
||
|
*
|
||
|
* @parm PSRCHINFO | pSrchInfo |
|
||
|
* Pointer to search information data
|
||
|
*
|
||
|
* @parm _LPGROUP | lpResGroup |
|
||
|
* Pointer to resulting group
|
||
|
*
|
||
|
* @parm PHRESULT | phr |
|
||
|
* Pointer to error buffer
|
||
|
*
|
||
|
* @rdesc Pointer to hitlist structure if succeeded, even there is
|
||
|
* no hits (use MVHitListEntries() to find out how many hits have been
|
||
|
* returned). It will return NULL if failed. The error buffer
|
||
|
* (see IndexOpen()) will contain descriptions about the cause of
|
||
|
* the failure. There is one special case when the function returns
|
||
|
* a non-null pointer, even there is error, that is when it can't
|
||
|
* write the result to the disk, and everything is still in memory.
|
||
|
*
|
||
|
*************************************************************************/
|
||
|
|
||
|
PUBLIC LPHL EXPORT_API FAR PASCAL MVIndexSearch (_LPIDX lpidx,
|
||
|
_LPQT lpqt, PSRCHINFO pSrchInfo, _LPGROUP lpResGroup, PHRESULT phr)
|
||
|
{
|
||
|
HRESULT fRet; // Return from this function.
|
||
|
LPRETV lpRetV; // Retrieval memory/files.
|
||
|
GHANDLE hRetv;
|
||
|
OCCF occf; // Index occurence flags temporary variable.
|
||
|
_LPHL lphl; // Pointer to hitlist
|
||
|
_LPQTNODE lpTreeTop;
|
||
|
|
||
|
if (lpidx == NULL || lpqt == NULL || pSrchInfo == NULL)
|
||
|
{
|
||
|
/* We get some bad arguments!! */
|
||
|
SetErrCode (phr, E_INVALIDARG);
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
fRet = E_FAIL; // Assume thing will go wrong
|
||
|
|
||
|
// Transfer all the information about the index to the query tree
|
||
|
lpqt->foIdxRoot = lpidx->ih.foIdxRoot; /* Top node offset */
|
||
|
lpqt->dwBlockSize = lpidx->ih.dwBlockSize; /* Index block size */
|
||
|
lpqt->cIdxLevels = lpidx->ih.cIdxLevels; /* Index's depth */
|
||
|
lpqt->occf = lpidx->ih.occf;
|
||
|
lpqt->idxf = lpidx->ih.idxf;
|
||
|
lpqt->foIdxRoot = lpidx->ih.foIdxRoot;
|
||
|
lpqt->ckeyTopicId = lpidx->ih.ckeyTopicId;
|
||
|
lpqt->ckeyOccCount = lpidx->ih.ckeyOccCount;
|
||
|
lpqt->ckeyWordCount = lpidx->ih.ckeyWordCount;
|
||
|
lpqt->ckeyOffset = lpidx->ih.ckeyOffset;
|
||
|
if (lpqt->cQuery == 1)
|
||
|
lpqt->fFlag |= ALL_ANDORNOT;
|
||
|
|
||
|
#if 1
|
||
|
if (pSrchInfo->dwMemAllowed)
|
||
|
{
|
||
|
if (DO_FAST_MERGE(pSrchInfo, lpqt))
|
||
|
{
|
||
|
SetBlockCount (lpqt->lpTopicMemBlock, (WORD)(pSrchInfo->dwMemAllowed /
|
||
|
(sizeof(TOPIC_LIST) * cTOPIC_PER_BLOCK)));
|
||
|
|
||
|
SetBlockCount (lpqt->lpOccMemBlock, 1);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
SetBlockCount (lpqt->lpTopicMemBlock, (WORD)(pSrchInfo->dwMemAllowed * 2 /
|
||
|
(5 * sizeof(TOPIC_LIST) * cTOPIC_PER_BLOCK)));
|
||
|
|
||
|
SetBlockCount (lpqt->lpOccMemBlock, (WORD)(pSrchInfo->dwMemAllowed * 3 /
|
||
|
(5 * sizeof(OCCURENCE) * cOCC_PER_BLOCK)));
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/* Allocate hitlist */
|
||
|
if ((lphl = (_LPHL)GLOBALLOCKEDSTRUCTMEMALLOC(sizeof (HL))) == NULL)
|
||
|
{
|
||
|
SetErrCode(phr, E_OUTOFMEMORY);
|
||
|
return NULL;
|
||
|
}
|
||
|
lphl->lLastTopicId = 0xffffffff;
|
||
|
lphl->lcMaxTopic = lpidx->ih.lcTopics;
|
||
|
|
||
|
/* Allocate a return value structure */
|
||
|
|
||
|
if ((hRetv = _GLOBALALLOC(GMEM_MOVEABLE | GMEM_ZEROINIT,
|
||
|
sizeof(RETV))) == NULL)
|
||
|
{
|
||
|
SetErrCode(phr, E_OUTOFMEMORY);
|
||
|
exit0:
|
||
|
if (fRet != S_OK && fRet != E_TOOMANYTOPICS)
|
||
|
{
|
||
|
MVHitListDispose(lphl);
|
||
|
lphl = NULL;
|
||
|
}
|
||
|
return (LPHL)lphl;
|
||
|
}
|
||
|
|
||
|
lpRetV = (LPRETV)_GLOBALLOCK(hRetv);
|
||
|
lpRetV->lpqt = lpqt;
|
||
|
|
||
|
if ((fRet = TopNodeRead(lpidx)) != S_OK)
|
||
|
{
|
||
|
SetErrCode (phr, fRet);
|
||
|
exit02:
|
||
|
FreeHandle(hRetv);
|
||
|
goto exit0;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// Count the number of occurence fields present. My retrieval
|
||
|
// occurence record is going to cost 4 bytes per field.
|
||
|
//
|
||
|
|
||
|
occf = lpqt->occf;
|
||
|
for (lpRetV->cOccFields = 0; occf; lpRetV->cOccFields++)
|
||
|
occf &= occf - 1;
|
||
|
|
||
|
lpqt->dwOccSize = lpRetV->dwOccSize =
|
||
|
sizeof(OCCURENCE) + lpRetV->cOccFields * sizeof (DWORD);
|
||
|
|
||
|
lpRetV->fRank = ((pSrchInfo->Flag &
|
||
|
(QUERYRESULT_RANK | QUERYRESULT_NORMALIZE)) != 0);
|
||
|
|
||
|
// Set pointer to various buffer
|
||
|
lpRetV->LeafInfo.pTopNode = lpidx->lrgbTopNode;
|
||
|
lpRetV->LeafInfo.pStemNode = lpRetV->pNodeBuf;
|
||
|
lpRetV->LeafInfo.pLeafNode = lpRetV->pNodeBuf;
|
||
|
lpRetV->LeafInfo.pDataNode = lpRetV->pDataBuf;
|
||
|
lpRetV->LeafInfo.hfpbIdx = lpidx->hfpbIdxSubFile; // Index file to read from
|
||
|
|
||
|
lpRetV->DataInfo.pTopNode = lpidx->lrgbTopNode;
|
||
|
lpRetV->DataInfo.pStemNode = lpRetV->pNodeBuf;
|
||
|
lpRetV->DataInfo.pLeafNode = lpRetV->pNodeBuf;
|
||
|
lpRetV->DataInfo.pDataNode = lpRetV->pDataBuf;
|
||
|
lpRetV->DataInfo.hfpbIdx = lpidx->hfpbIdxSubFile; // Index file to read from
|
||
|
lpRetV->lcid = lpidx->ih.lcid;
|
||
|
lpRetV->pLeadByteTable = lpidx->pLeadByteTable;
|
||
|
|
||
|
// Save search information
|
||
|
lpRetV->SrchInfo = *pSrchInfo;
|
||
|
if (pSrchInfo->dwValue == 0)
|
||
|
lpRetV->SrchInfo.dwValue = (DWORD)(-1);
|
||
|
else
|
||
|
lpRetV->SrchInfo.dwValue = lpidx->ih.lcTopics/pSrchInfo->dwValue;
|
||
|
|
||
|
if ( (fRet = ResolveTree(lpqt, lpTreeTop = lpqt->lpTopNode,
|
||
|
lpRetV, E_FAIL)) != S_OK)
|
||
|
{
|
||
|
SetErrCode (phr, fRet);
|
||
|
|
||
|
/* Free the Topic and Occurrence memory blocks since they are
|
||
|
* not freed by QueryTreeFree(), or MVHitListDispose() at this
|
||
|
* point
|
||
|
*/
|
||
|
|
||
|
if (fRet != E_TOOMANYTOPICS)
|
||
|
{
|
||
|
BlockFree ((LPV)lpqt->lpTopicMemBlock);
|
||
|
BlockFree ((LPV)lpqt->lpOccMemBlock);
|
||
|
lpqt->lpTopicMemBlock = NULL;
|
||
|
lpqt->lpOccMemBlock = NULL;
|
||
|
goto exit02;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (lpqt->fFlag & HAS_NEAR_RESULT)
|
||
|
{
|
||
|
NearHandlerCleanUp (lpqt, lpTreeTop);
|
||
|
}
|
||
|
|
||
|
/* Create a group if requested */
|
||
|
if ((pSrchInfo->Flag & QUERYRESULT_GROUPCREATE) && lpResGroup)
|
||
|
{
|
||
|
LPITOPIC lpCurTopic; /* Topic's current pointer */
|
||
|
LPB lpbGrpBitVect;
|
||
|
DWORD maxTopicId;
|
||
|
/* Initialize the pointer */
|
||
|
lpbGrpBitVect = lpResGroup->lpbGrpBitVect;
|
||
|
|
||
|
maxTopicId = lpResGroup->dwSize * 8;
|
||
|
for (lpCurTopic = QTN_TOPICLIST(lpTreeTop); lpCurTopic;
|
||
|
lpCurTopic = lpCurTopic->pNext)
|
||
|
{
|
||
|
/* Set the bit */
|
||
|
if (lpCurTopic->dwTopicId < maxTopicId)
|
||
|
{
|
||
|
lpbGrpBitVect[(DWORD)(lpCurTopic->dwTopicId / 8)] |= 1 <<
|
||
|
(lpCurTopic->dwTopicId % 8);
|
||
|
}
|
||
|
}
|
||
|
lpResGroup->lcItem = lpTreeTop->cTopic; // erinfox: this wasn't getting set!
|
||
|
}
|
||
|
|
||
|
if ((pSrchInfo->Flag & QUERYRESULT_UIDSORT) == 0)
|
||
|
{
|
||
|
|
||
|
// if we are skipping occurrence info, topic weights
|
||
|
// will have already been calculated directly
|
||
|
if (lpRetV->fRank && !DO_FAST_MERGE(pSrchInfo, lpqt))
|
||
|
TopicWeightCalc(QTN_TOPICLIST(lpTreeTop));
|
||
|
|
||
|
if (lpqt->fFlag & (HAS_NEAR_RESULT | ORDERED_BASED))
|
||
|
{
|
||
|
SortResult (lpqt, lpTreeTop, ORDERED_BASED);
|
||
|
lpqt->fFlag &= ~(HAS_NEAR_RESULT | TO_BE_SORTED);
|
||
|
}
|
||
|
|
||
|
/* Sort the result depending on ranking or not */
|
||
|
if (lpRetV->fRank)
|
||
|
SortResult ((LPQT)lpqt, lpTreeTop, WEIGHT_BASED);
|
||
|
else
|
||
|
SortResult ((LPQT)lpqt, lpTreeTop, HIT_COUNT_BASED);
|
||
|
}
|
||
|
|
||
|
/* Update HitList info structure, cut off the unwanted list */
|
||
|
if (lphl->lpTopicList = lpTreeTop->lpTopicList)
|
||
|
lphl->lcReturnedTopics = lphl->lcTotalNumOfTopics = lpTreeTop->cTopic;
|
||
|
|
||
|
// Only return the number of topics that the user requested
|
||
|
// if dwTopicCount == 0, it means that the user wants to return all
|
||
|
|
||
|
if (pSrchInfo->dwTopicCount != 0 &&
|
||
|
pSrchInfo->dwTopicCount < lphl->lcReturnedTopics)
|
||
|
lphl->lcReturnedTopics = pSrchInfo->dwTopicCount;
|
||
|
|
||
|
lphl->lpOccMemBlock = lpqt->lpOccMemBlock;
|
||
|
lphl->lpTopicMemBlock = lpqt->lpTopicMemBlock;
|
||
|
|
||
|
#if 1
|
||
|
/* WARNING: The following code should be commented out for
|
||
|
* diskless devices. No returned error is checked, since
|
||
|
* if disk writes fail, everything is still in memory
|
||
|
*/
|
||
|
|
||
|
if ((pSrchInfo->Flag & QUERYRESULT_IN_MEM) == 0)
|
||
|
{
|
||
|
if ((fRet = MVHitListFlush (lphl, lphl->lcReturnedTopics)) != S_OK)
|
||
|
SetErrCode (phr, fRet);
|
||
|
}
|
||
|
#endif
|
||
|
goto exit02;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func HRESULT PASCAL NEAR | ResolveTree |
|
||
|
* This function will read in the data from the index file for
|
||
|
* each word, and combine them according to the operators.
|
||
|
*
|
||
|
* @func _LPQT | lpqt |
|
||
|
* Index information
|
||
|
*
|
||
|
* @parm _LPQTNODE | lpQtNode |
|
||
|
* Query tree top node to be resolved
|
||
|
*
|
||
|
* @parm LPRETV | lpRetV |
|
||
|
* Returned values
|
||
|
*
|
||
|
* @parm int | fDivide |
|
||
|
* Divide the weight between occurences
|
||
|
*
|
||
|
* @rdesc S_OK, or other errors
|
||
|
*************************************************************************/
|
||
|
PUBLIC HRESULT PASCAL NEAR ResolveTree(_LPQT lpqt, _LPQTNODE lpQtNode,
|
||
|
LPRETV lpRetV, int fDivide)
|
||
|
{
|
||
|
_LPQTNODE lpLeft; /* Left node */
|
||
|
_LPQTNODE lpRight; /* Right node */
|
||
|
WORD OpVal; /* Operator value */
|
||
|
WORD NodeType; /* type of node */
|
||
|
HRESULT fRet = S_OK; /* Return value */
|
||
|
HRESULT fOutOfMemory = S_OK;
|
||
|
_LPQT lpQueryTree = lpRetV->lpqt;
|
||
|
_LPQTNODE FAR *rgStack;
|
||
|
HANDLE hStack;
|
||
|
int StackTop = -1;
|
||
|
|
||
|
/* Allocate a stack large enough to handle the tree's "recursion" */
|
||
|
if ((hStack = _GLOBALALLOC(DLLGMEM_ZEROINIT, (LCB)lpQueryTree->TreeDepth *
|
||
|
sizeof(_LPQTNODE))) == NULL)
|
||
|
return E_OUTOFMEMORY;
|
||
|
|
||
|
rgStack = (_LPQTNODE FAR *)_GLOBALLOCK(hStack);
|
||
|
|
||
|
/* Traverse the tree */
|
||
|
for (; lpQtNode;)
|
||
|
{
|
||
|
if (QTN_FLAG(lpQtNode) & PROCESSED)
|
||
|
{
|
||
|
/* This node has already been processed, just move up one
|
||
|
* level, and continue the process
|
||
|
*/
|
||
|
goto PopStack;
|
||
|
}
|
||
|
|
||
|
/* Handle TERM_NODE */
|
||
|
|
||
|
if ((NodeType = QTN_NODETYPE(lpQtNode)) == TERM_NODE)
|
||
|
{
|
||
|
lpQueryTree->lpTopicStartSearch = NULL;
|
||
|
lpQueryTree->lpOccStartSearch = NULL;
|
||
|
if ((fRet = LoadNode (lpqt, OR_OP, NULL, lpQtNode,
|
||
|
lpRetV, fDivide, fOutOfMemory)) != S_OK)
|
||
|
{
|
||
|
if (fRet != E_TOOMANYTOPICS)
|
||
|
goto Exit;
|
||
|
|
||
|
fOutOfMemory = E_TOOMANYTOPICS;
|
||
|
// kevynct: delay abort until processing of operator node
|
||
|
// goto TooManyHits;
|
||
|
}
|
||
|
if (QTN_TOPICLIST(lpQtNode))
|
||
|
QTN_NODETYPE(lpQtNode) = EXPRESSION_NODE;
|
||
|
else
|
||
|
QTN_NODETYPE(lpQtNode) = NULL_NODE;
|
||
|
|
||
|
/* Mark that the node has been processed */
|
||
|
QTN_FLAG(lpQtNode) |= PROCESSED;
|
||
|
goto PopStack;
|
||
|
}
|
||
|
|
||
|
OpVal = lpQtNode->OpVal;
|
||
|
if (NodeType == OPERATOR_NODE)
|
||
|
{
|
||
|
|
||
|
if ((QTN_FLAG(lpLeft = QTN_LEFT(lpQtNode)) & PROCESSED) == 0)
|
||
|
{
|
||
|
/* Resolve left tree if we have not resolve it yet
|
||
|
* Push the current node onto the stack, and process the
|
||
|
* left node
|
||
|
*/
|
||
|
rgStack[++StackTop] = lpQtNode;
|
||
|
lpQtNode = lpLeft;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* Assertion for correctness */
|
||
|
RET_ASSERT (QTN_NODETYPE(lpLeft) == EXPRESSION_NODE ||
|
||
|
QTN_NODETYPE(lpLeft) == NULL_NODE);
|
||
|
|
||
|
/* Binary operator. */
|
||
|
|
||
|
/* Special cases */
|
||
|
if (QTN_NODETYPE(lpLeft) == NULL_NODE)
|
||
|
{
|
||
|
switch (OpVal)
|
||
|
{
|
||
|
case AND_OP: // NULL & a = NULL
|
||
|
case NEAR_OP: // NULL NEAR a = NULL
|
||
|
case PHRASE_OP: // NULL PHRASE a = NULL ??
|
||
|
case NOT_OP: // NULL not a = NULL
|
||
|
/*
|
||
|
* Change the sub-tree to a node and forget about
|
||
|
* the right sub-tree that is not processed yet
|
||
|
*/
|
||
|
*lpQtNode = *lpLeft;
|
||
|
QTN_RIGHT(lpQtNode) = QTN_LEFT(lpQtNode) = NULL;
|
||
|
goto PopStack;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// kevynct: Handle partial hit list:
|
||
|
//
|
||
|
// In case we run out of memory for the left tree, we can sometimes still
|
||
|
// partially handle the right tree. For example, we keep going if AND-like op with
|
||
|
// right term node since this will likely at least increase chance of a smaller, more
|
||
|
// meaningful result. For OR-like operators, we ignore right sub-tree altogether if
|
||
|
// we haven't already traversed it.
|
||
|
//
|
||
|
// In any case, if there was a partial hitlist this function will still return
|
||
|
// with E_TOOMANYTOPICS.
|
||
|
|
||
|
if (fOutOfMemory)
|
||
|
{
|
||
|
switch (OpVal)
|
||
|
{
|
||
|
case OR_OP:
|
||
|
// if right subtree already processed, keep it, since all memory
|
||
|
// has already been allocated by this point and the handler will merely
|
||
|
// combine.
|
||
|
if (QTN_FLAG(QTN_RIGHT(lpQtNode)) & PROCESSED)
|
||
|
break;
|
||
|
/*
|
||
|
* Change the sub-tree to a node and forget about
|
||
|
* the right sub-tree that is not processed yet
|
||
|
*/
|
||
|
*lpQtNode = *lpLeft;
|
||
|
QTN_RIGHT(lpQtNode) = QTN_LEFT(lpQtNode) = NULL;
|
||
|
goto PopStack;
|
||
|
|
||
|
case AND_OP:
|
||
|
case NEAR_OP:
|
||
|
case PHRASE_OP:
|
||
|
case NOT_OP:
|
||
|
// continue processing if right node is a single term OR we've already
|
||
|
// processed it. otherwise, another left node will get loaded later and we know we are
|
||
|
// already oom.
|
||
|
|
||
|
if ((QTN_FLAG(QTN_RIGHT(lpQtNode)) & PROCESSED)
|
||
|
||
|
||
|
QTN_NODETYPE(QTN_RIGHT(lpQtNode)) == TERM_NODE)
|
||
|
break;
|
||
|
// warning: fallthru
|
||
|
default:
|
||
|
goto TooManyHits;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Make some preparations before resolving the right tree */
|
||
|
|
||
|
lpQueryTree->lpTopicStartSearch = NULL;
|
||
|
lpQueryTree->lpOccStartSearch = NULL;
|
||
|
|
||
|
/* Do some preparations for NOT operator */
|
||
|
if (OpVal == NOT_OP)
|
||
|
{
|
||
|
MarkTopicList(lpLeft);
|
||
|
}
|
||
|
|
||
|
if (OpVal != PHRASE_OP && OpVal != NEAR_OP &&
|
||
|
(lpQueryTree->fFlag & TO_BE_SORTED))
|
||
|
{
|
||
|
|
||
|
if (lpQueryTree->fFlag & HAS_NEAR_RESULT)
|
||
|
NearHandlerCleanUp (lpQueryTree, lpLeft);
|
||
|
|
||
|
/* We have to sort the left tree, which is the result of PHRASE,
|
||
|
* to remove redundancies. This step should only be done after
|
||
|
* we finishes processing ALL PHRASE terms. Same for NEAR
|
||
|
*/
|
||
|
|
||
|
lpQueryTree->fFlag &= ~TO_BE_SORTED;
|
||
|
SortResult (lpQueryTree, lpLeft, ORDERED_BASED);
|
||
|
}
|
||
|
|
||
|
/* Resolve the right tree */
|
||
|
if (QTN_NODETYPE(lpRight = QTN_RIGHT(lpQtNode)) == TERM_NODE)
|
||
|
{
|
||
|
|
||
|
/* Handle EXPRESSION_TERM */
|
||
|
|
||
|
if ((fRet = LoadNode (lpqt, OpVal, lpLeft, lpRight,
|
||
|
lpRetV, fDivide, fOutOfMemory)) != S_OK)
|
||
|
{
|
||
|
if (fRet != E_TOOMANYTOPICS)
|
||
|
goto Exit;
|
||
|
|
||
|
fOutOfMemory = E_TOOMANYTOPICS;
|
||
|
// kevynct: delay abort until processing of operator node
|
||
|
// goto TooManyHits;
|
||
|
}
|
||
|
|
||
|
switch (OpVal)
|
||
|
{
|
||
|
case NEAR_OP:
|
||
|
RemoveUnmarkedNearTopicList(lpQueryTree, lpLeft);
|
||
|
lpQueryTree->fFlag |= TO_BE_SORTED | HAS_NEAR_RESULT;
|
||
|
break;
|
||
|
|
||
|
case PHRASE_OP:
|
||
|
RemoveUnmarkedTopicList(lpQueryTree, lpLeft, !KEEP_OCC);
|
||
|
lpQueryTree->fFlag |= TO_BE_SORTED;
|
||
|
break;
|
||
|
|
||
|
case AND_OP:
|
||
|
RemoveUnmarkedTopicList(lpQueryTree, lpLeft, KEEP_OCC);
|
||
|
CleanMarkedOccList (lpLeft->lpTopicList);
|
||
|
break;
|
||
|
|
||
|
case NOT_OP:
|
||
|
RemoveUnmarkedTopicList(lpQueryTree, lpLeft, KEEP_OCC);
|
||
|
break;
|
||
|
}
|
||
|
if (QTN_TOPICLIST(lpLeft))
|
||
|
QTN_NODETYPE(lpLeft) = EXPRESSION_NODE;
|
||
|
else
|
||
|
QTN_NODETYPE(lpLeft) = NULL_NODE;
|
||
|
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
|
||
|
if ((QTN_FLAG(lpRight = QTN_RIGHT(lpQtNode)) &
|
||
|
PROCESSED) == 0)
|
||
|
{
|
||
|
|
||
|
/* Resolve right tree if we have not resolved it yet
|
||
|
* Push the current node onto the stack, and process the
|
||
|
* left node
|
||
|
*/
|
||
|
|
||
|
rgStack[++StackTop] = lpQtNode;
|
||
|
lpQtNode = lpRight;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* Apply the operator */
|
||
|
if ((fRet = (*HandlerFuncTable[OpVal])(lpQueryTree,
|
||
|
lpLeft, NULL, (BYTE FAR *)lpRight,
|
||
|
EXPRESSION_EXPRESSION)) != S_OK)
|
||
|
{
|
||
|
|
||
|
/* Copy the result, and release the nodes */
|
||
|
if (fRet != E_TOOMANYTOPICS)
|
||
|
goto Exit;
|
||
|
|
||
|
// kevynct: we check for out of memory below
|
||
|
}
|
||
|
switch (OpVal)
|
||
|
{
|
||
|
case NEAR_OP:
|
||
|
lpQueryTree->fFlag |= HAS_NEAR_RESULT;
|
||
|
RemoveUnmarkedNearTopicList(lpQueryTree, lpLeft);
|
||
|
break;
|
||
|
|
||
|
case PHRASE_OP:
|
||
|
RemoveUnmarkedTopicList(lpQueryTree, lpLeft, !KEEP_OCC);
|
||
|
break;
|
||
|
|
||
|
case NOT_OP:
|
||
|
RemoveUnmarkedTopicList(lpQueryTree, lpLeft, KEEP_OCC);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
*lpQtNode = *lpLeft; // Change the sub-tree to a node
|
||
|
QTN_FLAG(lpQtNode) |= PROCESSED;
|
||
|
#if 0
|
||
|
FreeHandle (lpLeft->hStruct);
|
||
|
FreeHandle (lpRight->hStruct);
|
||
|
#endif
|
||
|
QTN_LEFT(lpQtNode) = QTN_RIGHT(lpQtNode) = NULL;
|
||
|
|
||
|
// kevynct: only quit if this error comes from processing a real operator node
|
||
|
// since fOutOfMemory is not set in that case above, whereas it IS set
|
||
|
// when processing term node. Just a hack.
|
||
|
if (fRet == E_TOOMANYTOPICS && !fOutOfMemory)
|
||
|
goto TooManyHits;
|
||
|
}
|
||
|
PopStack:
|
||
|
if (StackTop >= 0)
|
||
|
{
|
||
|
lpQtNode = rgStack[StackTop];
|
||
|
StackTop--;
|
||
|
}
|
||
|
else
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
|
||
|
// kevynct: if we got this far, the tree was completed, but we may have only
|
||
|
// been processing a partial hitlist (e.g. multiple "and") so we need
|
||
|
// to still notify of possible oom even though all cleanup has been done
|
||
|
fRet = fOutOfMemory;
|
||
|
|
||
|
Exit:
|
||
|
/* Release the stack */
|
||
|
FreeHandle(hStack);
|
||
|
return fRet;
|
||
|
|
||
|
|
||
|
TooManyHits:
|
||
|
/* If we hit that label, it means that we have too many hits
|
||
|
* lpQtNode is the left node, the right node has been
|
||
|
* processed. What we have to do now is to keep the partial
|
||
|
* result, and release all nodes
|
||
|
*/
|
||
|
|
||
|
if (StackTop >= 0)
|
||
|
{
|
||
|
/* The root node is saved on the stack */
|
||
|
lpLeft = QTN_LEFT(*rgStack);
|
||
|
lpRight = QTN_RIGHT(*rgStack);
|
||
|
QTN_LEFT(*rgStack) = QTN_RIGHT(*rgStack) = NULL;
|
||
|
*rgStack[0] = *lpQtNode;
|
||
|
}
|
||
|
FreeHandle(hStack);
|
||
|
return E_TOOMANYTOPICS;
|
||
|
}
|
||
|
|
||
|
VOID PASCAL FAR TopicWeightCalc(LPITOPIC lpCurTopic)
|
||
|
{
|
||
|
LPIOCC lpCurOcc;
|
||
|
WORD wWeight;
|
||
|
|
||
|
for (; lpCurTopic; lpCurTopic = lpCurTopic->pNext)
|
||
|
{
|
||
|
wWeight = 0;
|
||
|
for (lpCurOcc = lpCurTopic->lpOccur; lpCurOcc;
|
||
|
lpCurOcc = lpCurOcc->pNext)
|
||
|
{
|
||
|
if (wWeight > (WORD)(wWeight + lpCurOcc->wWeight))
|
||
|
{
|
||
|
wWeight = MAX_WEIGHT;
|
||
|
break;
|
||
|
}
|
||
|
else
|
||
|
wWeight += lpCurOcc->wWeight;
|
||
|
}
|
||
|
lpCurTopic->wWeight = wWeight;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
#if 0
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func HRESULT FAR PASCAL | GetWordDataLocation |
|
||
|
* This function will search the index for the given word. It will
|
||
|
* return back information about:
|
||
|
* - The number of topics
|
||
|
* - The location of the data
|
||
|
* - The size of the data
|
||
|
* - Pointer to the next word (for wildcard search)
|
||
|
* @parm _LPQT | lpqt |
|
||
|
* Pointer to index structure
|
||
|
* @parm LPRETV | lpRetV |
|
||
|
* Pointer to "globals"
|
||
|
* @parm _LPQTNODE | lpCurQtNode |
|
||
|
* Current node in the query tree
|
||
|
* @rdesc S_OK or other errors
|
||
|
*************************************************************************/
|
||
|
PRIVATE HRESULT NEAR PASCAL GetWordDataLocation (_LPQT lpqt,
|
||
|
LPRETV lpRetV, _LPQTNODE lpCurQtNode)
|
||
|
{
|
||
|
int cLevel;
|
||
|
int cMaxLevel;
|
||
|
int fCheckFieldId;
|
||
|
LST lstSearchStr;
|
||
|
LPB lpCurPtr;
|
||
|
int nCmp;
|
||
|
HRESULT fRet;
|
||
|
int f1stIsWild;
|
||
|
LPB lpMaxAddress;
|
||
|
PNODEINFO pLeafInfo = &lpRetV->LeafInfo;
|
||
|
DWORD dwTemp;
|
||
|
LPB astBTreeWord = lpRetV->pBTreeWord;
|
||
|
WORD wLen;
|
||
|
DWORD dwFieldID;
|
||
|
ERRB errb;
|
||
|
|
||
|
BYTE lstModified[CB_MAX_WORD_LEN + sizeof (SHORT)];
|
||
|
|
||
|
lstSearchStr = QTN_TOKEN(lpCurQtNode)->lpString;
|
||
|
f1stIsWild = (lstSearchStr[2] == WILDCARD_CHAR ||
|
||
|
lstSearchStr[2] == WILDCARD_STAR);
|
||
|
|
||
|
pLeafInfo->nodeOffset = lpqt->foIdxRoot;
|
||
|
pLeafInfo->iLeafLevel = lpqt->cIdxLevels - 1;
|
||
|
pLeafInfo->dwBlockSize = lpqt->dwBlockSize;
|
||
|
|
||
|
/* Copy and change all '*' and '?' to 0. This will
|
||
|
* ensure that things gets compared correctly with
|
||
|
* the top node's entries
|
||
|
*/
|
||
|
MEMCPY (lstModified, lstSearchStr,
|
||
|
*((LPW)lstSearchStr) + sizeof (SHORT));
|
||
|
for (nCmp = *((LPW)lstModified) + 1; nCmp > 2; nCmp--)
|
||
|
{
|
||
|
if (lstModified[nCmp] == '*' || lstModified[nCmp] == '?')
|
||
|
{
|
||
|
lstModified[nCmp] = 0;
|
||
|
lstModified[0] = nCmp - 2;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Point node-resolution variables at the right things. This
|
||
|
* sets these up to read b-tree nodes. Fields not set here are
|
||
|
* set as appropriate elsewhere.
|
||
|
*/
|
||
|
|
||
|
/* Set the flag */
|
||
|
fCheckFieldId = ((lpqt->occf & OCCF_FIELDID) &&
|
||
|
(lpCurQtNode->dwFieldId != DW_NIL_FIELD));
|
||
|
|
||
|
astBTreeWord[0] = 0;
|
||
|
cMaxLevel = lpqt->cIdxLevels - 1;
|
||
|
|
||
|
/*
|
||
|
First we have to find which tree level the word is in. The number of
|
||
|
searches is equal to the number of tree levels at most. The
|
||
|
structure of the directory node is a sequence of:
|
||
|
- Words: PASCAL strings
|
||
|
- Data offset: will tell us where is the
|
||
|
offset of the record in the index file
|
||
|
*/
|
||
|
for (cLevel = 0; cLevel < cMaxLevel ; cLevel++)
|
||
|
{
|
||
|
//
|
||
|
// Get a node.
|
||
|
//
|
||
|
if ((fRet = ReadStemNode ((PNODEINFO)pLeafInfo, cLevel)) != S_OK)
|
||
|
{
|
||
|
return SetErrCode (&errb, fRet);
|
||
|
}
|
||
|
lpMaxAddress = pLeafInfo->pMaxAddress;
|
||
|
lpCurPtr = pLeafInfo->pCurPtr;
|
||
|
|
||
|
//
|
||
|
// Loop through it. This compares the word I'm
|
||
|
// looking for against the word in the b-tree.
|
||
|
// If the word in the b-tree is >= the word I'm
|
||
|
// looking for, I'm done.
|
||
|
//
|
||
|
// If I run off the end of the node, there can be
|
||
|
// no match for this term, so I skip the entire
|
||
|
// process.
|
||
|
//
|
||
|
for (;;)
|
||
|
{
|
||
|
|
||
|
if (lpCurPtr >= lpMaxAddress)
|
||
|
return S_OK;
|
||
|
lpCurPtr = ExtractWord(astBTreeWord, lpCurPtr, &wLen);
|
||
|
|
||
|
/* Read in NodeId record */
|
||
|
lpCurPtr += ReadFileOffset (&pLeafInfo->nodeOffset, lpCurPtr);
|
||
|
|
||
|
if (f1stIsWild)
|
||
|
break;
|
||
|
|
||
|
if (StrCmpPascal2(lstModified, astBTreeWord) <= 0)
|
||
|
break;
|
||
|
|
||
|
// erinfox:
|
||
|
// if stemming is turned on, there could be a case in which the stemmed
|
||
|
// word is less than the search term, but the unstemmed word is greater.
|
||
|
// if we don't check the unstemmed, we'll skip this node erroneously.
|
||
|
if (fStemmed && StrCmpPascal2(lstModified, astBTreeWord) <= 0)
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* At this point, pLeafInfo->nodeOffset is the node id of the leaf that
|
||
|
is supposed to contain the searched word. Read in the leaf node
|
||
|
*/
|
||
|
if ((fRet = ReadLeafNode ((PNODEINFO)pLeafInfo, cLevel)) != S_OK)
|
||
|
{
|
||
|
return fRet;
|
||
|
}
|
||
|
|
||
|
lpCurPtr = pLeafInfo->pCurPtr;
|
||
|
lpMaxAddress = pLeafInfo->pMaxAddress;
|
||
|
|
||
|
//
|
||
|
// Second step is to deal with the leaf node(s). I'm going to
|
||
|
// find and capture some occurence lists. I'll probably have to
|
||
|
// ignore some bogus ones first.
|
||
|
//
|
||
|
for (;;)
|
||
|
{
|
||
|
// Check for out of data
|
||
|
if (lpCurPtr >= lpMaxAddress)
|
||
|
{
|
||
|
// Get the offset of the next node
|
||
|
ReadFileOffset (&pLeafInfo->nodeOffset, pLeafInfo->pBuffer);
|
||
|
if (FoIsNil (pLeafInfo->nodeOffset))
|
||
|
return S_OK;
|
||
|
|
||
|
// Read the next node
|
||
|
if ((fRet = ReadStemNode ((PNODEINFO)pLeafInfo, cLevel))
|
||
|
!= S_OK)
|
||
|
{
|
||
|
return SetErrCode (&errb, fRet);
|
||
|
}
|
||
|
lpCurPtr =
|
||
|
pLeafInfo->pBuffer + FOFFSET_SIZE + sizeof (SHORT);
|
||
|
lpMaxAddress = pLeafInfo->pMaxAddress;
|
||
|
}
|
||
|
|
||
|
// Extract the word
|
||
|
lpCurPtr = ExtractWord(astBTreeWord, lpCurPtr, &wLen);
|
||
|
|
||
|
// Save the word length
|
||
|
lpCurQtNode->wRealLength = wLen;
|
||
|
|
||
|
if (lpqt->occf & OCCF_FIELDID)
|
||
|
lpCurPtr += CbByteUnpack (&dwFieldID, lpCurPtr);
|
||
|
|
||
|
nCmp = CompareTerm (lpCurQtNode, astBTreeWord, fCheckFieldId ?
|
||
|
dwFieldID : lpCurQtNode->dwFieldId, lpRetV->pLeadByteTable);
|
||
|
|
||
|
switch (nCmp)
|
||
|
{
|
||
|
case KEEP_SEARCHING:
|
||
|
// Skip TopicCount
|
||
|
lpCurPtr += CbByteUnpack (&dwTemp, lpCurPtr);
|
||
|
// Skip data offset
|
||
|
lpCurPtr += FOFFSET_SIZE;
|
||
|
// Skip DataSize
|
||
|
lpCurPtr += CbByteUnpack (&dwTemp, lpCurPtr);
|
||
|
break;
|
||
|
|
||
|
case STRING_MATCH:
|
||
|
lpCurPtr += CbByteUnpack (&lpCurQtNode->cTopic, lpCurPtr);
|
||
|
lpCurPtr += ReadFileOffset (&lpCurQtNode->foData, lpCurPtr);
|
||
|
lpCurPtr += CbByteUnpack (&lpCurQtNode->cbData, lpCurPtr);
|
||
|
|
||
|
// Set FieldId to give back the field id
|
||
|
lpCurQtNode->dwFieldId = dwFieldID;
|
||
|
|
||
|
// Set return pointer to beginning of next node
|
||
|
if (lpCurQtNode->iCurOff == 0)
|
||
|
lpCurQtNode->iCurOff = lpCurPtr - pLeafInfo->pBuffer;
|
||
|
|
||
|
return S_OK;
|
||
|
|
||
|
case NOT_FOUND: // No unconditional "break" above.
|
||
|
return S_OK;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func HRESULT FAR PASCAL | GetWordData |
|
||
|
* This function will search the index for the given word' data.
|
||
|
* @parm _LPQT | lpqt |
|
||
|
* Pointer to index structure
|
||
|
* @parm LPRETV | lpRetV |
|
||
|
* Pointer to "globals"
|
||
|
* @parm _LPQTNODE | lpCurQtNode |
|
||
|
* Current node in the query tree containing important data
|
||
|
* - The number of topics
|
||
|
* - The location of the data
|
||
|
* - The size of the data
|
||
|
* - Pointer to the next word (for wildcard search)
|
||
|
* @rdesc S_OK or other errors
|
||
|
*************************************************************************/
|
||
|
PUBLIC HRESULT EXPORT_API FAR PASCAL GetWordData (_LPQT lpqt, LPRETV lpRetV,
|
||
|
int Operator, _LPQTNODE lpResQuery, _LPQTNODE lpQtNode, int fDivide, int fOutOfMemory)
|
||
|
{
|
||
|
LPIOCC lpOccur; // The current occurence is collected into
|
||
|
// here.
|
||
|
DWORD dwTopicIDDelta; // Topic-ID delta from previous sub-list.
|
||
|
DWORD dwOccs; // Number of occurences in this sub-list.
|
||
|
DWORD dwTmp; // Scratch variable.
|
||
|
WORD wWeight; // Term-weight associated with this sub-list.
|
||
|
DWORD dwTopicID; // TopicId
|
||
|
WORD wImportance;
|
||
|
DWORD dwCount; // Word count
|
||
|
DWORD dwOffset; // Offset of the word
|
||
|
DWORD dwLength; // Length of the word
|
||
|
TOPIC_LIST FAR *lpResTopicList; // Result TopicList
|
||
|
HRESULT fRet; // Returned value
|
||
|
PNODEINFO pDataInfo;
|
||
|
DWORD dwTopicCount;
|
||
|
_LPQT lpQueryTree; // Query tree
|
||
|
OCCF occf;
|
||
|
BYTE fSkipOccList = FALSE;
|
||
|
|
||
|
pDataInfo = &lpRetV->DataInfo;
|
||
|
if ((pDataInfo->dwDataSizeLeft = lpQtNode->cbData) == 0)
|
||
|
return(S_OK); // There is nothing to process
|
||
|
|
||
|
// Initialize variables
|
||
|
occf = lpqt->occf;
|
||
|
wImportance = QTN_TOKEN(lpQtNode)->wWeight;
|
||
|
lpResTopicList = NULL;
|
||
|
lpQueryTree = lpRetV->lpqt;
|
||
|
dwTopicCount = lpQtNode->cTopic;
|
||
|
wWeight = (WORD)(65535L/dwTopicCount);
|
||
|
|
||
|
// Reset the topic count for lpQtNode so that is will not affect the
|
||
|
// result in case that lpResQuery == NULL
|
||
|
|
||
|
lpQtNode->cTopic = 0;
|
||
|
|
||
|
if (lpResQuery == NULL)
|
||
|
lpResQuery = lpQtNode;
|
||
|
|
||
|
// Initialize the data buffer node values
|
||
|
pDataInfo->pBuffer = pDataInfo->pDataNode;
|
||
|
pDataInfo->nodeOffset = lpQtNode->foData;
|
||
|
|
||
|
// Read the data block
|
||
|
if ((fRet = ReadNewData(pDataInfo)) != S_OK)
|
||
|
return(fRet);
|
||
|
|
||
|
dwTopicID = 0L; // Init occurence record
|
||
|
dwLength = 0;
|
||
|
|
||
|
// One pass through here for each sublist in the Topiclist.
|
||
|
|
||
|
for (; dwTopicCount; dwTopicCount--)
|
||
|
{
|
||
|
|
||
|
/* Check for interrupt now and then */
|
||
|
if ((++lpqt->cInterruptCount) == 0)
|
||
|
{
|
||
|
if (lpqt->fInterrupt == E_INTERRUPT)
|
||
|
return E_INTERRUPT;
|
||
|
if (*lpqt->cStruct.Callback.MessageFunc &&
|
||
|
(fRet = (*lpqt->cStruct.Callback.MessageFunc)(
|
||
|
lpqt->cStruct.Callback.dwFlags,
|
||
|
lpqt->cStruct.Callback.pUserData, NULL)) != S_OK)
|
||
|
return(fRet);
|
||
|
}
|
||
|
|
||
|
// Byte align
|
||
|
if (pDataInfo->ibit != cbitBYTE - 1)
|
||
|
{
|
||
|
pDataInfo->ibit = cbitBYTE - 1;
|
||
|
pDataInfo->pCurPtr ++;
|
||
|
}
|
||
|
|
||
|
// Get value from which I will calculate current doc-ID.
|
||
|
if ((fRet = FGetDword(pDataInfo, lpqt->ckeyTopicId,
|
||
|
&dwTopicIDDelta)) != S_OK)
|
||
|
{
|
||
|
exit0:
|
||
|
return fRet;
|
||
|
}
|
||
|
|
||
|
dwTopicID += dwTopicIDDelta;
|
||
|
//
|
||
|
// Get term-weight if present. I'm going to get this
|
||
|
// even if I'm not doing ranking, because it's in the
|
||
|
// index, and I have to get around it somehow.
|
||
|
//
|
||
|
if (lpqt->idxf & IDXF_NORMALIZE)
|
||
|
{
|
||
|
if ((fRet = FGetBits(pDataInfo, &dwTmp, sizeof (USHORT) * cbitBYTE))
|
||
|
!= S_OK)
|
||
|
goto exit0;
|
||
|
|
||
|
if (wImportance != MAX_WEIGHT)
|
||
|
dwTmp = (dwTmp * wImportance) / 65535;
|
||
|
wWeight = (WORD)dwTmp;
|
||
|
}
|
||
|
//
|
||
|
// If this search includes a group, and the doc is not in the
|
||
|
// group then ignore it
|
||
|
fSkipOccList = (lpQueryTree->lpGroup &&
|
||
|
FGroupLookup(lpQueryTree->lpGroup, dwTopicID) == FALSE);
|
||
|
|
||
|
|
||
|
// erinfox: move test agains fSkipOccList outside
|
||
|
if (!fSkipOccList)
|
||
|
{
|
||
|
if (/*!fSkipOccList && */((lpResTopicList = TopicNodeSearch (lpQueryTree,
|
||
|
lpResQuery, dwTopicID)) == NULL))
|
||
|
{
|
||
|
/* Adding an new occurrence to a non-existing TopicList. */
|
||
|
/* Allocate the new TopicList only if it is an OR
|
||
|
operator. This record should be skipped for all other
|
||
|
operator
|
||
|
*/
|
||
|
if (Operator == OR_OP && !fOutOfMemory)
|
||
|
{
|
||
|
if ((lpResTopicList = TopicNodeAllocate(lpQueryTree)) == NULL)
|
||
|
{
|
||
|
fRet = E_TOOMANYTOPICS;
|
||
|
goto exit0;
|
||
|
}
|
||
|
lpResTopicList->dwTopicId = dwTopicID;
|
||
|
lpResTopicList->lpOccur = NULL;
|
||
|
lpResTopicList->lcOccur = 0;
|
||
|
lpResTopicList->wWeight = 0;
|
||
|
|
||
|
/* Add the new TopicID node into TopicList */
|
||
|
TopicNodeInsert (lpQueryTree, lpResQuery, lpResTopicList);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
/* There is no corresponding Topic list. Consequently, we
|
||
|
don't need to read in the right node's data for
|
||
|
the following operators: AND, PHRASE, NEAR, NOT
|
||
|
*/
|
||
|
fSkipOccList = TRUE;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (Operator == NOT_OP)
|
||
|
{
|
||
|
/* Don't skip this Topic list since it also contains
|
||
|
* the right node's docId
|
||
|
*/
|
||
|
if (lpResTopicList)
|
||
|
lpResTopicList->fFlag &= ~TO_BE_KEPT;
|
||
|
fSkipOccList = TRUE;
|
||
|
}
|
||
|
else if (Operator == AND_OP && lpQueryTree->lpTopicStartSearch)
|
||
|
lpQueryTree->lpTopicStartSearch->fFlag |= TO_BE_KEPT;
|
||
|
}
|
||
|
}
|
||
|
lpQueryTree->lpOccStartSearch = NULL;
|
||
|
|
||
|
if ((occf & (OCCF_OFFSET | OCCF_COUNT)) == 0)
|
||
|
continue;
|
||
|
|
||
|
// Figure out how many occurences there are in this
|
||
|
// sub-list.
|
||
|
//
|
||
|
if ((fRet = FGetDword(pDataInfo, lpqt->ckeyOccCount,
|
||
|
&dwOccs)) != S_OK)
|
||
|
goto exit0;
|
||
|
|
||
|
if (fSkipOccList || fOutOfMemory)
|
||
|
{
|
||
|
skip_occ_list:
|
||
|
if ((fRet = SkipOccList (lpqt, pDataInfo, dwOccs)) != S_OK)
|
||
|
goto exit0;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if ((lpqt->idxf & IDXF_NORMALIZE) == FALSE && lpRetV->fRank)
|
||
|
{
|
||
|
wWeight = (WORD)(wWeight * dwOccs);
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// If I'm doing ranking, divide the weight for
|
||
|
// this topic amongst all the occurences in
|
||
|
// the topic if I need to.
|
||
|
//
|
||
|
if (lpRetV->fRank && fDivide)
|
||
|
{
|
||
|
if (dwOccs > 65535L)
|
||
|
wWeight = 0;
|
||
|
else if ((WORD)dwOccs > 1)
|
||
|
wWeight /= (WORD)dwOccs;
|
||
|
}
|
||
|
|
||
|
// optimization for ISBU/IR:
|
||
|
// if no highlighting info is needed, and this is not near-type query
|
||
|
// then store the term weights in the topic list directly, and skip the occurrence
|
||
|
// list completely. If this is an AND or OR operator, then increment the existing
|
||
|
// weight since the occurrences are undergoing union. NOT operator leaves
|
||
|
// current weight unchanged.
|
||
|
|
||
|
if (DO_FAST_MERGE(&lpRetV->SrchInfo, lpqt))
|
||
|
{
|
||
|
if (lpResTopicList && (Operator == OR_OP || Operator == AND_OP) && lpRetV->fRank)
|
||
|
lpResTopicList->wWeight = (WORD) min(MAX_WEIGHT, lpResTopicList->wWeight + wWeight * dwOccs);
|
||
|
goto skip_occ_list;
|
||
|
}
|
||
|
|
||
|
//
|
||
|
// One pass through here for each occurence in
|
||
|
// this sub-list. If this index doesn't really
|
||
|
// have sub-lists it will still make one pass
|
||
|
// through here anyway, at which time it will
|
||
|
// write the doc-ID and possibly the term-weight
|
||
|
// and field-ID, then drop out.
|
||
|
//
|
||
|
dwCount = 0L;
|
||
|
dwOffset = 0L;
|
||
|
|
||
|
for (; dwOccs; dwOccs--)
|
||
|
{
|
||
|
// interrupt about every 4096
|
||
|
if ((dwOccs & 0x0FFF) == 0)
|
||
|
{
|
||
|
if (lpqt->fInterrupt == E_INTERRUPT)
|
||
|
{
|
||
|
fRet = E_INTERRUPT;
|
||
|
goto exit;
|
||
|
}
|
||
|
if (*lpqt->cStruct.Callback.MessageFunc &&
|
||
|
(fRet = (*lpqt->cStruct.Callback.MessageFunc)(
|
||
|
lpqt->cStruct.Callback.dwFlags,
|
||
|
lpqt->cStruct.Callback.pUserData, NULL)) != S_OK)
|
||
|
goto exit;
|
||
|
}
|
||
|
|
||
|
// Get word-count, if present.
|
||
|
//
|
||
|
if ((lpOccur = OccNodeAllocate(lpQueryTree)) == NULL)
|
||
|
{
|
||
|
fRet = E_TOOMANYTOPICS;
|
||
|
goto exit;
|
||
|
}
|
||
|
lpOccur->dwFieldId = lpQtNode->dwFieldId;
|
||
|
lpOccur->cLength = lpQtNode->wRealLength;
|
||
|
|
||
|
// If the caller requested term strings, put in the occurrence
|
||
|
// record a pointer to the term that currently matches the query
|
||
|
// we're gathering occurrence data for.
|
||
|
if ((lpRetV->SrchInfo.Flag & QUERY_GETTERMS) != 0)
|
||
|
lpOccur->lpvTerm = lpQtNode->lpvIndexedTerm;
|
||
|
|
||
|
if (occf & OCCF_COUNT)
|
||
|
{
|
||
|
if ((fRet = FGetDword(pDataInfo, lpqt->ckeyWordCount,
|
||
|
&dwTmp)) != S_OK)
|
||
|
{
|
||
|
exit1:
|
||
|
/* Just release the occurence node */
|
||
|
lpOccur->pNext = (LPIOCC)lpQueryTree->lpOccFreeList;
|
||
|
lpQueryTree->lpOccFreeList = (LPSLINK)lpOccur;
|
||
|
goto exit0;
|
||
|
}
|
||
|
dwCount += dwTmp;
|
||
|
lpOccur->dwCount = dwCount; // Needed for phrase and near
|
||
|
}
|
||
|
|
||
|
// Get byte-offset, if present.
|
||
|
//
|
||
|
if (occf & OCCF_OFFSET)
|
||
|
{
|
||
|
if ((fRet = FGetDword(pDataInfo, lpqt->ckeyOffset, &dwTmp))
|
||
|
!= S_OK)
|
||
|
{
|
||
|
goto exit1;
|
||
|
}
|
||
|
dwOffset += dwTmp;
|
||
|
lpOccur->dwOffset = dwOffset;
|
||
|
}
|
||
|
// Get term-weight, if present.
|
||
|
//
|
||
|
if (lpRetV->fRank)
|
||
|
{
|
||
|
if (!fDivide)
|
||
|
wWeight = 0;
|
||
|
lpOccur->wWeight = wWeight;
|
||
|
}
|
||
|
|
||
|
#ifndef CW
|
||
|
if ((fRet = (*HandlerFuncTable[Operator])(lpQueryTree,
|
||
|
lpQtNode, lpResTopicList, (BYTE FAR *)lpOccur,
|
||
|
|
||
|
EXPRESSION_TERM)) != S_OK)
|
||
|
{
|
||
|
goto exit;
|
||
|
}
|
||
|
#else
|
||
|
switch (Operator)
|
||
|
{
|
||
|
case NEAR_OP:
|
||
|
if ((fRet = NearHandler(lpQueryTree,
|
||
|
lpQtNode, lpResTopicList, (BYTE FAR *)lpOccur,
|
||
|
EXPRESSION_TERM)) != S_OK)
|
||
|
{
|
||
|
goto exit;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case PHRASE_OP:
|
||
|
if ((fRet = PhraseHandler(lpQueryTree,
|
||
|
lpQtNode, lpResTopicList, (BYTE FAR *)lpOccur,
|
||
|
EXPRESSION_TERM)) != S_OK)
|
||
|
{
|
||
|
goto exit;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case AND_OP:
|
||
|
if ((fRet = AndHandler(lpQueryTree,
|
||
|
lpQtNode, lpResTopicList, (BYTE FAR *)lpOccur,
|
||
|
EXPRESSION_TERM)) != S_OK)
|
||
|
{
|
||
|
goto exit;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case NOT_OP:
|
||
|
if ((fRet = NotHandler(lpQueryTree,
|
||
|
lpQtNode, lpResTopicList, (BYTE FAR *)lpOccur,
|
||
|
EXPRESSION_TERM)) != S_OK)
|
||
|
{
|
||
|
goto exit;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case OR_OP:
|
||
|
if ((fRet = OrHandler(lpQueryTree,
|
||
|
lpQtNode, lpResTopicList, (BYTE FAR *)lpOccur,
|
||
|
EXPRESSION_TERM)) != S_OK)
|
||
|
{
|
||
|
goto exit;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
fRet = E_FAIL;
|
||
|
goto exit;
|
||
|
}
|
||
|
#endif
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fRet = S_OK;
|
||
|
|
||
|
exit:
|
||
|
/* Check to make sure that there are occurrences associcated with the
|
||
|
* TopicList. The main reason for no occurrence is that the user hits
|
||
|
* cancel when occurrences are being read in. Cancel will cause the
|
||
|
* read to fail, and there is no occurrence associated with the Topic
|
||
|
* List, which in turn, will cause hili code to fail. So, if there is
|
||
|
* no occurrence, just remove the list
|
||
|
*/
|
||
|
|
||
|
if (lpResTopicList && lpResTopicList->lcOccur == 0
|
||
|
&&
|
||
|
!DO_FAST_MERGE(&lpRetV->SrchInfo, lpqt)
|
||
|
&&
|
||
|
(lpqt->occf & (OCCF_OFFSET | OCCF_COUNT)))
|
||
|
RemoveNode(lpQueryTree, (LPV)lpResQuery, NULL,
|
||
|
(LPSLINK)lpResTopicList, TOPICLIST_NODE);
|
||
|
goto exit0;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func HRESULT PASCAL NEAR | LoadNode |
|
||
|
* Load all the data related to a word from the index file,
|
||
|
* and apply the operator to them and the resulting data
|
||
|
*
|
||
|
* @parm _LPQT | lpqt |
|
||
|
* Index information
|
||
|
*
|
||
|
* @parm int | Operator |
|
||
|
* What operator we are dealing with
|
||
|
*
|
||
|
* @parm _LPQTNODE | lpResQuery |
|
||
|
* Resulting query node
|
||
|
*
|
||
|
* @parm _LPQTNODE | lpCurQtNode |
|
||
|
* Current query node
|
||
|
*
|
||
|
* @parm LPRETV | lpRetV |
|
||
|
* Returned result
|
||
|
*
|
||
|
* @parm int | fDivide |
|
||
|
* Divide the weight between occurences
|
||
|
*
|
||
|
* @rdesc S_OK if succeeded, errors otherwise
|
||
|
*************************************************************************/
|
||
|
|
||
|
PRIVATE HRESULT PASCAL NEAR LoadNode (_LPQT lpqt, int Operator,
|
||
|
_LPQTNODE lpResQuery, _LPQTNODE lpCurQtNode, LPRETV lpRetV, int fDivide, int fOutOfMemory)
|
||
|
{
|
||
|
int cLevel;
|
||
|
int cMaxLevel;
|
||
|
int fCheckFieldId;
|
||
|
LST lstSearchStr;
|
||
|
LPB lpCurPtr;
|
||
|
int nCmp;
|
||
|
HRESULT fRet;
|
||
|
int f1stIsWild;
|
||
|
LPB lpMaxAddress;
|
||
|
PNODEINFO pLeafInfo = &lpRetV->LeafInfo;
|
||
|
DWORD dwTemp;
|
||
|
LPB astBTreeWord = lpRetV->pBTreeWord;
|
||
|
WORD wLen;
|
||
|
DWORD dwFieldID;
|
||
|
DWORD dwTotalTopic;
|
||
|
LPB lstModified = lpRetV->pModifiedWord;
|
||
|
BYTE fStemmed;
|
||
|
LPB pBTreeWord;
|
||
|
ERRB errb;
|
||
|
WORD cByteMatched = 0;
|
||
|
|
||
|
fStemmed = ((lpRetV->SrchInfo.Flag & STEMMED_SEARCH) != 0) &&
|
||
|
(PRIMARYLANGID(LANGIDFROMLCID(lpRetV->lcid)) == LANG_ENGLISH);
|
||
|
|
||
|
lstSearchStr = QTN_TOKEN(lpCurQtNode)->lpString;
|
||
|
f1stIsWild = (lstSearchStr[2] == WILDCARD_CHAR ||
|
||
|
lstSearchStr[2] == WILDCARD_STAR);
|
||
|
|
||
|
// Make sure to turn of stemming if there is any wildcard characters
|
||
|
|
||
|
for (nCmp = *((LPW)lstSearchStr) + 1; nCmp >= 2; nCmp--)
|
||
|
{
|
||
|
if (lstSearchStr[nCmp] == '*' || lstSearchStr[nCmp] == '?')
|
||
|
{
|
||
|
fStemmed = FALSE;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Turned off stemming for short words
|
||
|
if (*(LPW)lstSearchStr < 3)
|
||
|
fStemmed = FALSE;
|
||
|
|
||
|
pLeafInfo->nodeOffset = lpqt->foIdxRoot;
|
||
|
pLeafInfo->iLeafLevel = lpqt->cIdxLevels - 1;
|
||
|
pLeafInfo->dwBlockSize = lpqt->dwBlockSize;
|
||
|
|
||
|
if (fStemmed)
|
||
|
{
|
||
|
if ((fRet = ExtStemWord(lpRetV->SrchInfo.lpvIndexObjBridge,
|
||
|
&lpRetV->pStemmedQueryWord[0], lstSearchStr)) != S_OK)
|
||
|
{
|
||
|
return(fRet);
|
||
|
}
|
||
|
MEMCPY (lstModified, lpRetV->pStemmedQueryWord,
|
||
|
*(LPW)lpRetV->pStemmedQueryWord + sizeof(WORD));
|
||
|
pBTreeWord = lpRetV->pStemmedBTreeWord;
|
||
|
|
||
|
for (nCmp = 2; nCmp <= *(LPW)lstModified+1; nCmp++)
|
||
|
{
|
||
|
if (lstModified[nCmp] == lstSearchStr[nCmp])
|
||
|
cByteMatched++;
|
||
|
else
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Restore the original word
|
||
|
MEMCPY (lstModified, lstSearchStr,
|
||
|
*((LPW)lstSearchStr) + sizeof (SHORT));
|
||
|
// Zero terminated for wildcard search
|
||
|
lstModified [*((LPW)lstModified) + 2] = 0;
|
||
|
|
||
|
pBTreeWord = lpRetV->pBTreeWord;
|
||
|
}
|
||
|
|
||
|
/* Change all '*' and '?' to 0. This will
|
||
|
* ensure that things gets compared correctly with
|
||
|
* the top node's entries
|
||
|
*/
|
||
|
for (nCmp = *((LPW)lstModified) + 1; nCmp >= 2; nCmp--)
|
||
|
{
|
||
|
if (lpRetV->pLeadByteTable
|
||
|
&& lpRetV->pLeadByteTable[lstModified[nCmp - 1]])
|
||
|
{
|
||
|
nCmp--;
|
||
|
}
|
||
|
else if (lstModified[nCmp] == '*' || lstModified[nCmp] == '?')
|
||
|
{
|
||
|
lstModified[nCmp] = 0;
|
||
|
*(LPW)lstModified = nCmp - 2;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Point node-resolution variables at the right things. This
|
||
|
* sets these up to read b-tree nodes. Fields not set here are
|
||
|
* set as appropriate elsewhere.
|
||
|
*/
|
||
|
|
||
|
/* Set the flag */
|
||
|
fCheckFieldId = ((lpqt->occf & OCCF_FIELDID) &&
|
||
|
(lpCurQtNode->dwFieldId != DW_NIL_FIELD));
|
||
|
|
||
|
astBTreeWord[0] = 0;
|
||
|
cMaxLevel = lpqt->cIdxLevels - 1;
|
||
|
|
||
|
/*
|
||
|
First we have to find which tree level the word is in. The number of
|
||
|
searches is equal to the number of tree levels at most. The
|
||
|
structure of the directory node is a sequence of:
|
||
|
- Words: PASCAL strings
|
||
|
- Data offset: will tell us where is the
|
||
|
offset of the record in the index file
|
||
|
*/
|
||
|
for (cLevel = 0; cLevel < cMaxLevel ; cLevel++)
|
||
|
{
|
||
|
//
|
||
|
// Get a node.
|
||
|
//
|
||
|
if ((fRet = ReadStemNode ((PNODEINFO)pLeafInfo, cLevel)) != S_OK)
|
||
|
{
|
||
|
return SetErrCode (&errb, fRet);
|
||
|
}
|
||
|
lpMaxAddress = pLeafInfo->pMaxAddress;
|
||
|
lpCurPtr = pLeafInfo->pCurPtr;
|
||
|
|
||
|
//
|
||
|
// Loop through it. This compares the word I'm
|
||
|
// looking for against the word in the b-tree.
|
||
|
// If the word in the b-tree is >= the word I'm
|
||
|
// looking for, I'm done.
|
||
|
//
|
||
|
// If I run off the end of the node, there can be
|
||
|
// no match for this term, so I skip the entire
|
||
|
// process.
|
||
|
//
|
||
|
for (;;)
|
||
|
{
|
||
|
|
||
|
if (lpCurPtr >= lpMaxAddress)
|
||
|
return S_OK;
|
||
|
lpCurPtr = ExtractWord(astBTreeWord, lpCurPtr, &wLen);
|
||
|
|
||
|
if (fStemmed)
|
||
|
{
|
||
|
if ((fRet = ExtStemWord(lpRetV->SrchInfo.lpvIndexObjBridge,
|
||
|
pBTreeWord, astBTreeWord)) != S_OK)
|
||
|
return(fRet);
|
||
|
}
|
||
|
|
||
|
/* Read in NodeId record */
|
||
|
lpCurPtr += ReadFileOffset (&pLeafInfo->nodeOffset, lpCurPtr);
|
||
|
|
||
|
if (f1stIsWild)
|
||
|
break;
|
||
|
if (StrCmpPascal2(lstModified, pBTreeWord) <= 0)
|
||
|
break;
|
||
|
|
||
|
// erinfox:
|
||
|
// if stemming is turned on, there could be a case in which the stemmed
|
||
|
// word is less than the search term, but the unstemmed word is greater.
|
||
|
// if we don't check the unstemmed, we'll skip this node erroneously.
|
||
|
if (fStemmed && StrCmpPascal2(lstModified, astBTreeWord) <= 0)
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* At this point, pLeafInfo->nodeOffset is the node id of the leaf that
|
||
|
is supposed to contain the searched word. Read in the leaf node
|
||
|
*/
|
||
|
if ((fRet = ReadLeafNode ((PNODEINFO)pLeafInfo, cLevel)) != S_OK)
|
||
|
{
|
||
|
return fRet;
|
||
|
}
|
||
|
|
||
|
lpCurPtr = pLeafInfo->pCurPtr;
|
||
|
lpMaxAddress = pLeafInfo->pMaxAddress;
|
||
|
dwTotalTopic = 0;
|
||
|
|
||
|
//
|
||
|
// Second step is to deal with the leaf node(s). I'm going to
|
||
|
// find and capture some occurence lists. I'll probably have to
|
||
|
// ignore some bogus ones first.
|
||
|
//
|
||
|
|
||
|
// Reset the word
|
||
|
if (fStemmed)
|
||
|
{
|
||
|
MEMCPY (lstModified, lpRetV->pStemmedQueryWord,
|
||
|
*(LPW)lpRetV->pStemmedQueryWord + sizeof(WORD));
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
MEMCPY (lstModified, lstSearchStr,
|
||
|
*((LPW)lstSearchStr) + sizeof (SHORT));
|
||
|
}
|
||
|
|
||
|
for (;;)
|
||
|
{
|
||
|
// Check for out of data
|
||
|
if (lpCurPtr >= lpMaxAddress)
|
||
|
{
|
||
|
// Get the offset of the next node
|
||
|
ReadFileOffset (&pLeafInfo->nodeOffset, pLeafInfo->pBuffer);
|
||
|
if (FoIsNil (pLeafInfo->nodeOffset))
|
||
|
{
|
||
|
lpCurQtNode->cTopic = dwTotalTopic;
|
||
|
return S_OK;
|
||
|
}
|
||
|
|
||
|
// Read the next node
|
||
|
if ((fRet = ReadLeafNode ((PNODEINFO)pLeafInfo, cLevel))
|
||
|
!= S_OK)
|
||
|
{
|
||
|
return SetErrCode (&errb, fRet);
|
||
|
}
|
||
|
lpCurPtr =
|
||
|
pLeafInfo->pBuffer + FOFFSET_SIZE + sizeof (SHORT);
|
||
|
lpMaxAddress = pLeafInfo->pMaxAddress;
|
||
|
}
|
||
|
|
||
|
/* Check for interrupt now and then */
|
||
|
if ((++lpqt->cInterruptCount) == 0)
|
||
|
{
|
||
|
if (lpqt->fInterrupt == E_INTERRUPT)
|
||
|
return E_INTERRUPT;
|
||
|
if (*lpqt->cStruct.Callback.MessageFunc &&
|
||
|
(fRet = (*lpqt->cStruct.Callback.MessageFunc)(
|
||
|
lpqt->cStruct.Callback.dwFlags,
|
||
|
lpqt->cStruct.Callback.pUserData, NULL)) != S_OK)
|
||
|
return(fRet);
|
||
|
}
|
||
|
|
||
|
// Extract the word
|
||
|
lpCurPtr = ExtractWord(astBTreeWord, lpCurPtr, &wLen);
|
||
|
|
||
|
if (fStemmed)
|
||
|
{
|
||
|
if ((fRet = ExtStemWord(lpRetV->SrchInfo.lpvIndexObjBridge,
|
||
|
pBTreeWord, astBTreeWord)) != S_OK)
|
||
|
return(fRet);
|
||
|
}
|
||
|
|
||
|
// Save the word length
|
||
|
lpCurQtNode->wRealLength = wLen;
|
||
|
|
||
|
if (lpqt->occf & OCCF_FIELDID)
|
||
|
lpCurPtr += CbByteUnpack (&dwFieldID, lpCurPtr);
|
||
|
|
||
|
nCmp = CompareTerm (lpCurQtNode, lstModified, pBTreeWord, fCheckFieldId ?
|
||
|
dwFieldID : lpCurQtNode->dwFieldId, lpRetV->pLeadByteTable);
|
||
|
|
||
|
switch (nCmp)
|
||
|
{
|
||
|
case KEEP_SEARCHING:
|
||
|
// Skip TopicCount
|
||
|
lpCurPtr += CbByteUnpack (&dwTemp, lpCurPtr);
|
||
|
// Skip data offset
|
||
|
lpCurPtr += FOFFSET_SIZE;
|
||
|
// Skip DataSize
|
||
|
lpCurPtr += CbByteUnpack (&dwTemp, lpCurPtr);
|
||
|
break;
|
||
|
|
||
|
case STRING_MATCH:
|
||
|
|
||
|
lpCurPtr += CbByteUnpack (&lpCurQtNode->cTopic, lpCurPtr);
|
||
|
lpCurPtr += ReadFileOffset (&lpCurQtNode->foData, lpCurPtr);
|
||
|
lpCurPtr += CbByteUnpack (&lpCurQtNode->cbData, lpCurPtr);
|
||
|
|
||
|
// Check for Topic count. This can be 0 if the word has been deleted
|
||
|
// from the index
|
||
|
if (lpCurQtNode->cTopic == 0)
|
||
|
break;
|
||
|
|
||
|
if (lpRetV->SrchInfo.Flag & LARGEQUERY_SEARCH)
|
||
|
{
|
||
|
// long search optimization: clip noise words.
|
||
|
// Johnms- eliminate frequent words.
|
||
|
// typically, you eliminate if in more than 1/7 of documents.
|
||
|
if (lpRetV->SrchInfo.dwValue < lpCurQtNode->cTopic)
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// Add the raw (i.e. unstemmed) term from the index that currently
|
||
|
// matches the query term for this node to the query result term
|
||
|
// dictionary, and pass a pointer to the term in the dictionary
|
||
|
// to GetWordData so that it can add it to the occurrence records.
|
||
|
if ((lpRetV->SrchInfo.Flag & QUERY_GETTERMS) != 0 &&
|
||
|
(fRet = ExtAddQueryResultTerm(
|
||
|
lpRetV->SrchInfo.lpvIndexObjBridge,
|
||
|
astBTreeWord,
|
||
|
&lpCurQtNode->lpvIndexedTerm)) != S_OK)
|
||
|
{
|
||
|
return (fRet);
|
||
|
}
|
||
|
|
||
|
// Save the info
|
||
|
pLeafInfo->pCurPtr = lpCurPtr;
|
||
|
|
||
|
if ((fRet = GetWordData (lpqt, lpRetV,
|
||
|
Operator, lpResQuery, lpCurQtNode, fDivide,
|
||
|
fOutOfMemory)) != S_OK)
|
||
|
{
|
||
|
// kevynct: no need to overwrite count on error since
|
||
|
// we may be attempting to continue
|
||
|
lpCurQtNode->cTopic += dwTotalTopic;
|
||
|
return(fRet);
|
||
|
}
|
||
|
|
||
|
// Accumulate the topic count, since cTopic will be destroyed
|
||
|
// if there is more searches for this node (such as wildcard)
|
||
|
|
||
|
dwTotalTopic += lpCurQtNode->cTopic;
|
||
|
break;
|
||
|
|
||
|
case NOT_FOUND: // No unconditional "break" above.
|
||
|
if (fStemmed && (strncmp (lstSearchStr+ 2, pBTreeWord + 2,
|
||
|
cByteMatched) == 0))
|
||
|
{
|
||
|
// Continue searching in case stemming is messed up
|
||
|
// by non-alphabetic word, such as the sequence:
|
||
|
// subtopic subtopic2 subtopics
|
||
|
lpCurPtr += CbByteUnpack (&dwTemp, lpCurPtr);
|
||
|
// Skip data offset
|
||
|
lpCurPtr += FOFFSET_SIZE;
|
||
|
// Skip DataSize
|
||
|
lpCurPtr += CbByteUnpack (&dwTemp, lpCurPtr);
|
||
|
break;
|
||
|
}
|
||
|
lpCurQtNode->cTopic = dwTotalTopic;
|
||
|
return S_OK;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func int PASCAL NEAR | CompareTerm |
|
||
|
* This function compares two Pascal strings
|
||
|
*
|
||
|
* @parm _LPQTNODE FAR* | lpQtNode |
|
||
|
* Query tree node
|
||
|
*
|
||
|
* @parm LST | lstSrchStr |
|
||
|
* String to be searched
|
||
|
*
|
||
|
* @parm LST | lstBtreeWord |
|
||
|
* The word from the b-tree.
|
||
|
*
|
||
|
* @parm DWORD | dwBtreeFieldId |
|
||
|
* The field-ID from the index b-tree. if it is DW_NIL_FIELD,
|
||
|
* then there is no need to check
|
||
|
*
|
||
|
* @parm DWORD | dwLanguage |
|
||
|
* The language of the index that we are searching
|
||
|
*
|
||
|
* @rdesc
|
||
|
* The returned values are:
|
||
|
* @flag NOT_FOUND |
|
||
|
* The words do not match, and we have passed the interested point
|
||
|
* @flag KEEP_SEARCHING |
|
||
|
* The words do not match, but we should continue the search for
|
||
|
* the match may be ahead
|
||
|
* @flag STRING_MATCH |
|
||
|
* The words match
|
||
|
*************************************************************************/
|
||
|
#ifndef SIMILARITY
|
||
|
PUBLIC int PASCAL FAR CompareTerm(_LPQTNODE lpQtNode,LST lstTermWord,
|
||
|
LST lstBtreeWord, DWORD dwBtreeFieldId, BYTE prgbLeadByteTable[])
|
||
|
#else
|
||
|
PRIVATE int PASCAL NEAR CompareTerm(_LPQTNODE lpQtNode, LST lstTermWord,
|
||
|
LST lstBtreeWord, DWORD dwBtreeFieldId, BYTE prgbLeadByteTable[])
|
||
|
#endif
|
||
|
{
|
||
|
int nCmp; // The result of compare
|
||
|
BYTE FAR *lstTermHiWord;// Pointer to the hi term string
|
||
|
DWORD dwTermFieldId;
|
||
|
|
||
|
/* Get the variables */
|
||
|
dwTermFieldId = lpQtNode->dwFieldId;
|
||
|
|
||
|
switch (QTN_FLAG(lpQtNode))
|
||
|
{
|
||
|
case EXACT_MATCH:
|
||
|
/*
|
||
|
* This is very straight, it just compares the two words.
|
||
|
*/
|
||
|
if ((nCmp = StrCmpPascal2(lstTermWord, lstBtreeWord)) < 0)
|
||
|
{
|
||
|
/* lstTermWord > lstBtreeWord */
|
||
|
return NOT_FOUND;
|
||
|
}
|
||
|
if (nCmp)
|
||
|
return KEEP_SEARCHING;
|
||
|
if (dwBtreeFieldId < dwTermFieldId)
|
||
|
return KEEP_SEARCHING;
|
||
|
if (dwBtreeFieldId == dwTermFieldId)
|
||
|
return STRING_MATCH;
|
||
|
if (dwBtreeFieldId > dwTermFieldId)
|
||
|
return NOT_FOUND;
|
||
|
break;
|
||
|
|
||
|
case TERM_RANGE_MATCH:
|
||
|
/*
|
||
|
* This makes sure that the b-tree word is between the
|
||
|
* two term words provided, and that the field-ID's
|
||
|
* match up.
|
||
|
*/
|
||
|
lstTermHiWord = lpQtNode->lpHiString;
|
||
|
if ((nCmp = StrCmpPascal2(lstTermWord, lstBtreeWord)) > 0)
|
||
|
{
|
||
|
/* lstTermWord < lstBtreeWord */
|
||
|
return KEEP_SEARCHING;
|
||
|
}
|
||
|
if ((nCmp = StrCmpPascal2(lstTermHiWord, lstBtreeWord)) < 0)
|
||
|
{
|
||
|
/* lstTermHiWord > lstBtreeWord */
|
||
|
return NOT_FOUND;
|
||
|
}
|
||
|
if (dwTermFieldId != dwBtreeFieldId)
|
||
|
return KEEP_SEARCHING;
|
||
|
break;
|
||
|
|
||
|
case WILDCARD_MATCH:
|
||
|
|
||
|
/* Zero-terminated lstBtreeWord */
|
||
|
lstBtreeWord[*((LPW)lstBtreeWord) + sizeof (SHORT)] = 0;
|
||
|
|
||
|
if ((nCmp = WildCardCompare
|
||
|
(lstTermWord, lstBtreeWord, prgbLeadByteTable)) != STRING_MATCH)
|
||
|
return nCmp;
|
||
|
if (dwTermFieldId != dwBtreeFieldId)
|
||
|
return KEEP_SEARCHING;
|
||
|
break;
|
||
|
|
||
|
}
|
||
|
return STRING_MATCH;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func HRESULT PASCAL NEAR | SkipOccList |
|
||
|
* This function will skip on occurence list in the index.
|
||
|
* @parm _LPQT | lpqt |
|
||
|
* Pointer to Index information.
|
||
|
* @parm PNODEINFO | pNodeInfo |
|
||
|
* Current leaf info.
|
||
|
* @parm DWORD | dwOccs |
|
||
|
* Number of occurrences to be skipped
|
||
|
* @rdesc S_OK if successfully skip the occurence list
|
||
|
*************************************************************************/
|
||
|
#ifndef SIMILARITY
|
||
|
PUBLIC HRESULT PASCAL FAR SkipOccList(_LPQT lpqt, PNODEINFO pNodeInfo, DWORD dwOccs)
|
||
|
#else
|
||
|
PRIVATE HRESULT PASCAL NEAR SkipOccList(_LPQT lpqt, PNODEINFO pNodeInfo, DWORD dwOccs)
|
||
|
#endif
|
||
|
{
|
||
|
DWORD dwTmp; // Trash variable.
|
||
|
HRESULT fRet; // Returned value
|
||
|
|
||
|
//
|
||
|
// One pass through here for each occurence in the
|
||
|
// current sub-list.
|
||
|
//
|
||
|
for (; dwOccs; dwOccs--)
|
||
|
{
|
||
|
//
|
||
|
// Keeping word-counts? If so, get it.
|
||
|
//
|
||
|
if (lpqt->occf & OCCF_COUNT)
|
||
|
{
|
||
|
if ((fRet = FGetDword(pNodeInfo, lpqt->ckeyWordCount,
|
||
|
&dwTmp)) != S_OK)
|
||
|
{
|
||
|
return fRet;
|
||
|
}
|
||
|
}
|
||
|
//
|
||
|
// Keeping byte-offsets? If so, get it.
|
||
|
//
|
||
|
if (lpqt->occf & OCCF_OFFSET)
|
||
|
{
|
||
|
if ((fRet = FGetDword(pNodeInfo, lpqt->ckeyOffset,
|
||
|
&dwTmp)) != S_OK)
|
||
|
{
|
||
|
return fRet;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return S_OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func BOOL FAR PASCAL | FGroupLookup |
|
||
|
* Given a item number, this function will check to see if the item
|
||
|
* belongs to a group or not.
|
||
|
*
|
||
|
* @parm LPGROUP | lpGroup |
|
||
|
* Pointer to the group to be checked
|
||
|
*
|
||
|
* @parm DWORD | dwTopicId |
|
||
|
* Item number to be checked
|
||
|
*
|
||
|
* @rdesc The function will return 0 if the item is not in the group,
|
||
|
* non-zero otherwise
|
||
|
*************************************************************************/
|
||
|
BOOL NEAR PASCAL FGroupLookup(_LPGROUP lpGroup, DWORD dwTopicId)
|
||
|
{
|
||
|
/* Check for empty group */
|
||
|
if (lpGroup->lcItem == 0)
|
||
|
return 0;
|
||
|
|
||
|
if (dwTopicId < lpGroup->minItem || dwTopicId > lpGroup->maxItem)
|
||
|
return 0;
|
||
|
#if 0
|
||
|
|
||
|
// Currently the group always starts at 0., so there is no need
|
||
|
// to recalculate dwTopicId as below
|
||
|
|
||
|
dwTopicId -= (lpGroup->minItem / 8) * 8;
|
||
|
#endif
|
||
|
|
||
|
return (lpGroup->lpbGrpBitVect[(DWORD)(dwTopicId / 8)] &
|
||
|
(1 << (dwTopicId % 8)));
|
||
|
}
|
||
|
|
||
|
|
||
|
PRIVATE int PASCAL NEAR WildCardCompare
|
||
|
(LPB pWildString, LPB pString, BYTE prgbLeadByteTable[])
|
||
|
{
|
||
|
LPB pBack;
|
||
|
unsigned int wMinLength = 0;
|
||
|
int f1stIsWild;
|
||
|
int fRet = KEEP_SEARCHING;
|
||
|
int fGotWild = FALSE;
|
||
|
|
||
|
pWildString += sizeof (SHORT); /* Skip the length */
|
||
|
f1stIsWild = (*pWildString == WILDCARD_CHAR ||
|
||
|
*pWildString == WILDCARD_STAR);
|
||
|
|
||
|
/* Calculate the minimum length of the string */
|
||
|
// pback is used as temp here
|
||
|
for (pBack = pWildString; *pBack; pBack++)
|
||
|
{
|
||
|
if (prgbLeadByteTable && prgbLeadByteTable[*pBack])
|
||
|
{
|
||
|
wMinLength += 2;
|
||
|
pBack++;
|
||
|
}
|
||
|
else if (*pBack != '*')
|
||
|
wMinLength ++;
|
||
|
}
|
||
|
|
||
|
if (wMinLength > *((LPW)pString))
|
||
|
{
|
||
|
if (f1stIsWild)
|
||
|
return KEEP_SEARCHING;
|
||
|
}
|
||
|
|
||
|
pString += sizeof (SHORT); /* Skip the length */
|
||
|
|
||
|
pBack = NULL; /* Reset pBack */
|
||
|
for (;;)
|
||
|
{
|
||
|
switch (*pWildString)
|
||
|
{
|
||
|
case '?':
|
||
|
if (*pString == 0)
|
||
|
return fRet;
|
||
|
pWildString++;
|
||
|
pString = NextChar (pString, prgbLeadByteTable);
|
||
|
fGotWild = TRUE;
|
||
|
break;
|
||
|
|
||
|
case '*':
|
||
|
fGotWild = TRUE;
|
||
|
/* Optimization: *???? == * */
|
||
|
for (; *pWildString; pWildString++)
|
||
|
{
|
||
|
switch (*pWildString)
|
||
|
{
|
||
|
case '*':
|
||
|
pBack = pWildString;
|
||
|
case '?':
|
||
|
continue;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
if (*pWildString == 0)
|
||
|
{
|
||
|
/* Terminated by '*'. Match all */
|
||
|
return STRING_MATCH;
|
||
|
}
|
||
|
|
||
|
/* Skip the chars until we get a 1st match */
|
||
|
while (*pString)
|
||
|
{
|
||
|
if (!CompareChar (pString, pWildString, prgbLeadByteTable))
|
||
|
break;
|
||
|
|
||
|
pString = NextChar (pString, prgbLeadByteTable);
|
||
|
}
|
||
|
// This is inteded to fall through to continue processing
|
||
|
|
||
|
default:
|
||
|
if (!CompareChar (pString, pWildString, prgbLeadByteTable))
|
||
|
{
|
||
|
if (*pString == 0) /* We finish both strings */
|
||
|
return STRING_MATCH;
|
||
|
pString = NextChar (pString, prgbLeadByteTable);
|
||
|
pWildString = NextChar (pWildString, prgbLeadByteTable);
|
||
|
break;
|
||
|
}
|
||
|
else if (f1stIsWild || // *pWildString == 0 ||
|
||
|
// *pString < *pWildString
|
||
|
CompareChar (pString, pWildString, prgbLeadByteTable) < 0)
|
||
|
{
|
||
|
fRet = KEEP_SEARCHING;
|
||
|
}
|
||
|
else if (fGotWild == FALSE)
|
||
|
fRet = NOT_FOUND;
|
||
|
|
||
|
/* The chars do not match. Check to see for back up */
|
||
|
if (!pBack || *pString == 0)
|
||
|
{
|
||
|
return fRet;
|
||
|
}
|
||
|
|
||
|
/* Back up the string */
|
||
|
pWildString = pBack;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func HRESULT FAR PASCAL | TopNodeRead |
|
||
|
* Makes sure the index b-tree top node is in memory. Reads it if
|
||
|
* necessary. The index file must be open and the index header must
|
||
|
* be in memory or this call will break.
|
||
|
*
|
||
|
* @parm _LPQT | lpidx |
|
||
|
* Index information.
|
||
|
*
|
||
|
* @rdesc S_OK, if succeeded, otherwise error values
|
||
|
*************************************************************************/
|
||
|
|
||
|
PUBLIC HRESULT PASCAL FAR TopNodeRead( _LPIDX lpidx)
|
||
|
{
|
||
|
DWORD dwBlockSize = lpidx->ih.dwBlockSize;
|
||
|
|
||
|
if (lpidx->hTopNode != NULL)
|
||
|
return S_OK;
|
||
|
if ((lpidx->hTopNode = _GLOBALALLOC(GMEM_MOVEABLE, dwBlockSize)) == NULL)
|
||
|
{
|
||
|
return E_OUTOFMEMORY;
|
||
|
}
|
||
|
lpidx->lrgbTopNode = (LRGB)_GLOBALLOCK(lpidx->hTopNode);
|
||
|
if (FileSeekRead
|
||
|
(lpidx->hfpbIdxSubFile, lpidx->lrgbTopNode, lpidx->ih.foIdxRoot,
|
||
|
dwBlockSize, lpidx->lperrb) != (long)dwBlockSize)
|
||
|
{
|
||
|
TopNodePurge(lpidx);
|
||
|
return E_ASSERT;
|
||
|
}
|
||
|
return S_OK;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func void PASCAL FAR | TopNodePurge |
|
||
|
* Get rid of the index b-tree top node if it's in memory.
|
||
|
*
|
||
|
* @parm _LPIDX | lpidx |
|
||
|
* Pointer to index structure
|
||
|
*************************************************************************/
|
||
|
PUBLIC void FAR PASCAL TopNodePurge(_LPIDX lpidx)
|
||
|
{
|
||
|
if (lpidx->hTopNode == NULL) // Already gone.
|
||
|
return;
|
||
|
FreeHandle(lpidx->hTopNode);
|
||
|
lpidx->hTopNode = NULL;
|
||
|
}
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func void FAR PASCAL | IndexCloseFile |
|
||
|
* Close the index file. Error not checked since it is opened
|
||
|
* for read only
|
||
|
*
|
||
|
* @parm _LPIDX | lpidx |
|
||
|
* Pointer to index structure
|
||
|
*************************************************************************/
|
||
|
PUBLIC void PASCAL FAR IndexCloseFile(_LPIDX lpidx)
|
||
|
{
|
||
|
if (lpidx->hfpbIdxSubFile != NULL)
|
||
|
{
|
||
|
FileClose(lpidx->hfpbIdxSubFile);
|
||
|
lpidx->hfpbIdxSubFile = NULL;
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func LPB FAR PASCAL | NextChar |
|
||
|
* Get the next character in a string based on a DBCS lead-byte table
|
||
|
*
|
||
|
* @parm LPB | pStr |
|
||
|
* Pointer to character in a string to skip
|
||
|
*
|
||
|
* @parm BYTE * | prgbLeadByteTable |
|
||
|
* Array of DBCS lead bytes (assumed to have 256 elements)
|
||
|
* Each element should be set to 1 or 0 to indeicate if that index
|
||
|
* is considered a lead-byte.
|
||
|
*
|
||
|
* @rdesc Returns a pointer to the next character in pStr
|
||
|
*************************************************************************/
|
||
|
LPB FAR PASCAL NextChar (LPB pStr, BYTE prgbLeadByteTable[])
|
||
|
{
|
||
|
if (!prgbLeadByteTable)
|
||
|
return (pStr + 1);
|
||
|
if (prgbLeadByteTable[*pStr])
|
||
|
return (pStr + 2);
|
||
|
return (pStr + 1);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*************************************************************************
|
||
|
* @doc INTERNAL
|
||
|
*
|
||
|
* @func BOOL FAR PASCAL | CompareChar |
|
||
|
* Compares the first character in pStr1 to the first
|
||
|
* character in pStr2, using the supplied DBCS lead-byte table.
|
||
|
*
|
||
|
* @parm LPB | pStr1 |
|
||
|
* Pointer to character in a string to compare
|
||
|
*
|
||
|
* @parm LPB | pStr2 |
|
||
|
* Pointer to character in a string to compare
|
||
|
*
|
||
|
* @parm BYTE * | prgbLeadByteTable |
|
||
|
* Array of DBCS lead bytes (assumed to have 256 elements).
|
||
|
* Each element should be set to 1 or 0 to indeicate if that index
|
||
|
* is considered a lead-byte.
|
||
|
*
|
||
|
* @rdesc The difference between the first bytes of pStr1 and pStr2.
|
||
|
* If the first bytes are equal and are lead bytes then the
|
||
|
* difference between the second bytes is returned.
|
||
|
*************************************************************************/
|
||
|
__inline BOOL FAR PASCAL CompareChar
|
||
|
(LPB pStr1, LPB pStr2, BYTE prgbLeadByteTable[])
|
||
|
{
|
||
|
// Get rid of obvious mismatches
|
||
|
if (*pStr1 != *pStr2)
|
||
|
return (*pStr1 - *pStr2);
|
||
|
// We now know the first bytes are equal.
|
||
|
// If there is no lead byte table we have a match
|
||
|
if (!prgbLeadByteTable)
|
||
|
return (0);
|
||
|
// If lead bytes, check the trail bytes
|
||
|
if (prgbLeadByteTable[*pStr1])
|
||
|
return (*(pStr1 + 1) - *(pStr2 + 1));
|
||
|
// Not lead bytes then they must be equal
|
||
|
return (0);
|
||
|
}
|