windows-nt/Source/XPSP1/NT/enduser/stuff/itircl/fts/search/combine.c

1619 lines
46 KiB
C
Raw Normal View History

2020-09-26 03:20:57 -05:00
//#define _DUMPALL
#include <mvopsys.h>
#include <mem.h>
#include <memory.h>
#include <orkin.h>
#ifdef DOS_ONLY
#include <assert.h>
#endif // DOS_ONLY
#include <mvsearch.h>
#include "common.h"
#include "search.h"
#ifdef _DEBUG
static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
#endif
typedef int (PASCAL NEAR * FCMP)(LPV, LPV);
#define MAX_HEAP_ENTRIES 0xffff/sizeof(LPV) // Maximum entries for heap sort
#define MIN_HEAP_ENTRIES 100 // Minimum entries for heap sort
/*************************************************************************
*
* INTERNAL GLOBAL FUNCTIONS
*************************************************************************/
PUBLIC HRESULT PASCAL NEAR OrHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
PUBLIC HRESULT PASCAL NEAR AndHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
PUBLIC HRESULT PASCAL NEAR NotHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
PUBLIC HRESULT PASCAL NEAR NearHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
PUBLIC HRESULT PASCAL NEAR PhraseHandler(LPQT, _LPQTNODE, LPITOPIC, LPV, int);
PUBLIC VOID PASCAL NEAR RemoveUnmarkedTopicList (LPQT, _LPQTNODE, BOOL);
PUBLIC VOID PASCAL NEAR RemoveUnmarkedNearTopicList (_LPQT, _LPQTNODE);
PUBLIC VOID PASCAL NEAR MergeOccurence(LPQT, LPITOPIC , LPITOPIC);
PUBLIC VOID PASCAL NEAR SortResult (LPQT, _LPQTNODE, WORD);
PUBLIC VOID PASCAL NEAR NearHandlerCleanUp (LPQT, _LPQTNODE);
PUBLIC HRESULT PASCAL NEAR TopicListSort (_LPQTNODE, BOOL);
/*************************************************************************
* GLOBAL VARIABLES
*************************************************************************/
extern FNHANDLER HandlerFuncTable[];
/*************************************************************************
*
* INTERNAL PRIVATE FUNCTIONS
* All of them should be declared near
*************************************************************************/
PRIVATE VOID PASCAL NEAR RemoveQuery(LPQT, _LPQTNODE);
PUBLIC HRESULT PASCAL NEAR ProximityCheck(LPITOPIC, LPIOCC, WORD);
PRIVATE HRESULT PASCAL NEAR HandleNullNode(LPQT, _LPQTNODE , _LPQTNODE, int);
PRIVATE VOID PASCAL NEAR RemoveUnmarkedOccList (LPQT, LPITOPIC, LPIOCC, int);
PRIVATE VOID PASCAL NEAR OccurenceSort (LPQT, LPITOPIC);
PRIVATE int PASCAL NEAR FRange(DWORD, DWORD, WORD);
PRIVATE HRESULT PASCAL NEAR InsertMarker (LPQT, LPITOPIC);
PRIVATE LPIOCC PASCAL NEAR FindMarker (LPIOCC);
PRIVATE HRESULT PASCAL NEAR NearListMatch (LPIOCC, LPIOCC, WORD);
PRIVATE VOID PASCAL NEAR HeapUp (LPITOPIC far*, WORD, FCMP);
PRIVATE VOID PASCAL NEAR HeapDown (LPITOPIC far*, int, FCMP);
PRIVATE int PASCAL NEAR TopicWeightCompare (LPV, LPV);
PRIVATE int PASCAL NEAR HitCountCompare (LPV, LPV);
/*************************************************************************
* @doc INTERNAL
*
* @func HRESULT PASCAL NEAR | OrHandler |
* Handle ORing the strings
*
* @parm int | fOperationType |
* Tell what kinds of operations we are dealing with
*
* @parm _LPQTNODE | lpResQtNode |
* The query node structure that we add the result to
*
* @parm LPV | lpStruct |
* Vanilla pointers to different types of structures we are dealing with.
* The contents of those pointers are determined by the value of
* fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
* EXPRESSION_EXPRESSION, this is a_LPQTNODE
*
* @rdesc S_OK : if the operation has been carried
* E_FAIL : if some errors happened (out-of-memory)
*
* @comm The implementation is straightforward:
*************************************************************************/
PUBLIC HRESULT PASCAL NEAR OrHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
{
_LPQTNODE lpCurQtNode;
LPITOPIC lpCurTopicList;
LPITOPIC lpNextTopicList;
switch (fOperationType) {
case EXPRESSION_TERM:
/* We are adding a new occurence into a TopicID list. This can
only happens when we are loading the infos for a query's
TERM_NODE node
*/
RET_ASSERT(lpResTopicList);
/* Adding the new occurence to the TopicID list */
return OccNodeInsert(lpQueryTree, lpResTopicList, (LPIOCC)lpStruct);
break;
case EXPRESSION_EXPRESSION:
lpCurQtNode = (_LPQTNODE)lpStruct;
/* Handle different variations of:
(EXPRESSION_NODE | NULL_NODE) or (NULL_NODE | EXPRESSION_NODE)
*/
if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, OR_OP))
return S_OK;
/* Make sure that we are pointing to the right place to search
*/
lpQueryTree->lpTopicStartSearch = lpResQtNode->lpTopicList;
/* Thread the TopicID List and add them to lpResQtNode */
for (lpCurTopicList = QTN_TOPICLIST(lpCurQtNode); lpCurTopicList;
lpCurTopicList = lpNextTopicList) {
lpNextTopicList = lpCurTopicList->pNext;
/* Find the location of the TopicID List in the query. */
if ((lpResTopicList = TopicNodeSearch(lpQueryTree, lpResQtNode,
lpCurTopicList->dwTopicId)) == NULL){
/* The list doesn't exist yet, so we just transfer the
new TopicID list to lpResQtNode
*/
RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL,
(LPSLINK) lpCurTopicList, (TOPICLIST_NODE | DONT_FREE));
TopicNodeInsert (lpQueryTree, lpResQtNode, lpCurTopicList);
}
else {
/* Merging two TopicList together by adding the new
occurence list to the old result doc list
*/
MergeOccurence(lpQueryTree, lpResTopicList, lpCurTopicList);
/* Remove the now empty TopicList */
RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL,
(LPSLINK) lpCurTopicList, TOPICLIST_NODE);
}
}
/* Assure that all nodes are transferred */
RET_ASSERT (QTN_TOPICLIST(lpCurQtNode) == NULL) ;
break;
}
return S_OK;
}
/*************************************************************************
* @doc INTERNAL
*
* @func PUBLIC HRESULT PASCAL NEAR | AndHandler |
* Handle Anding the strings
*
* @parm _LPQTNODE | lpResQtNode |
* The query structure that we add the result to
*
* @parm LPITOPIC | lpResTopicList |
* The TopicList structure that we add the result to
*
* @parm LPV | lpStruct |
* Vanilla pointers to different types of structures we are dealing with.
* The contents of those pointers are determined by the value of
* fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
* EXPRESSION_EXPRESSION, this is a_LPQTNODE
*
* @parm int | fOperationType |
* Tell what kinds of nodes we are handling, query-occurence or
* query-query
*
* @rdesc S_OK : if the operation has been carried
* errors : if some errors happened (out-of-memory)
*************************************************************************/
PUBLIC HRESULT PASCAL NEAR AndHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
{
_LPQTNODE lpCurQtNode;
LPITOPIC lpTopicNode1;
LPITOPIC lpTopicNode2;
LPITOPIC lpNextTopic1;
LPITOPIC lpNextTopic2;
LPITOPIC lpPrev;
long fResult;
switch (fOperationType) {
case EXPRESSION_TERM:
RET_ASSERT (lpResTopicList);
((LPIOCC)lpStruct)->fFlag |= TO_BE_KEPT;
lpResTopicList->fFlag |= TO_BE_KEPT;
/* Adding the new occurence to the TopicID list */
return OccNodeInsert(lpQueryTree, lpResTopicList, (LPIOCC)lpStruct);
case EXPRESSION_EXPRESSION:
/* Doing an AND combination is equivalent to merging the
* two lists together for same doc ID
*/
lpCurQtNode = (_LPQTNODE)lpStruct;
if (HandleNullNode(lpQueryTree, lpResQtNode, lpCurQtNode, AND_OP))
return S_OK;
/* Initialize variables */
lpTopicNode1 = QTN_TOPICLIST(lpResQtNode);
lpTopicNode2 = QTN_TOPICLIST(lpCurQtNode);
lpPrev = NULL;
while (lpTopicNode1 && lpTopicNode2)
{
/* Get the next nodes */
lpNextTopic1 = lpTopicNode1->pNext;
lpNextTopic2 = lpTopicNode2->pNext;
if ((fResult = lpTopicNode1->dwTopicId -
lpTopicNode2->dwTopicId) == 0)
{
/* The TopicIds match */
/* Merge the occurrences together */
MergeOccurence (lpQueryTree, lpTopicNode1, lpTopicNode2);
lpPrev = lpTopicNode1;
lpTopicNode1 = lpNextTopic1;
lpTopicNode2 = lpNextTopic2;
}
else if (fResult < 0)
{
/* List 1 < List 2 */
/* Remove Topic node 1*/
TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1);
lpTopicNode1 = lpNextTopic1;
}
else
{
/* List 1 > List 2 */
lpTopicNode2 = lpNextTopic2;
}
}
/* Free remaining doc list */
while (lpTopicNode1)
{
/* Get the next nodes */
lpNextTopic1 = lpTopicNode1->pNext;
/* Remove Topic node 1*/
TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1);
lpTopicNode1 = lpNextTopic1;
}
/* Free doc 2 list */
RemoveQuery(lpQueryTree, lpCurQtNode);
if (QTN_TOPICLIST(lpResQtNode) == NULL)
QTN_NODETYPE(lpResQtNode) = NULL_NODE;
return S_OK;
default: /* Weird parameters */
RET_ASSERT(UNREACHED);
}
}
/*************************************************************************
* @doc INTERNAL
*
* @func PUBLIC HRESULT PASCAL NEAR | NotHandler |
* Handle NOT the strings
*
* @parm _LPQTNODE | lpResQtNode |
* The query structure that we add the result to
*
* @parm LPITOPIC | lpResTopicList |
* The TopicList structure that we add the result to
*
* @parm LPV | lpStruct |
* Vanilla pointers to different types of structures we are dealing with.
* The contents of those pointers are determined by the value of
* fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
* EXPRESSION_EXPRESSION, this is a_LPQTNODE
*
* @parm int | fOperationType |
* Tell what kinds of nodes we are handling, query-occurence or
* query-query
*
* @rdesc S_OK : if the operation has been carried
* errors : if some errors happened (out-of-memory)
*************************************************************************/
PUBLIC HRESULT PASCAL NEAR NotHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
{
_LPQTNODE lpCurQtNode;
LPITOPIC lpTopicNode1;
LPITOPIC lpTopicNode2;
LPITOPIC lpNextTopic1;
LPITOPIC lpNextTopic2;
LPITOPIC lpPrev;
long fResult;
switch (fOperationType) {
case EXPRESSION_TERM:
RET_ASSERT(UNREACHED);
break;
case EXPRESSION_EXPRESSION:
lpCurQtNode = (_LPQTNODE)lpStruct;
if (HandleNullNode(lpQueryTree, lpResQtNode,
lpCurQtNode, NOT_OP))
return S_OK;
/* Initialize variables */
lpTopicNode1 = QTN_TOPICLIST(lpResQtNode);
lpTopicNode2 = QTN_TOPICLIST(lpCurQtNode);
lpPrev = NULL;
while (lpTopicNode1 && lpTopicNode2) {
/* Get the next nodes */
lpNextTopic1 = lpTopicNode1->pNext;
lpNextTopic2 = lpTopicNode2->pNext;
if ((fResult = lpTopicNode1->dwTopicId -
lpTopicNode2->dwTopicId) == 0) {
/* The TopicIds match */
TopicNodeFree(lpQueryTree, lpResQtNode, lpPrev, lpTopicNode1);
lpTopicNode1 = lpNextTopic1;
lpTopicNode2 = lpNextTopic2;
}
else if (fResult < 0) {
/* List 1 < List 2 */
lpPrev = lpTopicNode1;
lpTopicNode1 = lpNextTopic1;
}
else {
/* List 1 > List 2 */
lpTopicNode2 = lpNextTopic2;
}
}
/* Free doc 2 list */
RemoveQuery(lpQueryTree, lpCurQtNode);
if (QTN_TOPICLIST(lpResQtNode) == NULL)
QTN_NODETYPE(lpResQtNode) = NULL_NODE;
return S_OK;
default: /* Weird parameters */
RET_ASSERT(UNREACHED);
}
return S_OK;
}
/*************************************************************************
* NEARHANDLER Description
*
* Sematics:
* The current chosen sematics is:
* A near B near C --> (A near B) and (B near C)
* Other possible sematics of NEAR for (A near B near C) are:
* - A and B anc C must be near each other
* - Any two of A or B or C can be near each other
*
* Observation:
* With the above semantics, we notice that only the last word (ie. B)
* has meaning in the comparison with C.
*
* Implementation:
* A special node will be used to differentiate between occurrences
* coming from A and from B. Only the ones from B will be used in the
* combination with C. Consider the following example (ProxDist = 5):
* A B C
* 10 15 5 (the numbers are word counts)
* 14 18 16
* After combining A and B we will end up with:
* 15
* 18
* M <- marker separates occurrences from A and B
* 10
* 14
* After that we only combine B's terms with C's terms. The result will
* look like:
* 16
* M <- marker separates occurrences from B and C
* 15
* 18
* M <- marker separates occurrences from A and B
* 10
* 14
* Note that C's 5 is dropped since there is no match with B, even
* that A's 10 matches it.
* After sorting and getting rid of the marker nodes, the final result
* will look as followed:
* 10 14 15 16 18
*************************************************************************/
PRIVATE HRESULT PASCAL NEAR NearHandlerInsert (_LPQT lpQueryTree,
LPITOPIC lpResTopicList, LPIOCC lpStartOcc, LPIOCC lpCurOcc)
{
HRESULT fRet = FALSE;
if (!(lpCurOcc->fFlag & IS_MARKER_NODE) &&
(fRet = NearListMatch(lpCurOcc, lpStartOcc, lpQueryTree->wProxDist))) {
/* Insert the occurrence node */
lpCurOcc->pNext = lpResTopicList->lpOccur;
lpResTopicList->lpOccur = lpCurOcc;
lpResTopicList->lcOccur ++;
}
else {
/* Remove the occurrence node */
RemoveNode(lpQueryTree, (LPV) NULL, (LPSLINK)NULL,
(LPSLINK) lpCurOcc, OCCURENCE_NODE);
}
return (fRet);
}
/*************************************************************************
* @doc INTERNAL
*
* @func LPIOCC PASCAL NEAR | FindMarker |
* Given a starting occurrence node, traverse it and find the first
* marker node
*
* @parm LPIOCC | lpStartOcc |
* Starting node
*
* @rdesc The marker node
*************************************************************************/
PRIVATE LPIOCC PASCAL NEAR FindMarker (LPIOCC lpStartOcc)
{
LPIOCC lpCurOcc;
for (lpCurOcc = lpStartOcc; lpCurOcc; lpCurOcc = lpCurOcc->pNext) {
if (lpCurOcc->fFlag & IS_MARKER_NODE)
break;
}
return lpCurOcc;
}
/*************************************************************************
* @doc INTERNAL
*
* @func HRESULT PASCAL NEAR | InsertMarker |
* This function will insert a marker node at the beginning of
* lpResTopicList->lpOccur
*
* @parm LPQT | lpQueryTree |
* Pointer to query tree where all globals are
*
* @parm LPITOPIC | lpResTopicList |
* Pointer to TopicId node
*
* @rdesc S_OK
*************************************************************************/
PRIVATE HRESULT PASCAL NEAR InsertMarker (LPQT lpQueryTree, LPITOPIC lpResTopicList)
{
LPMARKER lpMark;
/* Do some preparations by allocating marker nodes */
if ((lpResTopicList->fFlag & HAS_MARKER) == FALSE) {
if (!(lpMark = (LPMARKER)OccNodeAllocate(lpQueryTree)))
return E_TOOMANYTOPICS;
lpMark->fFlag |= (IS_MARKER_NODE | TO_BE_KEPT);
/* Link the markers together with the nodes */
lpMark->pNext = lpResTopicList->lpOccur;
lpResTopicList->lpOccur = (LPIOCC)lpMark;
/* Link with the next marker */
lpMark->pNextMark = (LPMARKER)FindMarker (lpMark->pNext);
/* Mark that we already has a marker */
lpResTopicList->fFlag |= HAS_MARKER;
/* Increment lcOccur, since this node will be removed
* like a regular occurrence node */
lpResTopicList->lcOccur ++;
}
return S_OK;
}
/*************************************************************************
* @doc INTERNAL
*
* @func HRESULT PASCAL NEAR | NearListMatch |
* Traverse a list and match all nodes against lpCurOcc
*
* @parm LPIOCC | lpCurOcc |
* Occurrence node to be compared with
*
* @parm LPIOCC | lpStartOcc |
* Start of the occurrence list
*
* @parm WORD | wProxDist |
* Proximity distance
*
* @rdesc
* return TRUE if the the node matches
*************************************************************************/
PRIVATE HRESULT PASCAL NEAR NearListMatch (LPIOCC lpCurOcc,
LPIOCC lpStartOcc, WORD wProxDist)
{
LPIOCC lpResOcc;
BOOL fMatch = FALSE;
for (lpResOcc = lpStartOcc; lpResOcc; lpResOcc = lpResOcc->pNext) {
if (lpResOcc->fFlag & IS_MARKER_NODE)
break;
if (!FRange(lpResOcc->dwCount, lpCurOcc->dwCount,
wProxDist)) {
lpResOcc->fFlag |= TO_BE_KEPT;
fMatch = TRUE;
}
}
if (fMatch)
lpCurOcc->fFlag |= TO_BE_KEPT;
return fMatch;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL FAR | NearHandlerTopicCleanUp |
* Clean up a TopicList by going thru each sequence of occurrence
* delimited by marker, do the check again and remove all extra
* occurrences
*
* @parm _LPQT | lpQueryTree |
* Pointer to query tree
*
* @parm _LPQTNODE | lpResQtNode |
* Pointer to result query node
*
* @parm LPITOPIC | lpCurTopic |
* Current doc id
*************************************************************************/
PUBLIC HRESULT PASCAL FAR NearHandlerTopicCleanUp (_LPQT lpQueryTree,
_LPQTNODE lpResQtNode, LPITOPIC lpCurTopic)
{
LPMARKER lpMarkStart;
LPMARKER lpCurMark;
LPIOCC lpCurOcc;
LPIOCC lpStartOcc;
BOOL fDone;
/* Find the first marker */
lpMarkStart = (LPMARKER)FindMarker(lpCurTopic->lpOccur);
if (lpMarkStart == NULL) {
/* This branch has been cleaned up of marker nodes */
return E_FAIL;
}
/* The first occurrences from the start to lpMarkStart must be
* TO_BE_KEPT, so we don't have to check them since they just
* freshly came from a near handler. All we have to do
* is set the flag */
for (lpCurOcc = lpCurTopic->lpOccur; lpCurOcc && !(lpCurOcc->fFlag &
IS_MARKER_NODE); lpCurOcc = lpCurOcc->pNext)
lpCurOcc->fFlag |= TO_BE_KEPT;
if (lpMarkStart->pNextMark == NULL) {
/* Simple A NEAR B, just set the flag */
for (lpCurOcc = lpMarkStart->pNext; lpCurOcc && !(lpCurOcc->fFlag &
IS_MARKER_NODE); lpCurOcc = lpCurOcc->pNext)
lpCurOcc->fFlag |= TO_BE_KEPT;
}
else {
/* Complex NEAR terms, such as: A NEAR B NEAR C */
lpStartOcc = lpMarkStart->pNext;
lpCurMark = lpMarkStart->pNextMark;
fDone = FALSE;
for (;lpCurMark; lpCurMark = lpCurMark->pNextMark)
{
for (lpCurOcc = lpCurMark->pNext;
lpCurOcc && !(lpCurOcc->fFlag & IS_MARKER_NODE);
lpCurOcc = lpCurOcc->pNext) {
if (NearListMatch (lpCurOcc, lpStartOcc,
lpQueryTree->wProxDist) == FALSE)
{
fDone = TRUE;
}
}
lpStartOcc = lpCurMark->pNext;
if (fDone)
break;
}
}
/* Clear up all the marker node flag to ensure that they will be
* removed */
while (lpMarkStart) {
lpMarkStart->fFlag &= ~(TO_BE_KEPT | IS_MARKER_NODE);
lpMarkStart = lpMarkStart->pNextMark;
}
return S_OK;
}
PUBLIC VOID PASCAL NEAR NearHandlerCleanUp (_LPQT lpQueryTree,
_LPQTNODE lpResQtNode)
{
LPITOPIC lpCurTopic;
for (lpCurTopic = QTN_TOPICLIST(lpResQtNode); lpCurTopic;
lpCurTopic = lpCurTopic->pNext) {
if (NearHandlerTopicCleanUp (lpQueryTree, lpResQtNode,
lpCurTopic) == S_OK) {
RemoveUnmarkedOccList(lpQueryTree, lpCurTopic,
lpCurTopic->lpOccur, TRUE);
}
}
}
/*************************************************************************
* @doc INTERNAL
*
* @func PUBLIC HRESULT PASCAL NEAR | NearHandler |
* Handle NEAR operation
*
* @parm _LPQTNODE | lpResQtNode |
* The query structure that we add the result to
*
* @parm LPITOPIC | lpResTopicList |
* The TopicList structure that we add the result to
*
* @parm LPV | lpStruct |
* Vanilla pointers to different types of structures we are dealing with.
* The contents of those pointers are determined by the value of
* fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
* EXPRESSION_EXPRESSION, this is a_LPQTNODE
* @parm int | fOperationType |
* Tell what kinds of nodes we are handling, query-occurence or
* query-query
*
* @rdesc S_OK : if the operation has been carried
* errors : if some errors happened (out-of-memory)
*************************************************************************/
PUBLIC HRESULT PASCAL NEAR NearHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
LPITOPIC lpResTopicList, LPV lpStruct, int fOperationType)
{
_LPQTNODE lpCurQtNode; /* Current query tree node */
LPITOPIC lpCurTopicList; /* Current TopicId node */
LPITOPIC lpNextTopicList;
LPIOCC lpCurOcc;
LPIOCC lpStartOcc;
LPITOPIC lpPrevRes;
LPSLINK lpTmp; //erinfox
switch (fOperationType) {
case EXPRESSION_TERM:
/* Insert a marker node if necessary */
if (InsertMarker (lpQueryTree, lpResTopicList) != S_OK)
return E_TOOMANYTOPICS;
/* Look for the starting point */
lpStartOcc = FindMarker(lpResTopicList->lpOccur);
RET_ASSERT(lpStartOcc);
/* Handle the near operation */
if (NearHandlerInsert (lpQueryTree, lpResTopicList, lpStartOcc->pNext,
(LPIOCC)lpStruct))
lpResTopicList->fFlag |= TO_BE_KEPT;
break;
case EXPRESSION_EXPRESSION:
lpCurQtNode = (_LPQTNODE)lpStruct;
if (HandleNullNode(lpQueryTree, lpResQtNode,
lpCurQtNode, NEAR_OP))
return S_OK;
/* Now doing the real jobs */
/* Make sure that we are pointing to the right place to search
*/
lpQueryTree->lpTopicStartSearch = lpResQtNode->lpTopicList;
/* First check the coming data from lpCurTopicList.
* If there isn't an equivalent TopicId in QTN_TOPICLIST(lpResQtNode),
* then remove it
*/
for (lpCurTopicList = QTN_TOPICLIST(lpCurQtNode); lpCurTopicList;
lpCurTopicList = lpNextTopicList) {
lpNextTopicList = lpCurTopicList->pNext;
/* Find the location of the TopicID List in the query. */
if ((lpResTopicList = TopicNodeSearch(lpQueryTree,
lpResQtNode, lpCurTopicList->dwTopicId)) == NULL) {
/* Can't find equivalent TopicId in the result list, just
remove lpCurTopicList
*/
TopicNodeFree(lpQueryTree, lpCurQtNode, NULL, lpCurTopicList);
continue;
}
/* An equivalent TopicId is found */
/* Insert a marker node if necessary */
if (InsertMarker (lpQueryTree, lpResTopicList) != S_OK)
return E_TOOMANYTOPICS;
/* Look for the starting point */
RET_ASSERT(lpResTopicList->lpOccur)
lpStartOcc = FindMarker(lpResTopicList->lpOccur);
lpStartOcc = lpStartOcc->pNext; // Skip marker node
for (lpCurOcc = lpCurTopicList->lpOccur; lpCurOcc;
lpCurOcc = lpCurTopicList->lpOccur) {
/* "Unlink" lpCurOcc */
lpCurTopicList->lpOccur = lpCurOcc->pNext;
/* Handle the near operation */
if (NearHandlerInsert (lpQueryTree, lpResTopicList,
lpStartOcc, lpCurOcc)) {
lpResTopicList->fFlag |= TO_BE_KEPT;
}
}
RET_ASSERT(lpCurTopicList->lpOccur == NULL);
RemoveNode(lpQueryTree, (LPV) lpCurQtNode, NULL,
(LPSLINK) lpCurTopicList, TOPICLIST_NODE);
if (lpResTopicList->lpOccur->fFlag & IS_MARKER_NODE) {
/* We didn't find any match, remove this TopicList */
// erinfox - I don't know lpPrevRes for lpResTopicList, so I get it here
for (lpPrevRes = NULL, lpTmp = (LPSLINK)lpResQtNode->lpTopicList; lpTmp;
lpTmp = lpTmp->pNext)
{
if (lpTmp == (LPSLINK)lpResTopicList)
break;
lpPrevRes = (LPITOPIC) lpTmp;
}
TopicNodeFree(lpQueryTree, lpResQtNode, lpPrevRes, lpResTopicList);
}
else {
/* Remove all unmarked occurrences, but don't
* reset the TO_BE_KEPT flag */
RemoveUnmarkedOccList(lpQueryTree, lpResTopicList,
lpResTopicList->lpOccur, FALSE);
}
}
RemoveQuery(lpQueryTree, lpCurQtNode);
if (QTN_TOPICLIST(lpResQtNode) == NULL)
QTN_NODETYPE(lpResQtNode) = NULL_NODE;
return S_OK;
break;
}
return S_OK;
}
/*************************************************************************
* @doc INTERNAL
*
* @func PUBLIC HRESULT PASCAL NEAR | PhraseHandler |
* Handle PHRASE operation
*
* @parm _LPQTNODE | lpResQtNode |
* The query structure that we add the result to
*
* @parm LPITOPIC | lpResTopicList |
* The TopicList structure that we add the result to
*
* @parm LPV | lpStruct |
* Vanilla pointers to different types of structures we are dealing with.
* The contents of those pointers are determined by the value of
* fOperationType, for EXPRESSION_TERM, this is a LPIOCC, for
* EXPRESSION_EXPRESSION, this is a_LPQTNODE
*
* @parm int | fOperationType |
* Tell what kinds of nodes we are handling, query-occurence or
* query-query
*
* @rdesc S_OK : if the operation has been carried
* errors : if some errors happened (out-of-memory)
*************************************************************************/
PUBLIC HRESULT PASCAL NEAR PhraseHandler(_LPQT lpQueryTree, _LPQTNODE lpResQtNode,
LPITOPIC lpResTopicList, LPIOCC lpCurOcc, int fOperationType)
{
LPIOCC lpResOcc;
BOOL fResult = 0;
LPIOCC lpStartOcc = NULL;
RET_ASSERT(fOperationType == EXPRESSION_TERM);
/* Start at the beginning if necessary */
if ((lpResOcc = lpQueryTree->lpOccStartSearch) == NULL)
lpResOcc = lpResTopicList->lpOccur;
for (; lpResOcc; lpResOcc = lpResOcc->pNext) {
if ((lpResOcc->fFlag & TO_BE_SKIPPED) == 0) {
if ((fResult = (int)(lpCurOcc->dwCount - lpResOcc->dwCount))
== 1) {
/* The nodes are consecutive, mark them TO_BE_KEPT */
lpResOcc->fFlag |= TO_BE_KEPT | TO_BE_SKIPPED;
lpResOcc->fFlag &= ~TO_BE_COMPARED;
lpCurOcc->fFlag |= TO_BE_KEPT | TO_BE_SKIPPED |
TO_BE_COMPARED;
lpResTopicList->fFlag |= TO_BE_KEPT;
break;
}
/* Reset lpStartOcc */
lpStartOcc = NULL;
if (fResult <= 0) {
/* CurOcc is less than what is in the result list */
break;
}
}
else {
/* Get a skipped node. Mark the assumed starting node */
if (lpStartOcc == NULL) {
lpStartOcc = lpResOcc;
}
}
}
if (fResult == 1) {
/* Add this node, and mark the starting point for next search */
lpQueryTree->lpOccStartSearch = lpCurOcc->pNext = lpResOcc->pNext;
lpResOcc->pNext = lpCurOcc;
lpResTopicList->lcOccur ++;
/* Mark all previous nodes TO_BE_KEPT */
if (lpStartOcc) {
for (; lpStartOcc != lpCurOcc; lpStartOcc = lpStartOcc->pNext)
lpStartOcc->fFlag |= TO_BE_KEPT;
}
}
else {
RemoveNode(lpQueryTree, (LPV) NULL, (LPSLINK)NULL,
(LPSLINK) lpCurOcc, OCCURENCE_NODE);
}
return S_OK;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | MergeOccurence |
* Merge two occurences lists together
*
* @parm _LPQT | lpQueryTree |
* Pointer to query tree (where global variables are)
*
* @parm LPITOPIC | lpResTopicList |
* Resulting TopicList that has the merged occurence list
*
* @parm LPITOPIC | lpCurTopicList |
* TopicList that has the occurrence list to be merged to the
* resulting list
*************************************************************************/
PUBLIC VOID PASCAL NEAR MergeOccurence(_LPQT lpQueryTree,
LPITOPIC lpResTopicList, LPITOPIC lpCurTopicList)
{
register LPIOCC lpTmpOcc;
register LPIOCC lpNextOcc;
/* Reset lpOccStartSearch */
lpQueryTree->lpOccStartSearch = NULL;
for (lpTmpOcc = lpCurTopicList->lpOccur; lpTmpOcc; lpTmpOcc = lpNextOcc){
lpNextOcc = lpTmpOcc->pNext;
OccNodeInsert(lpQueryTree, lpResTopicList, lpTmpOcc);
}
lpCurTopicList->lpOccur = NULL;
lpCurTopicList->lcOccur = 0;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | RemoveUnmarkedTopicList |
* Remove all the TopicLists that are not marked TO_BE_KEPT
*
* @parm _LPQT | lpQueryTree |
* Pointer to query tree (for globasl variables)
*
* @parm _LPQTNODE | lpQtNode |
* Query tree node to be checked
*
* @parm HRESULT | fKeepOccurence |
* If 0, then check and remove all occurrences nodes that are not
* marked TO_BE_KEPT
*************************************************************************/
PUBLIC VOID PASCAL NEAR RemoveUnmarkedTopicList (_LPQT lpQueryTree,
_LPQTNODE lpQtNode, BOOL fKeepOccurence)
{
register LPITOPIC lpTopicList;
register LPITOPIC lpNextTopicList;
// erinfox: add to keep track of previous node
register LPITOPIC lpPrev;
/* Traverse the doclist */
for (lpPrev = NULL, lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList;
lpTopicList = lpNextTopicList) {
lpNextTopicList = lpTopicList->pNext;
if ((lpTopicList->fFlag & TO_BE_KEPT) == 0) {
/* Free the doc node and its occurences list */
TopicNodeFree(lpQueryTree, lpQtNode, lpPrev, lpTopicList);
}
else {
lpTopicList->fFlag &= ~(TO_BE_KEPT | HAS_MARKER); // Reset the flag
if (!fKeepOccurence) {
/* Check the occurences list, and free all nodes that
* are not marked TO_BE_KEPT
*/
RemoveUnmarkedOccList(lpQueryTree, lpTopicList,
lpTopicList->lpOccur, TRUE);
if (lpTopicList->lpOccur == NULL) {
RemoveNode(lpQueryTree, (LPV) lpQtNode, NULL,
(LPSLINK) lpTopicList, TOPICLIST_NODE);
}
}
lpPrev = lpTopicList;
}
}
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | RemoveUnmarkedNearTopicList |
* Remove all the TopicLists that are not marked TO_BE_KEPT
*
* @parm _LPQT | lpQueryTree |
* Pointer to query tree (for globasl variables)
*
* @parm _LPQTNODE | lpQtNode |
* Query tree node to be checked
*
* @parm BOOL | fKeepOccurence |
* If 0, then check and remove all occurrences nodes that are not
* marked TO_BE_KEPT
*************************************************************************/
PUBLIC VOID PASCAL NEAR RemoveUnmarkedNearTopicList (_LPQT lpQueryTree,
_LPQTNODE lpQtNode)
{
register LPITOPIC lpTopicList;
register LPITOPIC lpNextTopicList;
LPIOCC lpMark;
// erinfox - add to keep track of previous node
register LPITOPIC lpPrev;
/* Traverse the doclist */
for (lpPrev = NULL, lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList;
lpTopicList = lpNextTopicList) {
lpNextTopicList = lpTopicList->pNext;
if ((lpTopicList->fFlag & TO_BE_KEPT) == 0) {
/* Free the doc node and its occurences list */
TopicNodeFree(lpQueryTree, lpQtNode, lpPrev, lpTopicList);
}
else {
// Reset the flag
lpTopicList->fFlag &= ~(TO_BE_KEPT | HAS_MARKER);
/* Find the marker */
lpMark = FindMarker(lpTopicList->lpOccur);
RemoveUnmarkedOccList(lpQueryTree, lpTopicList,
lpTopicList->lpOccur, TRUE);
/* Remove all unmarked occurrences between this marker and
* the next one */
if (lpMark)
RemoveUnmarkedOccList(lpQueryTree, lpTopicList,
lpMark->pNext, TRUE);
lpPrev = lpTopicList;
}
}
}
/*************************************************************************
* @doc INTERNAL
*
* @func PASCAL NEAR | MarkTopicList |
* Mark all TopicId nodes in a doc list TO_BE_KEPT
*
* @parm _LPQTNODE | lpQtNode |
* Pointer to the query tree node that contains the doc list
*************************************************************************/
PUBLIC VOID PASCAL NEAR MarkTopicList (_LPQTNODE lpQtNode)
{
register LPITOPIC lpTopicList;
for (lpTopicList = QTN_TOPICLIST(lpQtNode); lpTopicList;
lpTopicList = lpTopicList->pNext) {
lpTopicList->fFlag |= TO_BE_KEPT;
}
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | RemoveUnmarkedOccList |
* Remove all the occurrence nodes that are not marked TO_BE_KEPT
*
* @parm LPQT | lpQueryTree |
* Pointer to query tree (for global variables)
*
* @parm int | fResetFlag |
* Do we reset the TO_BE_KEPT flag ?
*
* @parm LPIOCC | lpTopicList |
* Pointer to Topic list to be checked
*
* @parm LPIOCC | lpOccList |
* Pointer to occurrence list to be checked
*************************************************************************/
PRIVATE VOID PASCAL NEAR RemoveUnmarkedOccList (LPQT lpQueryTree,
LPITOPIC lpTopicList, LPIOCC lpOccList, int fResetFlag)
{
register LPIOCC lpNextOccList;
register LPIOCC lpPrevOccList;
lpPrevOccList = NULL;
for (;lpOccList && !(lpOccList->fFlag & IS_MARKER_NODE);
lpOccList = lpNextOccList) {
lpNextOccList = lpOccList->pNext;
if ((lpOccList->fFlag & TO_BE_KEPT) == 0) {
RemoveNode(lpQueryTree, (LPV) lpTopicList, (LPSLINK)lpPrevOccList,
(LPSLINK) lpOccList, OCCURENCE_NODE);
}
else if (fResetFlag) {
lpOccList->fFlag &= ~TO_BE_KEPT; // Reset the flag
if (lpOccList->fFlag & TO_BE_COMPARED)
lpOccList->fFlag &= ~TO_BE_SKIPPED;
lpPrevOccList = lpOccList;
}
}
/* Reset the flag of the marker node */
if (lpOccList)
lpOccList->fFlag &= ~TO_BE_KEPT;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | CleanMarkedOccList |
* Clean all the occurrence nodes from TO_BE_KEPT
*
* @parm LPIOCC | lpTopicList |
* Pointer to Topic list to be checked
*************************************************************************/
VOID PASCAL FAR CleanMarkedOccList (LPITOPIC lpTopicList)
{
register LPIOCC lpCurOcc;
for (;lpTopicList; lpTopicList = lpTopicList->pNext)
{
for (lpCurOcc = lpTopicList->lpOccur; lpCurOcc;
lpCurOcc = lpCurOcc->pNext)
{
lpCurOcc->fFlag &= ~TO_BE_KEPT;
}
}
}
/*************************************************************************
* @doc INTERNAL
*
* @func HRESULT PASCAL NEAR | HandleNullNode |
* Handle NULL query node. This is an optimization which will
* save processing time.
*
* @parm LPQT | lpQueryTree |
* Pointer to query tree (for global variables)
*
* @parm _LPQTNODE | lpResQtNode |
* Pointer to result query tree node
*
* @parm _LPQTNODE | lpCurQtNode |
* Pointer to query tree node
*
* @parm int | Operator |
* What operator are we dealing with
*
* @rdesc FALSE, if no optimization can be done, TRUE otherwise
*************************************************************************/
PRIVATE HRESULT PASCAL NEAR HandleNullNode(LPQT lpQueryTree,
_LPQTNODE lpResQtNode, _LPQTNODE lpCurQtNode, int Operator)
{
_LPQTNODE lpChild;
if (QTN_NODETYPE(lpResQtNode) != NULL_NODE &&
QTN_NODETYPE(lpCurQtNode) != NULL_NODE)
return FALSE;
if (Operator == NOT_OP) {
if (QTN_NODETYPE(lpResQtNode) == NULL_NODE) {
/* NULL ! a = NULL */
RemoveQuery(lpQueryTree, lpCurQtNode);
QTN_NODETYPE(lpCurQtNode) = NULL_NODE;
return TRUE;
}
else if (QTN_NODETYPE(lpCurQtNode) == NULL_NODE) {
/* a ! NULL = a */
return TRUE;
}
return FALSE;
}
lpChild = QTN_NODETYPE(lpResQtNode) == NULL_NODE ?
lpCurQtNode : lpResQtNode;
switch (Operator) {
case AND_OP: // a & NULL = NULL
case NEAR_OP: // a # NULL = NULL
case PHRASE_OP: // a + NULL = NULL ??
RemoveQuery(lpQueryTree, lpChild);
QTN_NODETYPE(lpChild) = NULL_NODE;
return TRUE;
case OR_OP: // a | NULL = a
if (QTN_NODETYPE(lpResQtNode) == NULL_NODE) {
*lpResQtNode = *lpChild;
QTN_NODETYPE(lpChild) = NULL_NODE;
QTN_LEFT(lpChild) = QTN_RIGHT(lpChild) = NULL;
QTN_TOPICLIST(lpChild) = NULL;
}
return TRUE;
}
return FALSE;
}
PRIVATE int PASCAL NEAR FRange(DWORD dwCount1, DWORD dwCount2, WORD cProxDist)
{
long fResult;
int fRet = 1;
fResult = dwCount1 - dwCount2;
if (fResult < 0) {
fRet = -1;
fResult = -fResult;
}
if (fResult != 0 && fResult <= (long)cProxDist)
return 0;
else
return fRet;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | RemoveQuery |
* Remove all doc nodes for a query node
*
* @parm LPQT | lpQueryTree |
* Pointer to query tree (for global variables)
*
* @parm _LPQTNODE | lpCurQtNode |
* Pointer to query tree node to be cleared
*************************************************************************/
PRIVATE VOID PASCAL NEAR RemoveQuery(LPQT lpQueryTree, _LPQTNODE lpCurQtNode)
{
register LPITOPIC lpCurTopicList;
register LPITOPIC lpNextTopicList;
/* Remove all occurences of all doclist */
if ((lpCurTopicList = QTN_TOPICLIST(lpCurQtNode)) == NULL)
return;
for (; lpCurTopicList; lpCurTopicList = lpNextTopicList)
{
lpNextTopicList = lpCurTopicList->pNext;
TopicNodeFree(lpQueryTree, lpCurQtNode, NULL, lpCurTopicList);
}
QTN_TOPICLIST(lpCurQtNode) = NULL;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | SortResult |
* Sort the results according to flag
*
* @parm _LPQT | lpQueryTree |
* Pointer to query tree (containing globals)
*
* @parm _LPQTNODE | lpQtNode |
* Pointer to query node
*
* @parm WORD | fFlag |
* Tell how to sort the result:
* @flag ORDERED_BASED |
* Everything is ordered TopicId, hit offsets
* @flag HIT_COUNT_BASED |
* The doc id with most hit will be returned first
* @flag WEIGHT_BASED |
* The topicId with most weight will be returned first
*************************************************************************/
PUBLIC VOID PASCAL NEAR SortResult (_LPQT lpQueryTree, _LPQTNODE lpQtNode,
WORD fFlag)
{
register LPITOPIC lpTopic;
switch (fFlag) {
case ORDERED_BASED:
for (lpTopic = lpQtNode->lpTopicList; lpTopic; lpTopic = lpTopic->pNext)
OccurenceSort (lpQueryTree, lpTopic);
break;
case HIT_COUNT_BASED:
case WEIGHT_BASED:
TopicListSort (lpQtNode, fFlag);
break;
}
#if defined(_DEBUG) && defined(SIMILARITY) && defined(_DUMPALL)
{
int i;
_DPF1("Sort total: %lu\n", lpQtNode->cTopic);
for (i = 0, lpTopic = lpQtNode->lpTopicList; lpTopic && i < 10; lpTopic = lpTopic->pNext, i++)
{
_DPF2("Topic %lu (%u)\n", lpTopic->dwTopicId, lpTopic->wWeight);
}
}
#endif
}
PRIVATE HRESULT PASCAL NEAR TopicListInsertionSort (_LPQTNODE lpQtNode, BOOL fFlag)
{
LPITOPIC lpPrevTopic;
LPITOPIC lpCurTopic;
LPITOPIC lpNextTopic;
LPITOPIC lpTmpTopic;
FCMP fCompare;
if (fFlag == HIT_COUNT_BASED)
fCompare = HitCountCompare;
else
fCompare = TopicWeightCompare;
for (lpCurTopic = lpQtNode->lpTopicList; lpCurTopic; lpCurTopic = lpNextTopic) {
if (lpNextTopic = lpCurTopic->pNext) {
if ((*fCompare) (lpCurTopic, lpNextTopic) < 0) {
/* Out of order sequence */
/* Unlink the out of order node */
lpCurTopic->pNext = lpNextTopic->pNext;
/* Do an insertion sort */
for (lpPrevTopic = NULL, lpTmpTopic = lpQtNode->lpTopicList;;
lpTmpTopic = lpTmpTopic->pNext) {
if ((*fCompare) (lpTmpTopic, lpNextTopic) < 0) {
/* We just pass the insertion point */
if (lpPrevTopic == NULL) {
lpNextTopic->pNext = lpQtNode->lpTopicList;
lpQtNode->lpTopicList = lpNextTopic;
}
else {
lpNextTopic->pNext = lpPrevTopic->pNext;
lpPrevTopic->pNext = lpNextTopic;
}
break;
}
lpPrevTopic = lpTmpTopic;
}
/* Reset lpNextTopic */
lpNextTopic = lpCurTopic;
}
}
}
return S_OK;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | TopicListSort |
* Sort the results according to flag
*
* @parm _LPQT | lpQueryTree |
* Pointer to query tree (containing globals)
*
* @parm _LPQTNODE | lpQtNode |
* Pointer to query node
*
* @parm WORD | fFlag |
* Tell how to sort the result:
* @flag HIT_COUNT_BASED |
* The doc id with most hit will be returned first
* @flag WEIGHT_BASED |
* The topicId with most weight will be returned first
*************************************************************************/
HRESULT PASCAL NEAR TopicListSort (_LPQTNODE lpQtNode, BOOL fFlag)
{
HANDLE hHeap; /* Handle to heap block */
LPITOPIC far *lrgHeap; /* Pointer to heap block */
TOPIC_LIST Dummy; /* Dummy node to speed up search, compare */
LPITOPIC lpCurTopic; /* Current Topic node */
LPITOPIC lpNextTopic; /* Next Topic node */
LPITOPIC lpInsertPt; /* Current insertion point */
WORD cLastItem;
WORD MaxItem;
LPITOPIC far * lpPQNode;
LPITOPIC lpTopNode;
LPITOPIC lpNextNode;
WORD wCurWeight;
FCMP fCompare;
/* Allocate the heap */
if (lpQtNode->cTopic > MAX_HEAP_ENTRIES)
MaxItem = MAX_HEAP_ENTRIES;
else
MaxItem = (WORD)lpQtNode->cTopic + 1;
/* If the list is short, we can use insertion sort since it is faster
* then preparing and use heap sort
*/
if (MaxItem <= 20)
return TopicListInsertionSort (lpQtNode, fFlag);
if ((hHeap = _GLOBALALLOC(DLLGMEM, MaxItem * sizeof(LPV))) == NULL) {
/* We run out of memory for the heap. Try a smaller size */
if ((hHeap = _GLOBALALLOC(DLLGMEM,
(MaxItem = MIN_HEAP_ENTRIES)* sizeof(LPV))) == NULL) {
/* We really run out of memory, so just do a regular
* insertion sort. It is slow but at least something
* works
*/
return TopicListInsertionSort (lpQtNode, fFlag);
}
}
MaxItem --; /* Since node 0 is used for sentinel */
lrgHeap = (LPITOPIC far *)_GLOBALLOCK (hHeap);
/* Initialize of Dummy */
Dummy.wWeight = 0xffff; // Maximum weigth for sentinel
Dummy.pNext = NULL;
/* Set the sentinel */
lrgHeap[0] = &Dummy;
/* Initialize the variables */
lpInsertPt = &Dummy;
lpCurTopic = lpQtNode->lpTopicList;
if (fFlag == HIT_COUNT_BASED)
fCompare = HitCountCompare;
else
fCompare = TopicWeightCompare;
while (lpCurTopic) {
lpPQNode = &lrgHeap[1];
for (cLastItem = 1; lpCurTopic && cLastItem <= MaxItem;
cLastItem++, lpPQNode++) {
lpNextTopic = lpCurTopic->pNext;
*lpPQNode = lpCurTopic;
lpCurTopic->pNext = NULL;
lpCurTopic = lpNextTopic;
HeapUp (lrgHeap, cLastItem, fCompare);
}
cLastItem--;
/* Set up the last pointer */
for (; cLastItem > 0;) {
lpTopNode = lrgHeap[1];
/* Get the new node's weight */
wCurWeight = lpTopNode->wWeight;
/* Insert into the resulting list in decreasing order */
if (wCurWeight > lpInsertPt->wWeight) {
/* Start from the beginning of the list */
lpInsertPt = &Dummy;
}
while (lpNextNode = lpInsertPt->pNext) {
if (lpNextNode->wWeight < wCurWeight)
break;
lpInsertPt = lpNextNode;
}
lpTopNode->pNext = lpInsertPt->pNext;
lpInsertPt->pNext = lpTopNode;
lpInsertPt = lpTopNode;
lrgHeap[1] = lrgHeap[cLastItem--];
HeapDown (lrgHeap, cLastItem, fCompare);
}
}
/* Update the pointer to the sorted list */
lpQtNode->lpTopicList = Dummy.pNext;
/* Release the memory */
_GLOBALUNLOCK(hHeap);
_GLOBALFREE(hHeap);
return S_OK;
}
/*************************************************************************
* @doc INTERNAL
*
* @func VOID PASCAL NEAR | OccurenceSort |
* Sort all the occurrences depending on their offsets. If two
* occurrences have the same offset, ie. they must be identical
* then one will be removed. Simple insertion sort is used since
* it it expected that most of the time we will have less than
* 15 occurences per TopicId
*
* @func _LPQT | lpQueryTree |
* Pointer to query tree structure where all globals are
*
* @func LPITOPIC | lpTopic |
* Pointer to doclist with the occurrence list to be sorted
*************************************************************************/
PRIVATE VOID PASCAL NEAR OccurenceSort (_LPQT lpQueryTree, LPITOPIC lpTopic)
{
LPIOCC lpPrevOcc;
LPIOCC lpCurOcc;
LPIOCC lpNextOcc;
LPIOCC lpTmpOcc;
int fResult;
for (lpCurOcc = lpTopic->lpOccur; lpCurOcc; lpCurOcc = lpNextOcc) {
if (lpNextOcc = lpCurOcc->pNext) {
if ((fResult = OccCompare(lpCurOcc, lpNextOcc)) <= 0) {
/* Out of order sequence */
/* Unlink the out of order node */
lpCurOcc->pNext = lpNextOcc->pNext;
if (fResult == 0) {
/* Duplicate data, just free the node */
lpNextOcc->pNext = (LPIOCC)lpQueryTree->lpOccFreeList;
lpQueryTree->lpOccFreeList = (LPSLINK)lpNextOcc;
lpTopic->lcOccur--;
/* Reset lpNextOcc */
lpNextOcc = lpCurOcc;
continue;
}
/* Do an insertion sort */
for (lpPrevOcc = NULL, lpTmpOcc = lpTopic->lpOccur;;
lpTmpOcc = lpTmpOcc->pNext) {
if (lpTmpOcc != NULL &&
(fResult = OccCompare(lpNextOcc, lpTmpOcc)) == 0) {
/* Duplicate data, just free the node */
lpNextOcc->pNext = (LPIOCC)lpQueryTree->lpOccFreeList;
lpQueryTree->lpOccFreeList = (LPSLINK)lpNextOcc;
lpTopic->lcOccur--;
break;
}
if (lpTmpOcc == NULL || fResult > 0) {
/* We just pass the insertion point */
if (lpPrevOcc == NULL) {
lpNextOcc->pNext = lpTopic->lpOccur;
lpTopic->lpOccur = lpNextOcc;
}
else {
lpNextOcc->pNext = lpPrevOcc->pNext;
lpPrevOcc->pNext = lpNextOcc;
}
break;
}
lpPrevOcc = lpTmpOcc;
}
/* Reset lpNextOcc */
lpNextOcc = lpCurOcc;
}
}
}
}
PRIVATE int PASCAL NEAR TopicWeightCompare (LPITOPIC lpTopic1, LPITOPIC lpTopic2)
{
if (lpTopic1->wWeight > lpTopic2->wWeight)
{
return 1;
}
else if (lpTopic1->wWeight < lpTopic2->wWeight)
{
return -1;
}
else // must be equal
{
if (lpTopic1->lcOccur >= lpTopic2->lcOccur)
return 1;
return -1;
}
}
PRIVATE int PASCAL NEAR HitCountCompare (LPITOPIC lpTopic1, LPITOPIC lpTopic2)
{
if (lpTopic1->lcOccur >= lpTopic2->lcOccur)
return 1;
return -1;
}
PRIVATE VOID PASCAL NEAR HeapUp (LPITOPIC far * lrgHeap, WORD ChildIndex,
FCMP fCompare)
{
WORD ParentIndex;
LPITOPIC far * lplpvParent;
LPITOPIC far * lplpvChild;
LPITOPIC lpSaved;
LPITOPIC lpvParent;
lplpvChild = &lrgHeap [ChildIndex];
ParentIndex = ChildIndex/2;
lpSaved = *lplpvChild;
while (ParentIndex) {
lplpvParent = &lrgHeap[ParentIndex];
lpvParent = *lplpvParent;
if ((*fCompare)((LPV)lpvParent, (LPV)lpSaved) > 0)
break;
*lplpvChild = lpvParent;
lplpvChild = lplpvParent;
ParentIndex /= 2;
};
*lplpvChild = lpSaved;
}
PRIVATE VOID PASCAL NEAR HeapDown (LPITOPIC far * lrgHeap, int MaxChildIndex,
FCMP fCompare)
{
int ChildIndex;
LPITOPIC far * lplpvParent;
LPITOPIC far * lplpvChild;
LPITOPIC lpTopicChild;
LPITOPIC lpTopicChild2;
LPITOPIC lpSaved;
lpSaved = *(lplpvParent = &lrgHeap[1]);
ChildIndex = 2;
for (; ChildIndex <= MaxChildIndex; ) {
lplpvChild = &lrgHeap[ChildIndex];
lpTopicChild = *lplpvChild;
/* Find the minimum of the two children */
if (ChildIndex < MaxChildIndex &&
(lpTopicChild2 = *(lplpvChild + 1))) {
if ((*fCompare)((LPV)lpTopicChild, (LPV)lpTopicChild2) < 0) {
lplpvChild++;
ChildIndex ++;
}
}
if ((*fCompare)((LPV)lpSaved, (LPV)*lplpvChild) > 0)
break;
/* Replace the node */
*lplpvParent = *lplpvChild;
lplpvParent = lplpvChild;
ChildIndex *= 2;
}
*lplpvParent = lpSaved;
}