433 lines
11 KiB
C++
433 lines
11 KiB
C++
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Microsoft Windows
|
||
|
// Copyright (C) Microsoft Corporation, 1991 - 1992.
|
||
|
//
|
||
|
// File: RWEX.CXX
|
||
|
//
|
||
|
// Contents: Relevant word extraction
|
||
|
//
|
||
|
// Classes: CRelevantWord, CRWStore, CRWHeap
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
#include <pch.cxx>
|
||
|
#pragma hdrstop
|
||
|
|
||
|
#include <rwex.hxx>
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRWStore::CRWStore, public
|
||
|
//
|
||
|
// Synopsis: Constructor for relevant word store
|
||
|
//
|
||
|
// Arguments: [pwList] -- array of work ids to operate over, must be in
|
||
|
// increasing order
|
||
|
// [cWids] -- # of items in pwList
|
||
|
// [cIds] -- # of relevant word key ids per wid to store
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
CRWStore::CRWStore(WORKID *pwList,ULONG cWids,ULONG cIds)
|
||
|
: _cWids(cWids), _cIds(cIds), _ulSearchLeftOff(0)
|
||
|
{
|
||
|
_cbRow = _RowSize(cIds);
|
||
|
|
||
|
for (ULONG x = 0; x < _cWids; x++)
|
||
|
{
|
||
|
SRWHeader *p = GetRow(x);
|
||
|
p->wid = pwList[x];
|
||
|
p->cItems = 0;
|
||
|
}
|
||
|
|
||
|
} //CRWStore
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRWStore::new, public
|
||
|
//
|
||
|
// Synopsis: Private new operator
|
||
|
//
|
||
|
// Arguments: [st] -- predefined param
|
||
|
// [cWids] -- # of rows for the array
|
||
|
// [cIds] -- # of relevant word key ids per wid to store
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
void * CRWStore::operator new(size_t st,ULONG cWids,ULONG cIds)
|
||
|
{
|
||
|
return (void *) ::new char[_ObjectSize(cWids,cIds)];
|
||
|
} //new
|
||
|
|
||
|
#if _MSC_VER >= 1200
|
||
|
void CRWStore::operator delete(void * p, ULONG cWids, ULONG cIds)
|
||
|
{
|
||
|
::delete (p);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRWStore::Insert, public
|
||
|
//
|
||
|
// Synopsis: Inserts a keyid in a wid's heap if the rank is sufficient.
|
||
|
//
|
||
|
// Arguments: [wid] -- wid on whose heap is operated
|
||
|
// [keyid] -- keyid to add
|
||
|
// [lRank] -- rank of the keyid in the wid
|
||
|
//
|
||
|
// History: 17-Jun-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
void CRWStore::Insert(WORKID wid,KEYID keyid, LONG lRank)
|
||
|
{
|
||
|
SRWHeader *ph = _Find(wid,GetRow(_ulSearchLeftOff),
|
||
|
_cWids - _ulSearchLeftOff);
|
||
|
|
||
|
Win4Assert(ph != 0);
|
||
|
|
||
|
//
|
||
|
// This heap object is merely an accessor to a heap whose storage
|
||
|
// has already been allocated. The heap object operates on the heap
|
||
|
// but does not own the heap's memory.
|
||
|
//
|
||
|
CRWHeap heap(ph,_cIds);
|
||
|
heap.Insert(keyid,lRank);
|
||
|
|
||
|
//
|
||
|
// Start the next binary search one after where the last search
|
||
|
// left off, since we know the wids will come through in sorted order.
|
||
|
//
|
||
|
_ulSearchLeftOff = _HeaderToRow(ph) + 1;
|
||
|
} //Insert
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRWStore::_Find, private
|
||
|
//
|
||
|
// Synopsis: Finds a wid's heap in the array of heaps given a wid
|
||
|
//
|
||
|
// Arguments: [wid] -- wid of heap to find
|
||
|
// [base] -- pointer to first element in heap array
|
||
|
// [cRows] -- # of rows in the array
|
||
|
//
|
||
|
// Returns: pointer to the head of the heap for the wid or 0 if not found.
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
SRWHeader * CRWStore::_Find(WORKID wid,SRWHeader *pBase,ULONG cRows)
|
||
|
{
|
||
|
Win4Assert(cRows != 0);
|
||
|
|
||
|
SRWHeader *lo = pBase;
|
||
|
SRWHeader *hi = lo->Forward(cRows - 1,_cbRow);
|
||
|
SRWHeader *mid;
|
||
|
ULONG cHalf;
|
||
|
|
||
|
while (lo <= hi)
|
||
|
{
|
||
|
if (cHalf = cRows / 2)
|
||
|
{
|
||
|
mid = lo->Forward((cRows & 1) ? cHalf : (cHalf - 1),_cbRow);
|
||
|
if (wid == mid->wid)
|
||
|
{
|
||
|
return mid;
|
||
|
}
|
||
|
else if (wid < mid->wid)
|
||
|
{
|
||
|
hi = mid->Backward(1,_cbRow);
|
||
|
cRows = (cRows & 1) ? cHalf : (cHalf - 1);
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
lo = mid->Forward(1,_cbRow);
|
||
|
cRows = cHalf;
|
||
|
}
|
||
|
}
|
||
|
else if (cRows != 0)
|
||
|
{
|
||
|
if (wid == lo->wid)
|
||
|
return lo;
|
||
|
else
|
||
|
return 0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
} //_Find
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRWHeap::DeQueue, public
|
||
|
//
|
||
|
// Synopsis: Removes the lowest-ranking keyid in the heap for a wid
|
||
|
//
|
||
|
// Returns: keyid of the lowest-ranking member of the heap
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
KEYID CRWHeap::DeQueue()
|
||
|
{
|
||
|
ULONG ulL,ulR,ulMax;
|
||
|
KEYID kRet = _ph->aItems[0].kKeyId;
|
||
|
|
||
|
_ph->cItems--;
|
||
|
|
||
|
//
|
||
|
// Take out the bottom-right most leaf and bubble it down from
|
||
|
// the top of the tree until it is less than its parent.
|
||
|
//
|
||
|
SRWItem iFix = _ph->aItems[_ph->cItems];
|
||
|
ULONG ulPos = 0;
|
||
|
|
||
|
while (!_IsLeaf(ulPos))
|
||
|
{
|
||
|
ulL = _Left(ulPos);
|
||
|
ulR = _Right(ulPos);
|
||
|
|
||
|
if (!_IsValid(ulR))
|
||
|
ulMax = ulL;
|
||
|
else
|
||
|
{
|
||
|
if (_ph->aItems[ulL].lRank < _ph->aItems[ulR].lRank)
|
||
|
ulMax = ulL;
|
||
|
else
|
||
|
ulMax = ulR;
|
||
|
}
|
||
|
|
||
|
if (_ph->aItems[ulMax].lRank < iFix.lRank)
|
||
|
{
|
||
|
_ph->aItems[ulPos] = _ph->aItems[ulMax];
|
||
|
ulPos = ulMax;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
_ph->aItems[ulPos] = iFix;
|
||
|
|
||
|
return kRet;
|
||
|
} //DeQueue
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRWStore::Insert, public
|
||
|
//
|
||
|
// Synopsis: Inserts an keyid in the rw heap for a wid if the keyid's rank
|
||
|
// is greater than the lowest ranking keyid in the heap or if
|
||
|
// the heap is not yet full.
|
||
|
//
|
||
|
// Arguments: [keyid] -- item to insert
|
||
|
// [lRank] -- rank of the keyid
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
void CRWHeap::Insert(KEYID keyid, LONG lRank)
|
||
|
{
|
||
|
if ((_ph->cItems < _ulMaxIds) ||
|
||
|
(_ph->aItems[0].lRank < lRank))
|
||
|
{
|
||
|
//
|
||
|
// Pop off the top element if the list is full
|
||
|
//
|
||
|
if (_ph->cItems == _ulMaxIds)
|
||
|
DeQueue();
|
||
|
|
||
|
//
|
||
|
// Insert element as the rightmost bottom level leaf in the heap
|
||
|
//
|
||
|
ULONG ulPos = _ph->cItems++;
|
||
|
_ph->aItems[ulPos].kKeyId = keyid;
|
||
|
_ph->aItems[ulPos].lRank = lRank;
|
||
|
|
||
|
//
|
||
|
// bubble the element up until it fits correctly in the tree
|
||
|
//
|
||
|
while (ulPos)
|
||
|
{
|
||
|
ULONG ulParent = _Parent(ulPos);
|
||
|
if (_ph->aItems[ulPos].lRank < _ph->aItems[ulParent].lRank)
|
||
|
{
|
||
|
//
|
||
|
// swap the elements
|
||
|
//
|
||
|
SRWItem t = _ph->aItems[ulPos];
|
||
|
_ph->aItems[ulPos] = _ph->aItems[ulParent];
|
||
|
_ph->aItems[ulParent] = t;
|
||
|
ulPos = ulParent;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
} //Insert
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRelevantWord::CRelevantWord, public
|
||
|
//
|
||
|
// Synopsis: Constructor for the relevant word object
|
||
|
//
|
||
|
// Arguments: [pwid] -- array of wids in sorted order to track
|
||
|
// [cWidsUsed] -- # of wids in the array
|
||
|
// [cRW] -- # of relevant words per wid to track
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
CRelevantWord::CRelevantWord(WORKID *pwid,ULONG cWidsUsed,ULONG cRW)
|
||
|
: _pWidItem(0), _pstore(0), _cWidsAdded(0)
|
||
|
{
|
||
|
TRY
|
||
|
{
|
||
|
_pstore = new(cWidsUsed,cRW) CRWStore(pwid,cWidsUsed,cRW);
|
||
|
|
||
|
_pWidItem = new SRWWidItem[cWidsUsed];
|
||
|
}
|
||
|
CATCH ( CException, e )
|
||
|
{
|
||
|
delete _pWidItem;
|
||
|
delete _pstore;
|
||
|
|
||
|
RETHROW();
|
||
|
}
|
||
|
END_CATCH
|
||
|
} //CRelevantWord
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRelevantWord::~CRelevantWord
|
||
|
//
|
||
|
// Synopsis: Destructor for the relevant word object
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
CRelevantWord::~CRelevantWord()
|
||
|
{
|
||
|
delete _pWidItem;
|
||
|
delete _pstore; // may be 0
|
||
|
} //~CRelevantWord
|
||
|
|
||
|
//+---------------------------------------------------------------------------
|
||
|
//
|
||
|
// Member: CRelevantWord::DoneWithKey, public
|
||
|
//
|
||
|
// Synopsis: Computes rank for each wid occurance of a keyid and adjusts
|
||
|
// the heaps appropriately
|
||
|
//
|
||
|
// Arguments: [keyid] -- keyid on which to calculate
|
||
|
// [maxWid] -- # docs on disk
|
||
|
// [cWids] -- # docs with key on disk
|
||
|
//
|
||
|
// History: 25-Apr-94 dlee Created.
|
||
|
//
|
||
|
//----------------------------------------------------------------------------
|
||
|
|
||
|
void CRelevantWord::DoneWithKey(KEYID keyid,ULONG maxWid,ULONG cWids)
|
||
|
{
|
||
|
if (0 != cWids)
|
||
|
{
|
||
|
_SetWidInfo(maxWid,cWids);
|
||
|
|
||
|
for (ULONG x = 0; x < _cWidsAdded; x++)
|
||
|
{
|
||
|
Win4Assert(_pWidItem[x].wid != 0);
|
||
|
_pstore->Insert(_pWidItem[x].wid,keyid,_Rank(_pWidItem[x].cOcc));
|
||
|
}
|
||
|
|
||
|
_pstore->DoneWithKey();
|
||
|
|
||
|
_cWidsAdded = 0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
Win4Assert(0 == _cWidsAdded);
|
||
|
}
|
||
|
} //DoneWithKey
|
||
|
|
||
|
//+-------------------------------------------------------------------------
|
||
|
//
|
||
|
// Function: _SortULongArray, private
|
||
|
//
|
||
|
// Synopsis: Sorts an array of unsigned longs from low to high
|
||
|
//
|
||
|
// Arguments: [pulItems] -- list of IDs to be sorted
|
||
|
// [cItems] -- # of root words to be sorted
|
||
|
//
|
||
|
// Returns: void
|
||
|
//
|
||
|
// Algorithm: Heapsort, not quick sort. Give up 20% speed to save kernel
|
||
|
// stack and prevent against n*n performance on sorted lists.
|
||
|
//
|
||
|
// History: 14-Mar-94 Dlee Created
|
||
|
//
|
||
|
//--------------------------------------------------------------------------
|
||
|
|
||
|
#define _CompUL(x,y) ((*(x)) > (*(y)) ? 1 : (*(x)) == (*(y)) ? 0 : -1)
|
||
|
#define _SwapUL(x,y) { ULONG _t = *(x); *(x) = *(y); *(y) = _t; }
|
||
|
|
||
|
inline static void _AddRootUL(ULONG x,ULONG n,ULONG *p)
|
||
|
{
|
||
|
ULONG _x = x;
|
||
|
ULONG _j = (2 * (_x + 1)) - 1;
|
||
|
|
||
|
while (_j < n)
|
||
|
{
|
||
|
if (((_j + 1) < n) &&
|
||
|
(_CompUL(p + _j,p + _j + 1) < 0))
|
||
|
_j++;
|
||
|
if (_CompUL(p + _x,p + _j) < 0)
|
||
|
{
|
||
|
_SwapUL(p + _x,p + _j);
|
||
|
_x = _j;
|
||
|
_j = (2 * (_j + 1)) - 1;
|
||
|
}
|
||
|
else break;
|
||
|
}
|
||
|
} //_AddRootUL
|
||
|
|
||
|
void _SortULongArray(ULONG *pulItems,ULONG cItems)
|
||
|
{
|
||
|
if (cItems == 0)
|
||
|
return;
|
||
|
|
||
|
long z;
|
||
|
|
||
|
for (z = (((long) cItems + 1) / 2) - 1; z >= 0; z--)
|
||
|
{
|
||
|
_AddRootUL(z,cItems,pulItems);
|
||
|
}
|
||
|
|
||
|
for (z = cItems - 1; z != 0; z--)
|
||
|
{
|
||
|
_SwapUL(pulItems,pulItems + z);
|
||
|
_AddRootUL(0,(ULONG) z,pulItems);
|
||
|
}
|
||
|
} //_SortULongArray
|
||
|
|