// windows-nt/Source/XPSP1/NT/enduser/stuff/itircl/fts/search/update.c
/*************************************************************************
* *
* UPDATE.C *
* *
* Copyright (C) Microsoft Corporation 1990-1994 *
* All Rights reserved. *
* *
**************************************************************************
* *
* Module Intent *
* *
**************************************************************************
* *
* Current Owner: BinhN *
* *
**************************************************************************/
#include <mvopsys.h>
#include <math.h>
#include <mem.h>
#include <orkin.h>
#include <mvsearch.h>
#include "common.h"
#include "index.h"
#ifdef _DEBUG
static BYTE NEAR s_aszModule[] = __FILE__; /* Used by error return functions.*/
#endif
#define SAFE_SLACK 48 // Extra safety bytes
#define ESOUTPUT_BUFFER 0xFFFC // Size of output file buffer
// This must be at least the size of the largest word + 12
// or word + 14 if OCCF_LENGTH is set
#define ESINPUT_BUFFER 0x7FFC // Size of input file buffers.
// Each ESB block gets its own input buffer
// Min Size: Size of index word + ~8 bytes
#define NEW_NODE_ON_LEFT 0
#define NEW_NODE_ON_RIGHT 1
extern FENCODE EncodeTable[];
extern FDECODE DecodeTable[];
#define FAddDword(p,dw,key) EncodeTable[(key).cschScheme]((p), (dw), (key).ucCenter)
#define FGetDword(a,b,c) (*DecodeTable[b.cschScheme])(a, b, c)
typedef struct WORDINFO
{
DWORD dwWordLen;
DWORD dwFieldId;
DWORD dwNewTopicCount;
DWORD dwIndexTopicCount;
DWORD dwMergeTopicCount;
DWORD dwOldTopicId;
DWORD dwNewTopicId;
DWORD dwIndexTopicId;
DWORD dwDataSize;
FILEOFFSET dataLocation;
WORD fFlag;
WORD pad;
} WORDINFO, FAR *PWORDINFO;
typedef struct FREEBLOCK
{
DWORD dwBlockSize;
FILEOFFSET foBlockOffset;
}FREEBLOCK, FAR *PFREEBLOCK;
BYTE EmptyWord[4] = { 0 };
#ifdef _DEBUG
DWORD dwOldDataLoss = 0;
DWORD dwNewDataSize = 0;
DWORD dwOldDataNeed = 0;
DWORD dwNewNodeSize = 0;
#endif
// Flag to denote that the current entry is to be replaced by the new entry
// This happens when:
// - A repeated entry in the leaf node
// - The last entry in the stem node that has to be changed to the last
// word of the leaf node
#define REPLACE_WORD_01 0x0001
// Flag to denote that the last word buffer actually contains the word
// before last. This is needed when we have to replace the last word
// with the new word. In this case we need the word before last to do
// compression
#define ONE_WORD_BEHIND_02 0x0002
// Flag to denote updating the offset field with the temp node offset
#define USE_TEMP_NODE_04 0x0004
// Flag to denote that only the node offset address is to be updated. Since
// this is a fixed record size, this will speed up the update.
#define UPDATE_NODE_ADDRESS_08 0x0008
// rgpTmpNodeInfo is the new right node if set, else it is the left node
#define USE_TEMP_FOR_RIGHT_NODE_10 0x0010
// Flag to denote that we have to skip the next word before inserting a new
// word. This happens when adding a new word to the end of the block, where
// pCurPtr is pointing to the beginning of the last word
#define SKIP_NEXT_WORD_20 0x0020
// Both nodes, rgpNodeInfo and rgpTmpNodeInfo are used as left and right
// children. This happens when a new top node is created
#define USE_BOTH_NODE_40 0x0040
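// Examples of flag combinations passed to AddRecordToBTree (a summary of
// the call sites below, for orientation only):
//   0                                   insert a brand-new word entry
//   REPLACE_WORD_01                     the word already exists; its leaf
//                                       entry is rewritten after the old and
//                                       new data blocks have been merged
//   USE_TEMP_NODE_04 | UPDATE_NODE_ADDRESS_08
//                                       the parent keeps its key but its
//                                       child pointer is redirected to the
//                                       new (temp) node
//   USE_BOTH_NODE_40 | USE_TEMP_FOR_RIGHT_NODE_10
//                                       the top node split; a new root links
//                                       rgpNodeInfo (left) and rgpTmpNodeInfo
//                                       (right) as its children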
/*************************************************************************
*
* INTERNAL PRIVATE FUNCTIONS
*
* All of them should be declared near
*
*************************************************************************/
PRIVATE HRESULT NEAR PASCAL ESFlushBuffer (LPESI);
PRIVATE HRESULT NEAR PASCAL ESFillBuffer (_LPIPB, LPESB);
PRIVATE void NEAR PASCAL ESMemory2Disk (_LPIPB, PMERGEHEADER);
PRIVATE HRESULT NEAR PASCAL ProcessFiles (_LPIPB lpipb, LPMERGEPARAMS);
PRIVATE int NEAR PASCAL CompareRecordBuffers (_LPIPB, LPB, LPB);
PRIVATE VOID NEAR PASCAL PQueueUp (_LPIPB, LPESB FAR *, LONG);
PRIVATE VOID NEAR PASCAL PQueueDown (_LPIPB);
PRIVATE PTOPICDATA PASCAL NEAR MergeTopicNode (PMERGEHEADER, PTOPICDATA, int);
PRIVATE VOID NEAR MergeOccurrence (PTOPICDATA, PTOPICDATA, int);
PRIVATE HRESULT NEAR PASCAL UpdateIndexBTree (_LPIPB, HFPB, LPB, LPB);
VOID SetQueue (LPESI pEsi);
PRIVATE HRESULT NEAR PASCAL AddWordToBTree (_LPIPB, LPB, PWORDINFO);
PRIVATE HRESULT PASCAL NEAR NewDataInsert(LPIPB lpipb, PFILEDATA pInfile,
PNODEINFO FAR *rgpNodeInfo, LPB pWord, PWORDINFO pWordInfo);
PRIVATE HRESULT PASCAL NEAR CreateNewNode(_LPIPB lpipb, int cLevel,
int fIsStemNode, int fAfter);
PRIVATE PASCAL NEAR AddRecordToBTree (_LPIPB lpipb, LPB pWord,
PWORDINFO pWordInfo, int cLevel, int fReplaceWord);
PRIVATE HRESULT PASCAL NEAR WriteNewDataRecord (_LPIPB, PWORDINFO);
PRIVATE HRESULT GetFreeBlock (_LPIPB, PFREEBLOCK, DWORD);
PRIVATE HRESULT PASCAL NEAR CopyBlockFile (PFILEDATA, HFPB, FILEOFFSET, DWORD);
PRIVATE HRESULT PASCAL FAR EmitOldData (_LPIPB, PNODEINFO, PWORDINFO);
PRIVATE HRESULT PASCAL FAR EmitNewData (_LPIPB, PWORDINFO, BOOL);
PRIVATE HRESULT PASCAL NEAR UpdateDataNode (_LPIPB lpipb, PWORDINFO pWordInfo);
PRIVATE int PASCAL NEAR SplitNodeAndAddData (_LPIPB lpipb, LPB pWord,
PWORDINFO pWordInfo, int cLevel, int fFlag, int fIsStemNode);
PRIVATE int PASCAL NEAR CopyNewDataToStemNode (_LPIPB lpipb,
PNODEINFO pTmpNode, LPB pWord, LPB pLastWord, int cLevel, int fFlag);
PRIVATE int PASCAL NEAR CopyNewDataToLeafNode (_LPIPB lpipb, PNODEINFO pTmpNode,
PWORDINFO pWordInfo, LPB pWord, LPB pLastWord);
VOID GetLastWordInNode (_LPIPB lpipb, PNODEINFO pNodeinfo, BOOL flag);
PRIVATE HRESULT PASCAL FAR SkipNewData (_LPIPB lpipb, PWORDINFO pWordInfo);
HRESULT CheckLeafNode (PNODEINFO pNodeInfo, int occf);
HRESULT CheckStemNode (PNODEINFO pNodeInfo);
/*************************************************************************
*
* INTERNAL PUBLIC FUNCTIONS
*
* All of them should be declared far, unless we know they belong to
* the same segment. They should be included in some include file
*
*************************************************************************/
HRESULT FAR PASCAL FlushTree(_LPIPB lpipb);
PUBLIC HRESULT FAR PASCAL MergeSortTreeFile (_LPIPB, LPMERGEPARAMS);
PUBLIC HRESULT FAR PASCAL FillInputBuffer (LPESB, HFPB);
PUBLIC VOID PASCAL FAR FreeBTreeNode (PNODEINFO pNode);
PUBLIC PNODEINFO PASCAL FAR AllocBTreeNode (_LPIPB lpipb);
PUBLIC PASCAL FAR PrefixCompressWord (LPB, LPB, LPB, int);
PUBLIC DWORD PASCAL FAR WriteDataNode (_LPIPB, DWORD, PHRESULT);
PUBLIC HRESULT PASCAL FAR IndexOpenRW (LPIPB, HFPB, LSZ);
PUBLIC HRESULT PASCAL FAR SkipOldData (_LPIPB, PNODEINFO);
PUBLIC LONG PASCAL FAR CompareDWord (DWORD, DWORD, LPV lpParm);
#ifdef _DEBUG
static LONG Count = 0;
#endif
/*************************************************************************
*
* @doc EXTERNAL API INDEX
*
* @func HRESULT FAR PASCAL | MVIndexUpdate |
* This function will update an index file based on the information
* collected in the Index parameter block.
*
* @parm HFPB | hSysFile |
* System file handle.
* If it is 0, this function will open the system file
* specified in lszFilename, and then close it after finishing the
* index update. If the system file does not exist, then this function
* will create it.
* If it is non-zero, then the system file is already opened. Only the
* index sub-file needs to be created
*
* @parm LSZ | lszFilename |
* Index filename.
* If hSysFile is non-zero, the format is: !index_filename
* if hSysFile is zero, the format is: dos_filename[!index_filename]
* If !index_filename is not specified, the default name will be used
* if hSysFile == 0 and there is no '!', this is a regular DOS file
*
* @parm LPIPB | lpipb |
* Pointer to Index Parameter Block. This structure contains all the
* information necessary to update the index file
* *
* @rdesc S_OK if succeeded, or other errors
*
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexUpdate (HFPB hSysFile,
_LPIPB lpipb, LSZ lszFilename)
{
return MVIndexUpdateEx(hSysFile, lpipb, lszFilename, NULL, 0);
}
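/* Example call (an illustrative sketch only; "title.mvb!FtiMain" is a
 * hypothetical name and lpipb is assumed to have been filled in by the
 * caller's word-collection phase before the flush):
 *
 *      HRESULT hr;
 *
 *      // hSysFile == 0: MVIndexUpdate opens (or creates) the system file
 *      // named before the '!' and closes it when the update is done.
 *      hr = MVIndexUpdate ((HFPB)0, lpipb, (LSZ)"title.mvb!FtiMain");
 *      if (hr != S_OK)
 *          ;   // handle the error
 */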
/*************************************************************************
*
* @doc EXTERNAL API INDEX
*
* @func HRESULT FAR PASCAL | MVIndexUpdateEx |
* This function will update an index file based on the information
* collected in the Index parameter block, and also will "pre-delete" the
* topics in the given list from the LPIPB before updating. This function is useful
* in scenarios where new topics are continuously added into the index
* before knowledge of out-dated topics is available (e.g. netnews).
* This allows a single-pass update once the deletes are known.
*
* @parm HFPB | hSysFile |
* System file handle.
* If it is 0, this function will open the system file
* specified in lszFilename, and then close it after finishing the
* index update. If the system file does not exist, then this function
* will create it.
* If it is non-zero, then the system file is already opened. Only the
* index sub-file needs to be created
*
* @parm LSZ | lszFilename |
* Index filename.
* If hSysFile is non-zero, the format is: !index_filename
* if hSysFile is zero, the format is: dos_filename[!index_filename]
* If !index_filename is not specified, the default name will be used
* if hSysFile == 0 and there is no '!', this is a regular DOS file
*
* @parm LPIPB | lpipb |
* Pointer to Index Parameter Block. This structure contains all the
* information necessary to update the index file
*
* @parm LPDW | lpdwTopicList |
* Pointer to DWORD array of topic UIDs to be deleted
*
* @parm DWORD | dwCount |
* The number of topics in the array
*
* @rdesc S_OK if succeeded, or other errors
*
*************************************************************************/
PUBLIC HRESULT EXPORT_API FAR PASCAL MVIndexUpdateEx (HFPB hSysFile,
_LPIPB lpipb, LSZ lszFilename, DWORD FAR *rgTopicId, DWORD dwCount)
{
ERRB errb;
PHRESULT phr = &errb;
PFILEDATA pOutFile;
MERGEPARAMS mp;
HRESULT fRet; // Return value from this function.
// Flush the internal sort
// Flushes any records in the tree to disk
fRet = FlushTree(lpipb);
// Free all memory blocks
FreeISI (lpipb);
if (fRet != S_OK)
return(fRet);
if (lpipb->esi.cesb == 0)
// Nothing to process, there will be no index file
return S_OK;
// Set the state flag
lpipb->bState = UPDATING_STATE;
// Open the index file
if ((fRet = IndexOpenRW(lpipb, hSysFile, lszFilename)) != S_OK)
{
exit00:
if (lpipb->idxf & IDXF_NORMALIZE)
{
FreeHandle (lpipb->wi.hSigma);
FreeHandle (lpipb->wi.hLog);
lpipb->wi.hSigma = lpipb->wi.hLog = NULL;
}
return fRet;
}
if (rgTopicId && dwCount)
{
// Sort the incoming array
if ((fRet = HugeDataSort((LPV HUGE*)rgTopicId, dwCount,
(FCOMPARE)CompareDWord, NULL, NULL, NULL)) != S_OK)
goto exit00;
mp.rgTopicId = rgTopicId;
mp.dwCount = dwCount;
mp.lpTopicIdLast = rgTopicId;
}
if ((fRet = MergeSortTreeFile (lpipb, (rgTopicId && dwCount) ? &mp: NULL)) != S_OK)
{
FileClose(lpipb->hfpbIdxFile);
fRet = SetErrCode (phr, fRet);
goto exit00;
}
FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);
// Open output file
pOutFile = &lpipb->OutFile;
if ((pOutFile->fFile = FileCreate (NULL, lpipb->isi.aszTempName,
REGULAR_FILE, phr)) == NULL)
{
FileClose(lpipb->hfpbIdxFile);
fRet = *phr; // FileCreate failed; pick up the error it already set
goto exit00;
}
// Allocate output buffer
pOutFile->dwMax = FILE_BUFFER;
pOutFile->cbLeft = FILE_BUFFER;
if ((pOutFile->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
pOutFile->dwMax + SAFE_SLACK)) == NULL)
{
fRet = E_OUTOFMEMORY;
exit0:
FileClose(lpipb->hfpbIdxFile);
FileClose (pOutFile->fFile);
FileUnlink (NULL, lpipb->isi.aszTempName, REGULAR_FILE);
goto exit00;
}
pOutFile->pCurrent = pOutFile->pMem = _GLOBALLOCK (pOutFile->hMem);
// Build the permanent index
fRet = UpdateIndexBTree(lpipb, hSysFile, lpipb->esi.aszTempName,
lszFilename);
_GLOBALUNLOCK(pOutFile->hMem);
_GLOBALFREE(pOutFile->hMem);
pOutFile->hMem = NULL;
goto exit0;
}
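/* Example call with a pre-delete list (an illustrative sketch; the topic
 * ids and names are hypothetical). The occurrence data collected for
 * topics 17 and 42 is dropped from the incoming records before the merge,
 * so their newly collected occurrences are never merged into the index:
 *
 *      DWORD rgDead[2] = { 42, 17 };   // need not be pre-sorted
 *
 *      hr = MVIndexUpdateEx ((HFPB)0, lpipb, (LSZ)"news.mvb!FtiMain",
 *          rgDead, 2);
 *
 * Note that the array is sorted in place (HugeDataSort above), so callers
 * should not rely on its original ordering afterwards.
 */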
/*************************************************************************
*
* @doc PRIVATE INDEXING
*
* @func HRESULT | UpdateIndexBTree |
* Allocates required memory and opens input files to create a B-Tree.
* Parses incoming words and calls AddRecordToBTree to process them.
*
* @parm _LPIPB | lpipb |
* Pointer to the index parameter block
*
* @parm HFPB | hSysFile |
* System file handle passed down from MVIndexUpdateEx
*
* @parm LPB | lpszTemp |
* Filename of the temporary input file
*
* @parm LPB | szIndexFilename |
* Filename of the permanent B-Tree file
*
* @rdesc Returns S_OK on success or errors if failed
*
*************************************************************************/
PRIVATE HRESULT NEAR PASCAL UpdateIndexBTree (_LPIPB lpipb, HFPB hSysFile,
LPB lpszTemp, LPB szIndexFilename)
{
PFILEDATA pInFile; // Pointer to input data
DWORD dwBytesRead = 0; // Checks for EOF
PNODEINFO FAR * rgpNodeInfo;
PNODEINFO FAR * rgpTmpNodeInfo;
PNODEINFO pIndexDataNode;
ERRB errb;
PHRESULT phr = &errb;
PIH20 pHeader;
int cTreeLevel;
int iIndex;
LPB pWord;
WORDINFO WordInfo;
OCCF occf;
HRESULT fRet; // Return value
FILEOFFSET foFreeListOffset; // File Offset where the FreeList will be saved.
DWORD dwSizeFreeList; // Size of the FreeList to be saved.
rgpNodeInfo = lpipb->BTreeData.rgpNodeInfo;
rgpTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo;
MEMSET(&WordInfo, 0, sizeof(WORDINFO));
// Open input file
pInFile = &lpipb->InFile;
if ((pInFile->fFile = FileOpen (NULL, lpszTemp, REGULAR_FILE,
READ, phr)) == NULL)
return *phr;
// Allocate input buffer
pInFile->dwMax = FILE_BUFFER;
if ((pInFile->hMem =
_GLOBALALLOC (DLLGMEM_ZEROINIT, pInFile->dwMax + SAFE_SLACK)) == NULL)
{
fRet = E_OUTOFMEMORY;
exit0:
FileClose (pInFile->fFile);
FileUnlink (NULL, lpszTemp, REGULAR_FILE);
return fRet;
}
pInFile->pMem = _GLOBALLOCK (pInFile->hMem);
pInFile->pCurrent = pInFile->pMem;
pHeader = &lpipb->BTreeData.Header;
// Allocate BTree block.
for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
{
if ((rgpNodeInfo[cTreeLevel] = AllocBTreeNode (lpipb)) == NULL)
{
fRet = E_OUTOFMEMORY;
goto exit2;
}
if ((rgpTmpNodeInfo[cTreeLevel] = AllocBTreeNode (lpipb)) == NULL)
{
fRet = E_OUTOFMEMORY;
goto exit2;
}
}
if (((lpipb->pIndexDataNode = pIndexDataNode =
AllocBTreeNode (lpipb))) == NULL)
{
fRet = E_OUTOFMEMORY;
goto exit2;
}
// Reallocate a bigger buffer. BTREE_NODE_SIZE is only good for btree node
_GLOBALUNLOCK (pIndexDataNode->hMem);
_GLOBALFREE (pIndexDataNode->hMem);
// Allocate 1M of memory for the data buffer
if ((pIndexDataNode->hMem = _GLOBALALLOC (DLLGMEM_ZEROINIT,
pIndexDataNode->dwBlockSize = FILE_BUFFER)) == NULL)
{
fRet = E_OUTOFMEMORY; // fRet was not set on this path before the goto
goto exit2;
}
pIndexDataNode->pCurPtr = pIndexDataNode->pBuffer =
_GLOBALLOCK (pIndexDataNode->hMem);
lpipb->pIndexDataNode->hfpbIdx = lpipb->hfpbIdxFile; // Index file to read from
// Remember the file offset of this node
rgpNodeInfo[0]->nodeOffset = pHeader->foIdxRoot;
// Read in data for the top stem node
if ((fRet = ReadNewNode(lpipb->hfpbIdxFile, rgpNodeInfo[0],
pHeader->cIdxLevels > 1 ? FALSE : TRUE)) != S_OK)
{
exit2:
FreeHandle (pInFile->hMem);
for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
{
FreeBTreeNode (rgpNodeInfo[cTreeLevel]);
FreeBTreeNode (rgpTmpNodeInfo[cTreeLevel]);
}
goto exit0;
}
// Allocate temporary buffer for word. The buffer is allocated as follows:
// - Max word length * 2: for maximum word length (the code enforces a 1024-byte floor)
// - 3 byte: word length
// - 5 byte: Field Id
// - 5 byte: Topic count
// - 6 byte: data pointer
// iIndex is used as a tmp
iIndex = (WORD)(lpipb->BTreeData.Header.dwMaxWLen * 2);
if (iIndex < 1024)
iIndex = 1024;
iIndex += 3 + 5 + 5 + 6;
if ((lpipb->hTmpBuf = _GLOBALALLOC (DLLGMEM_ZEROINIT, iIndex * 2)) == NULL)
{
fRet = E_OUTOFMEMORY;
goto exit2;
}
lpipb->pTmpBuf = (LPB)_GLOBALLOCK (lpipb->hTmpBuf);
lpipb->pWord = lpipb->pTmpBuf + iIndex;
// Allocate a big buffer for data
if ((lpipb->hData = _GLOBALALLOC(DLLGMEM_ZEROINIT,
lpipb->dwDataSize = 0x80000)) == NULL)
{
fRet = E_OUTOFMEMORY;
goto exit2;
}
lpipb->pDataBuffer= _GLOBALLOCK(lpipb->hData);
// Load the input buffer & repeat until all records are processed
pInFile->dwMax = pInFile->cbLeft =
FileRead (pInFile->fFile, pInFile->pMem, pInFile->dwMax, phr);
fRet = S_OK;
pWord = lpipb->pWord;
occf = lpipb->BTreeData.Header.occf;
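// Layout of each record in the temporary input file consumed by the loop
// below (a sketch inferred from the parsing code; bracketed fields exist
// only when the matching OCCF flag is set in the index header):
//
//   DWORD cbRecord                  record size (skipped here)
//   WORD  cbWord, BYTE achWord[]    2-byte length-preceded word
//   [packed DWORD dwWordLen]        OCCF_LENGTH
//   [packed DWORD dwFieldId]        OCCF_FIELDID
//   DWORD cTopics                   topic count for this word
//   per-topic data                  packed topic-id delta plus occurrence
//                                   data, consumed later through
//                                   WriteNewDataRecord / UpdateDataNode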
do
{
LPB pSrcPtr;
WORD wLen;
if (pInFile->cbLeft < CB_MAX_WORD_LEN * sizeof(DWORD) * 8)
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
pInFile->cbLeft += FileRead (pInFile->fFile,
pInFile->pMem + pInFile->cbLeft,
pInFile->dwMax - pInFile->cbLeft, &errb);
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
// Extract the word and its info
pSrcPtr = pInFile->pCurrent + sizeof(DWORD); // Skip reclength
// Copy the word
MEMCPY (pWord, pSrcPtr, wLen = GETWORD((LPUW)pSrcPtr) + 2);
pSrcPtr += GETWORD((LPUW)pSrcPtr) + 2;
if (occf & OCCF_LENGTH)
{
pSrcPtr += CbByteUnpack(&WordInfo.dwWordLen, pSrcPtr);
CbBytePack (pWord + wLen, WordInfo.dwWordLen);
}
else
{
WordInfo.dwWordLen = wLen - 2;
}
if (occf & OCCF_FIELDID)
pSrcPtr += CbByteUnpack(&WordInfo.dwFieldId, pSrcPtr);
WordInfo.dwNewTopicCount = GETLONG((LPUL)pSrcPtr);
pSrcPtr += sizeof(DWORD);
pInFile->pCurrent = pSrcPtr;
pInFile->cbLeft = (LONG)(pInFile->dwMax - (pSrcPtr - pInFile->pMem));
#if 0
if (STRNICMP(pWord+2, "cylindeeer", 10) == 0)
_asm int 3;
#endif
#if 0
else
{
SkipNewData (lpipb, &WordInfo);
continue;
}
#endif
// Find/Add the record
if ((fRet = AddWordToBTree (lpipb, pWord, &WordInfo)) != S_OK)
{
exit3:
_GLOBALUNLOCK (lpipb->hTmpBuf);
_GLOBALFREE (lpipb->hTmpBuf);
_GLOBALUNLOCK(lpipb->hData);
_GLOBALFREE(lpipb->hData);
FreeBTreeNode (lpipb->pIndexDataNode);
lpipb->hData = lpipb->hTmpBuf = 0;
goto exit2;
}
pSrcPtr = pInFile->pCurrent;
// pInFile->pCurrent points to the record size
if (pInFile->cbLeft <= SAFE_SLACK ||
(LONG)GETLONG ((LPUL)pInFile->pCurrent) >= pInFile->cbLeft)
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
if ((pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft, phr)) < 0)
{
fRet = *phr;
goto exit3;
}
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
} while (fRet == S_OK && pInFile->cbLeft);
for (cTreeLevel = pHeader->cIdxLevels - 1; cTreeLevel >= 0; cTreeLevel --)
{
if (rgpNodeInfo[cTreeLevel]->fFlag == TO_BE_UPDATE)
{
if ((FileSeekWrite(lpipb->hfpbIdxFile,
rgpNodeInfo[cTreeLevel]->pBuffer,
rgpNodeInfo[cTreeLevel]->nodeOffset,
lpipb->BTreeData.Header.dwBlockSize, phr)) != (LONG)lpipb->BTreeData.Header.dwBlockSize)
{
fRet = *phr;
goto exit3;
}
}
}
if (lpipb->idxf & IDXF_NORMALIZE)
{
LONG loop;
for (loop = lpipb->dwMaxTopicId; loop >= 0; loop--)
{
lpipb->wi.hrgsigma[loop] =
(float)sqrt ((double)lpipb->wi.hrgsigma[loop]);
}
pHeader->WeightTabSize = (lpipb->dwMaxTopicId + 1)* sizeof(float);
if (FileSeekWrite (lpipb->hfpbIdxFile, lpipb->wi.hrgsigma,
lpipb->foMaxOffset, pHeader->WeightTabSize, phr) !=
(LONG)pHeader->WeightTabSize)
{
fRet = *phr;
goto exit3;
}
pHeader->WeightTabOffset = lpipb->foMaxOffset;
}
// ERIC: 1/ Save the freelist info to the end of the file
// 2/ Update the header with the new freelist offset/size
if (lpipb->hFreeList)
{
LPBYTE lpbFreeList;
dwSizeFreeList = FreeListSize(lpipb->hFreeList,phr);
foFreeListOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwSizeFreeList,0), phr);
if (FoIsNil(foFreeListOffset))
foFreeListOffset = lpipb->foMaxOffset;
if((lpbFreeList = (LPBYTE) _GLOBALALLOCPTR(DLLGMEM_ZEROINIT, dwSizeFreeList)) == NULL)
return E_OUTOFMEMORY;
FreeListGetMem(lpipb->hFreeList, (LPVOID)lpbFreeList);
FileSeekWrite (lpipb->hfpbIdxFile, (LPBYTE)lpbFreeList,
foFreeListOffset, dwSizeFreeList, phr);
if (FoEquals(foFreeListOffset, lpipb->foMaxOffset))
dwSizeFreeList |= 0x80000000;
FreeListDestroy(lpipb->hFreeList);
lpipb->hFreeList = (HFREELIST) NULL;
_GLOBALFREEPTR(lpbFreeList);
}
// Copy info to header
if (pHeader->lcTopics < lpipb->lcTopics)
pHeader->lcTopics = lpipb->lcTopics;
if (pHeader->dwMaxFieldId < lpipb->dwMaxFieldId)
pHeader->dwMaxFieldId = lpipb->dwMaxFieldId;
if (pHeader->dwMaxWCount < lpipb->dwMaxWCount)
pHeader->dwMaxWCount = lpipb->dwMaxWCount;
if (pHeader->dwMaxOffset < lpipb->dwMaxOffset)
pHeader->dwMaxOffset = lpipb->dwMaxOffset;
if (pHeader->dwMaxWLen < lpipb->dwMaxWLen)
pHeader->dwMaxWLen = lpipb->dwMaxWLen;
pHeader->dwMaxTopicId = lpipb->dwMaxTopicId;
// ERIC: Garbage Collection
pHeader->foFreeListOffset = foFreeListOffset;
pHeader->dwFreeListSize = dwSizeFreeList;
// END
FileSeekWrite (lpipb->hfpbIdxFile, (LPB)pHeader,
MakeFo (0, 0), sizeof (IH20), phr);
fRet = S_OK;
goto exit3;
}
/*********************************************************************
* @func LPB PASCAL | AddWordToBTree |
* Find the location of a word in the index. This function also
* sets up all relevant data for the future update
*
* @parm LPIPB | lpipb |
* Pointer to index info
*
* @parm LPB | pWord |
* Word to be searched for. This is a 2-byte preceded Pascal string
*
* @parm PWORDINFO | pWordInfo |
* Pointer to word's info
*
* @rdesc
* S_OK or other errors. In case of success, pWordInfo will
* be filled with useful data
*********************************************************************/
PRIVATE HRESULT NEAR PASCAL AddWordToBTree (_LPIPB lpipb, LPB pWord,
PWORDINFO pWordInfo)
{
int cLevel;
LPB lpCurPtr;
int nCmp;
HRESULT fRet;
WORD RecSize = 0;
LPB lpMaxAddress;
ERRB errb;
PHRESULT phr = &errb;
WORD wWlen;
PNODEINFO pNodeInfo;
PNODEINFO pChildNode;
LPB pBTreeWord;
int cMaxLevel;
FILEOFFSET nodeOffset;
PNODEINFO FAR *rgpNodeInfo = lpipb->BTreeData.rgpNodeInfo;
OCCF occf = lpipb->occf;
LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
#if 0
Count++;
if (STRNICMP(pWord+2, "approeeaching", 11) == 0 ||
STRNICMP(pWord+2, "authenteeic", 11) == 0 ||
STRNICMP(pWord+2, "eastleeand", 10) == 0)
_asm int 3;
#endif
// Change to 0-based
cMaxLevel = lpipb->BTreeData.Header.cIdxLevels - 1;
// Remember the last level offset
nodeOffset = rgpNodeInfo[0]->nodeOffset;
/* Search in the stem nodes */
for (cLevel = 0; cLevel < cMaxLevel ; cLevel++)
{
//
// Set variables
//
pNodeInfo = rgpNodeInfo[cLevel];
pChildNode = rgpNodeInfo[cLevel + 1];
pChildNode->prevNodeOffset = foNil;
pBTreeWord = pNodeInfo->pTmpResult;
// Reload the node if necessary
if (!FoEquals(pNodeInfo->nodeOffset, nodeOffset))
{
if (pNodeInfo->fFlag == TO_BE_UPDATE)
{
if ((FileSeekWrite(lpipb->hfpbIdxFile, pNodeInfo->pBuffer,
pNodeInfo->nodeOffset, dwBlockSize,
&errb)) != (LONG)dwBlockSize)
return(errb);
}
pNodeInfo->nodeOffset = nodeOffset;
if ((fRet = ReadNewNode (lpipb->hfpbIdxFile, pNodeInfo,
FALSE)) != S_OK)
{
return SetErrCode (phr, fRet);
}
pNodeInfo->fFlag = 0;
}
lpMaxAddress = pNodeInfo->pMaxAddress;
lpCurPtr = pNodeInfo->pCurPtr; // points to the LAST ACCESSED word in the block
// The format of the stem node
// cbLeft | (Word | PointerToNode) | Slack
while (lpCurPtr < lpMaxAddress - 1)
{
// Save the last location. This would be the insertion point for
// update
pNodeInfo->pCurPtr = lpCurPtr;
// Reset the word length
wWlen = 0;
// Get the compressed word
lpCurPtr = ExtractWord(pBTreeWord, lpCurPtr, &wWlen);
/* Read in NodeId record */
lpCurPtr += ReadFileOffset (&nodeOffset, lpCurPtr);
if ((nCmp = StrCmpPascal2(pWord, pBTreeWord)) == 0)
nCmp = (int)((WORD)pWordInfo->dwWordLen - wWlen );
if (nCmp > 0)
{
// We didn't find the location of the word yet
// Continue searching
if (lpCurPtr < pNodeInfo->pMaxAddress - 1)
{
MEMCPY (pNodeInfo->pLastWord, pBTreeWord,
*(LPUW)pBTreeWord + sizeof(WORD)); // erinfox RISC patch
}
pChildNode->prevNodeOffset = nodeOffset;
continue;
}
// We found the location of the word
break;
}
}
// At this point, nodeOffset is the node id of the leaf that
// is supposed to contain the searched word.
pNodeInfo = rgpNodeInfo[cMaxLevel];
if (!FoEquals(pNodeInfo->nodeOffset, nodeOffset))
{
if (pNodeInfo->fFlag == TO_BE_UPDATE)
{
if ((FileSeekWrite(lpipb->hfpbIdxFile, pNodeInfo->pBuffer,
pNodeInfo->nodeOffset, dwBlockSize,
phr)) != dwBlockSize)
return(*phr);
}
pNodeInfo->nodeOffset = nodeOffset;
if ((fRet = ReadNewNode (lpipb->hfpbIdxFile, pNodeInfo,
TRUE)) != S_OK)
{
return SetErrCode (phr, fRet);
}
pNodeInfo->fFlag = 0;
lpCurPtr = pNodeInfo->pCurPtr;
}
else
{
// Reset all data
// lpCurPtr = pNodeInfo->pCurPtr = pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE;
lpCurPtr = pNodeInfo->pCurPtr;
}
pBTreeWord = pNodeInfo->pTmpResult;
lpMaxAddress = pNodeInfo->pMaxAddress;
// Reset the last word
*(LPWORD)pNodeInfo->pLastWord = 0;
// Leaf node structure:
// (Word|FieldId|TopicCnt|PointerToNode|DataSize)*
for (;;)
{
DWORD dwFieldId;
// Save the last location. This would be the insertion point for
// update
pNodeInfo->pCurPtr = lpCurPtr;
if (lpCurPtr >= lpMaxAddress)
{
// Add to the end of the node
if ((fRet = WriteNewDataRecord (lpipb, pWordInfo)) != S_OK)
return(fRet);
return AddRecordToBTree (lpipb, pWord, pWordInfo, cMaxLevel, 0);
}
// Get the compressed word
lpCurPtr = ExtractWord(pBTreeWord, lpCurPtr, &wWlen);
// Get fieldid and topic count
if (occf & OCCF_FIELDID)
lpCurPtr += CbByteUnpack (&dwFieldId, lpCurPtr);
lpCurPtr += CbByteUnpack (&pWordInfo->dwIndexTopicCount, lpCurPtr);
// Get the data location and size
lpCurPtr += ReadFileOffset (&pWordInfo->dataLocation, lpCurPtr);
lpCurPtr += CbByteUnpack(&pWordInfo->dwDataSize, lpCurPtr);
if ((nCmp = StrCmpPascal2(pWord, pBTreeWord)) == 0)
{
if (occf & OCCF_LENGTH)
nCmp = (int)((WORD)pWordInfo->dwWordLen - wWlen);
if (nCmp == 0 && (occf & OCCF_FIELDID))
nCmp = (int)(pWordInfo->dwFieldId - dwFieldId);
}
if (nCmp > 0)
{
// We didn't find the location of the word yet
// Continue searching
MEMCPY (pNodeInfo->pLastWord, pBTreeWord,
*(LPUW)pBTreeWord+sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
continue;
}
if (nCmp == 0)
{
if ((fRet = UpdateDataNode (lpipb, pWordInfo)) != S_OK)
return(fRet);
return AddRecordToBTree (lpipb, pWord, pWordInfo, cMaxLevel,
REPLACE_WORD_01);
}
else
{
if ((fRet = WriteNewDataRecord (lpipb, pWordInfo)) != S_OK)
return(fRet);
return AddRecordToBTree (lpipb, pWord, pWordInfo, cLevel, 0);
}
break;
}
return S_OK;
}
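/* pWord format example (illustrative): the word "cat" arrives as a 2-byte
 * length-preceded Pascal string, i.e. on x86 the bytes 03 00 'c' 'a' 't'
 * (a WORD character count followed by the characters, no terminating NUL).
 * When OCCF_LENGTH is set, UpdateIndexBTree appends the packed "true" word
 * length right after the characters before calling this function.
 */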
/*************************************************************************
* @doc INTERNAL
*
* @func HRESULT PASCAL | ReadNewNode |
* Read a node in from the disk and set up its scanning pointers
* (pCurPtr, pMaxAddress, cbLeft)
*
* @parm PNODEINFO | pNodeInfo |
* Pointer to leaf info
*
* @parm int | fLeafNode|
* TRUE if this is a leaf node
*
* @rdesc S_OK if successful, otherwise other errors. On exit,
* lpCurPtr will point to the beginning of the 1st word in the
* node
*
* @rcomm The format of the leaf node is different from a stem node
* Stem node structure: *
* CbLeft |* Word | PointerToNode *| Slack *
* *
* Leaf node structure: *
* NxtBlkPtr|CbLeft|*Word|FieldId|TopicCnt|PointerToNode|DataSize*|Slack *
* *
*************************************************************************/
PUBLIC HRESULT PASCAL FAR ReadNewNode (HFPB hfpb, PNODEINFO pNodeInfo,
int fLeafNode)
{
ERRB errb;
if (FileSeekRead (hfpb, pNodeInfo->pBuffer, pNodeInfo->nodeOffset,
pNodeInfo->dwBlockSize, &errb) != (long)pNodeInfo->dwBlockSize)
return E_BADFILE;
pNodeInfo->pCurPtr = pNodeInfo->pBuffer;
if (fLeafNode)
{
pNodeInfo->pCurPtr += ReadFileOffset (&pNodeInfo->nextNodeOffset,
pNodeInfo->pBuffer);
}
else
pNodeInfo->nextNodeOffset = foNil;
pNodeInfo->cbLeft = *(LPUW)(pNodeInfo->pCurPtr); // erinfox RISC patch
pNodeInfo->pCurPtr += sizeof(WORD);
pNodeInfo->pMaxAddress = pNodeInfo->pBuffer + pNodeInfo->dwBlockSize -
pNodeInfo->cbLeft;
*(LPUW)(pNodeInfo->pLastWord) = *(LPUW)(pNodeInfo->pTmpResult) = 0;
return S_OK;
}
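/* Node layout sketch (illustrative, following the @rcomm comment above;
 * every node/data pointer is FOFFSET_SIZE bytes, and FieldId appears only
 * when OCCF_FIELDID is set):
 *
 *   Stem node:  | cbLeft (WORD) | Word,NodePtr | Word,NodePtr | ... | slack |
 *   Leaf node:  | NxtBlkPtr | cbLeft (WORD) |
 *                 Word,FieldId,TopicCnt,DataPtr,DataSize | ... | slack |
 *
 * On return pCurPtr sits on the first word (just past cbLeft) and
 * pMaxAddress = pBuffer + dwBlockSize - cbLeft, so callers walk the entries
 * until pCurPtr reaches pMaxAddress.
 */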
PUBLIC HRESULT PASCAL FAR IndexOpenRW (_LPIPB lpipb, HFPB hfpbSysFile, LSZ lszFilename)
{
HFPB hfpb; // Handle to system file
HRESULT fRet;
ERRB errb;
PHRESULT phr = &errb;
PIH20 pHeader;
int iIndex;
LONG i;
// Check the existence of the file
if ((hfpb = FileOpen (hfpbSysFile, lszFilename,
hfpbSysFile ? FS_SUBFILE : REGULAR_FILE, READ, phr)) == 0)
{
return *phr;
}
FileClose (hfpb);
// Reopen the file for read/write
lpipb->hfpbIdxFile = FileOpen (hfpbSysFile, lszFilename,
hfpbSysFile ? FS_SUBFILE : REGULAR_FILE, READ_WRITE, phr);
if ((fRet = ReadIndexHeader(lpipb->hfpbIdxFile,
pHeader = &lpipb->BTreeData.Header)) != S_OK)
{
exit01:
SetErrCode (phr, fRet);
FileClose(lpipb->hfpbIdxFile);
return fRet;
}
if (pHeader->version != VERCURRENT ||
pHeader->FileStamp != INDEX_STAMP)
{
fRet = E_BADVERSION;
goto exit01;
}
// incoming index and occurrence flags must match those in original index
if (pHeader->occf != lpipb->occf ||
pHeader->idxf != lpipb->idxf)
{
fRet = E_BADINDEXFLAGS;
goto exit01;
}
// Update the compression key to be used by WriteDataNode later
lpipb->cKey[CKEY_TOPIC_ID] = pHeader->ckeyTopicId;
lpipb->cKey[CKEY_OCC_COUNT] = pHeader->ckeyOccCount;
iIndex = CKEY_OCC_BASE;
if (pHeader->occf & OCCF_COUNT)
lpipb->cKey[iIndex++] = pHeader->ckeyWordCount;
if (pHeader->occf & OCCF_OFFSET)
lpipb->cKey[iIndex] = pHeader->ckeyOffset;
// Update the maximum TopicId
if (pHeader->dwMaxTopicId < lpipb->dwMaxTopicId)
pHeader->dwMaxTopicId = lpipb->dwMaxTopicId;
else
lpipb->dwMaxTopicId = pHeader->dwMaxTopicId;
// Get the file size.
lpipb->foMaxOffset = FileSize (lpipb->hfpbIdxFile, phr);
if (lpipb->idxf & IDXF_NORMALIZE)
{
// Load the sigma table
if (FoEquals(pHeader->WeightTabOffset, foNil))
{
fRet = SetErrCode (phr, E_ASSERT);
goto exit01;
}
if ((fRet = AllocSigmaTable (lpipb)) != S_OK)
goto exit01;
if (FileSeekRead (lpipb->hfpbIdxFile, lpipb->wi.hrgsigma,
pHeader->WeightTabOffset, pHeader->WeightTabSize, phr) !=
(LONG)pHeader->WeightTabSize)
{
fRet = errb;
goto exit01;
}
if (lpipb->bState == DELETING_STATE)
{
// Square the sigma table
// erinfox: off by one bug. change i = lpipb->dwMaxTopicId + 1
// to lpipb->dwMaxTopicId because we have only allocated
// (dwMaxTopicId + 1)*sizeof(float) bytes
for (i = lpipb->dwMaxTopicId; i >= 0; i--)
{
lpipb->wi.hrgsigma[i] = lpipb->wi.hrgsigma[i] *
lpipb->wi.hrgsigma[i];
}
}
}
/* ERIC */
// Load or create a freelist (dwSize = 0)
if (lpipb->bState == UPDATING_STATE)
{
if (pHeader->dwFreeListSize) // If a freelist already exists, read it; otherwise create one.
{
LPBYTE lpbFreeList;
if (pHeader->dwFreeListSize & 0x80000000)
{
pHeader->dwFreeListSize &= 0x7FFFFFFF;
lpipb->foMaxOffset = FoSubFo(lpipb->foMaxOffset,MakeFo(pHeader->dwFreeListSize,0));
}
if(!(lpbFreeList = (LPBYTE) _GLOBALALLOCPTR(DLLGMEM_ZEROINIT, pHeader->dwFreeListSize)))
{
fRet = SetErrCode (phr, E_OUTOFMEMORY);
goto exit01;
}
FileSeekRead (lpipb->hfpbIdxFile, (LPBYTE)lpbFreeList,
pHeader->foFreeListOffset, pHeader->dwFreeListSize, phr);
lpipb->hFreeList = FreeListInitFromMem(lpbFreeList, phr );
_GLOBALFREEPTR(lpbFreeList);
}
else
lpipb->hFreeList = FreeListInit( wDefaultFreeListSize, phr);
}
return S_OK;
}
PRIVATE PASCAL NEAR AddRecordToBTree (_LPIPB lpipb, LPB pWord,
PWORDINFO pWordInfo, int cLevel, int fFlag)
{
PNODEINFO pNodeInfo;
PNODEINFO pTmpNodeInfo;
LPB pInsertPtr; // Pointer to insertion point
LPB pWordStorage;
LPB pLastWord;
LPB pBuffer;
BYTE fIsStemNode;
WORD wWLen;
WORD wNewRecSize; // New record size
LONG cbByteMoved; // Number of bytes moved to leave room for new rec
OCCF occf = lpipb->occf; // Occurrence field flags
BYTE fLength = occf & OCCF_LENGTH;
WORD cbLeft; // How many bytes are left in the current node?
LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
BYTE cbSkip;
BYTE fEndNode;
ERRB errb;
if (cLevel == -1)
{
// The tree's level has increased by one
int i;
if (lpipb->BTreeData.Header.cIdxLevels >= MAX_TREE_HEIGHT - 1)
return E_TREETOOBIG;
/* Move down the entries to make room for the top node */
for (i = lpipb->BTreeData.Header.cIdxLevels; i > 0 ; i-- )
{
lpipb->BTreeData.rgpNodeInfo[i] = lpipb->BTreeData.rgpNodeInfo[i-1];
lpipb->BTreeData.rgpTmpNodeInfo[i] = lpipb->BTreeData.rgpTmpNodeInfo[i-1];
}
// Increase tree level
lpipb->BTreeData.Header.cIdxLevels ++;
if ((pNodeInfo = lpipb->BTreeData.rgpNodeInfo[0] = AllocBTreeNode (lpipb)) == NULL)
return(E_OUTOFMEMORY);
if ((pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[0] = AllocBTreeNode (lpipb)) == NULL)
return(E_OUTOFMEMORY);
pWordStorage = (pBuffer = pNodeInfo->pBuffer) + sizeof(WORD);
if (fFlag & USE_BOTH_NODE_40)
{
if (fFlag & USE_TEMP_FOR_RIGHT_NODE_10)
{
// Link to the left child node
pWordStorage += PrefixCompressWord (pWordStorage,
lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult,
EmptyWord, fLength);
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpNodeInfo[1]->nodeOffset);
// Link to the right child node
pWordStorage += PrefixCompressWord (pWordStorage,
lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult,
lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult, fLength);
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
}
else
{
// Link to the left child node
pWordStorage += PrefixCompressWord (pWordStorage,
lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult,
EmptyWord, fLength);
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
// Link to the right child node
pWordStorage += PrefixCompressWord (pWordStorage,
lpipb->BTreeData.rgpNodeInfo[1]->pTmpResult,
lpipb->BTreeData.rgpTmpNodeInfo[1]->pTmpResult, fLength);
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpNodeInfo[1]->nodeOffset);
}
}
else
{
// Link to the right child node
pWordStorage += PrefixCompressWord (pWordStorage,
pWord, EmptyWord, fLength);
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpTmpNodeInfo[1]->nodeOffset);
}
// Set all the parameters
pNodeInfo->pCurPtr = pBuffer + sizeof(WORD);
pNodeInfo->cbLeft = (LONG)(pBuffer - pWordStorage + dwBlockSize);
pNodeInfo->pMaxAddress = pBuffer + dwBlockSize - pNodeInfo->cbLeft;
SETWORD(pBuffer, (WORD)pNodeInfo->cbLeft);
// Write out the new node
if ((FileSeekWrite(lpipb->hfpbIdxFile, pBuffer,
lpipb->foMaxOffset, dwBlockSize, &errb)) != (LONG)dwBlockSize)
return(errb);
// Remember the offset of this node
// Set the pointer to the top stem node
lpipb->BTreeData.Header.foIdxRoot = pNodeInfo->nodeOffset =
lpipb->foMaxOffset;
lpipb->BTreeData.Header.nidIdxRoot = pNodeInfo->nodeOffset.dwOffset;
lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
#if 0
return CheckStemNode (pNodeInfo);
#else
return(S_OK);
#endif
}
// Initialize data
pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
pLastWord = pNodeInfo->pLastWord;
pBuffer = pNodeInfo->pBuffer;
if (fIsStemNode = (cLevel < lpipb->BTreeData.Header.cIdxLevels - 1))
cbSkip = sizeof(WORD);
else
cbSkip = sizeof(WORD) + FOFFSET_SIZE;
fEndNode = (pNodeInfo->pCurPtr >= pNodeInfo->pMaxAddress);
// Calculate how many bytes are left in the old node
pInsertPtr = pNodeInfo->pCurPtr; // Pointer to insertion point
cbLeft = (WORD)pNodeInfo->cbLeft;
// Handle special simple cases
if (fFlag & UPDATE_NODE_ADDRESS_08)
{
// Skip the next word
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
pInsertPtr, &wWLen);
if (fFlag & USE_TEMP_NODE_04)
{
CopyFileOffset (pInsertPtr,
lpipb->BTreeData.rgpTmpNodeInfo[cLevel + 1]->nodeOffset);
}
else
{
CopyFileOffset (pInsertPtr,
lpipb->BTreeData.rgpNodeInfo[cLevel + 1]->nodeOffset);
}
#if 0
return(fIsStemNode ? CheckStemNode (pNodeInfo) :
CheckLeafNode (pNodeInfo, occf));
#else
return(S_OK);
#endif
}
if (fFlag & (REPLACE_WORD_01 | SKIP_NEXT_WORD_20))
{
// We get more room from the replaced word
DWORD dwTemp;
// Skip the next word
if (fFlag & SKIP_NEXT_WORD_20)
{
pInsertPtr = ExtractWord(pLastWord, pInsertPtr, &wWLen);
}
else
{
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
pInsertPtr, &wWLen);
}
// Skip the data
if (fIsStemNode)
pInsertPtr += FOFFSET_SIZE;
else
{
// Skip field id, topic count, file offset, data size
if (occf & OCCF_FIELDID)
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr); // FieldId
if (occf & OCCF_TOPICID)
{
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
pInsertPtr += FOFFSET_SIZE;
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
}
}
if (fFlag & SKIP_NEXT_WORD_20)
pNodeInfo->pCurPtr = pInsertPtr;
else
{
// Remove the old data
MEMMOVE (pNodeInfo->pCurPtr, pInsertPtr,
cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
pNodeInfo->pMaxAddress =
(pInsertPtr = pNodeInfo->pCurPtr) + cbByteMoved;
cbLeft = (WORD)(dwBlockSize - (pNodeInfo->pMaxAddress - pBuffer));
}
if (pInsertPtr >= pNodeInfo->pMaxAddress)
fEndNode = TRUE;
}
// Calculate the approximate number of bytes needed for the
// new data by compressing it into the temporary block
if (fIsStemNode)
{
if (pInsertPtr <= pNodeInfo->pBuffer + sizeof(WORD))
{
// This is the first word, there is no previous one
*(LPWORD)pLastWord = 0;
}
wNewRecSize = (WORD) CopyNewDataToStemNode (lpipb, pTmpNodeInfo,
pWord, pLastWord, cLevel, fFlag);
}
else
{
if (pInsertPtr <= pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE)
{
// This is the first word, there is no previous one
*(LPWORD)pLastWord = 0;
}
wNewRecSize = (WORD) CopyNewDataToLeafNode (lpipb, pTmpNodeInfo,
pWordInfo, pWord, pLastWord);
}
wNewRecSize -= cbSkip;
// Reserve about 4 bytes of slack so that the "enough room" test below
// is conservative: compression may change the size of the record,
// which could otherwise make us run out of room when copying the new
// data over
if (cbLeft - sizeof(DWORD) > wNewRecSize)
{
// We have enough room for the new data. Just insert the new data
pWordStorage = pTmpNodeInfo->pCurPtr;
if (!fEndNode)
{
// We need to recompress the next word
MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
*(LPUW)pWord + sizeof(WORD) + sizeof(WORD)); //erinfox RISC patch
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
pInsertPtr, &wWLen);
cbByteMoved = PrefixCompressWord (pWordStorage,
pTmpNodeInfo->pTmpResult, pWord, fLength);
wNewRecSize += (WORD)cbByteMoved;
// Reset the last word for pBTreeWord
MEMCPY (pNodeInfo->pTmpResult, pLastWord,
*(LPUW)pLastWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
}
// Make room for the new data
if ((cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr)) <= 0)
cbByteMoved = 0;
else
MEMMOVE(pNodeInfo->pCurPtr + wNewRecSize, pInsertPtr,
cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
// Copy the new data
MEMCPY (pNodeInfo->pCurPtr, pTmpNodeInfo->pBuffer + cbSkip,
wNewRecSize);
// Update data
pNodeInfo->pMaxAddress = pNodeInfo->pCurPtr + wNewRecSize +
cbByteMoved;
pNodeInfo->cbLeft = cbLeft =
(WORD)(dwBlockSize - (pNodeInfo->pMaxAddress - pBuffer));
SETWORD(pNodeInfo->pBuffer + cbSkip - sizeof(WORD),
(WORD)cbLeft);
pNodeInfo->fFlag = TO_BE_UPDATE;
// Change the parent node
if (fEndNode && cLevel)
{
return (AddRecordToBTree (lpipb, pWord, pWordInfo, cLevel - 1,
REPLACE_WORD_01));
}
#if 0
return(fIsStemNode ? CheckStemNode (pNodeInfo) :
CheckLeafNode (pNodeInfo, occf));
#else
return(S_OK);
#endif
return S_OK;
}
// Case 3: Add to the middle. This is a complex one, since we have
// to split the node into 2.
return(SplitNodeAndAddData (lpipb, pWord, pWordInfo, cLevel, fFlag,
fIsStemNode));
}
PRIVATE int PASCAL NEAR SplitNodeAndAddData (_LPIPB lpipb, LPB pWord,
PWORDINFO pWordInfo, int cLevel, int fFlag, int fIsStemNode)
{
PNODEINFO pNodeInfo;
PNODEINFO pTmpNodeInfo;
LONG cbByteMoved;
WORD leftSize;
WORD rightSize;
WORD wWLen;
LPB pInsertPtr;
LPB pWordStorage;
int cbSkip;
DWORD dwBlockSize;
HRESULT fRet;
BYTE fLength = lpipb->occf & OCCF_LENGTH;
LPB pLastWord;
LPB pTemp;
LPB pBuffer;
if (fIsStemNode)
cbSkip = 0;
else
cbSkip = FOFFSET_SIZE;
// Variable initialization
pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
pBuffer = pNodeInfo->pBuffer;
pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
pInsertPtr = pNodeInfo->pCurPtr;
dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
pLastWord = pNodeInfo->pLastWord;
// Calculate approximately the left & right side node sizes
leftSize = (WORD)(pInsertPtr - pBuffer - cbSkip - sizeof(WORD));
rightSize = (WORD)(pNodeInfo->pMaxAddress - pNodeInfo->pCurPtr);
if (leftSize >= rightSize)
{
// We add to the right. The new data will be 1st
// Example:
// Add 4 into 1 2 3 5 --> 1 2 3 and 4 5
if (fIsStemNode)
{
CopyNewDataToStemNode (lpipb, pTmpNodeInfo,
pWord, EmptyWord, cLevel, fFlag);
pTemp = pTmpNodeInfo->pBuffer + sizeof(WORD);
}
else
{
CopyNewDataToLeafNode (lpipb, pTmpNodeInfo,
pWordInfo, pWord, EmptyWord);
pTemp = pTmpNodeInfo->pBuffer + sizeof(WORD) +
FOFFSET_SIZE;
}
pWordStorage = pTmpNodeInfo->pCurPtr;
// Move back the pointer to the beginning of the word
// for future reference
pTmpNodeInfo->pCurPtr = pTemp;
if (rightSize > 0)
{
// Extract the word on the right of the insertion point
MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
*(LPUW)pWord + sizeof(WORD)); // erinfox RISC patch
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult,
pInsertPtr, &wWLen);
pWordStorage += PrefixCompressWord (pWordStorage,
pTmpNodeInfo->pTmpResult, pWord, fLength);
// Copy data on the right of the insertion point to the new node
MEMCPY (pWordStorage, pInsertPtr,
cbByteMoved = (LONG)(pNodeInfo->pMaxAddress - pInsertPtr));
pWordStorage += cbByteMoved;
}
pTmpNodeInfo->pMaxAddress = pWordStorage;
// Update the right node
SETWORD(pTmpNodeInfo->pBuffer + cbSkip,
(WORD)(pTmpNodeInfo->cbLeft =
(LONG)(dwBlockSize - (pWordStorage - pTmpNodeInfo->pBuffer))));
pTmpNodeInfo->pMaxAddress = pTmpNodeInfo->pBuffer +
dwBlockSize - pTmpNodeInfo->cbLeft;
#if 0
if (fIsStemNode)
CheckStemNode (pTmpNodeInfo);
else
CheckLeafNode (pTmpNodeInfo, lpipb->occf);
#endif
MEMSET (pWordStorage, 0, pTmpNodeInfo->cbLeft);
if ((fRet = CreateNewNode (lpipb, cLevel,
fIsStemNode, NEW_NODE_ON_RIGHT)) != S_OK)
return(fRet);
// Update the left node
pNodeInfo->fFlag = TO_BE_UPDATE;
SETWORD(pBuffer + cbSkip, (WORD)(pNodeInfo->cbLeft =
(LONG)(dwBlockSize - (pNodeInfo->pCurPtr - pBuffer))));
#ifdef _DEBUG
MEMSET (pNodeInfo->pCurPtr, 0, pNodeInfo->cbLeft);
#endif
pNodeInfo->pMaxAddress = pBuffer + dwBlockSize - pNodeInfo->cbLeft;
pNodeInfo->fFlag = TO_BE_UPDATE;
#if 0
if (fIsStemNode)
CheckStemNode (pNodeInfo);
else
CheckLeafNode (pNodeInfo, lpipb->occf);
#endif
if (cLevel == 0)
{
if (pNodeInfo->pCurPtr >= pNodeInfo->pMaxAddress - 1)
pNodeInfo->pCurPtr = pNodeInfo->pBuffer + cbSkip + sizeof(WORD);
GetLastWordInNode (lpipb, pNodeInfo, fIsStemNode);
GetLastWordInNode (lpipb, pTmpNodeInfo, fIsStemNode);
return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
USE_BOTH_NODE_40 | USE_TEMP_FOR_RIGHT_NODE_10);
}
if (rightSize > 0)
{
if ((fRet = AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
USE_TEMP_NODE_04 | UPDATE_NODE_ADDRESS_08)) != S_OK)
return fRet;
return AddRecordToBTree (lpipb, pNodeInfo->pLastWord, NULL, cLevel - 1, 0);
}
if (fFlag & REPLACE_WORD_01)
{
// rightSize == 0 means that we are adding to the end of the block.
// REPLACE_WORD means that we are replacing the same word, so basically
// we have to add a new entry for the left block
if ((fRet = AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
USE_TEMP_NODE_04 | REPLACE_WORD_01)) != S_OK)
return fRet;
return AddRecordToBTree (lpipb, pNodeInfo->pLastWord, NULL,
cLevel - 1, 0);
}
// Add to the end
return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
USE_TEMP_NODE_04 | SKIP_NEXT_WORD_20);
}
//**********************************************
//
// Add the new data to the end of the leftnode
//
//**********************************************
// We add to the left. The new data will be last
// Example:
// Add 2 into 1 3 4 5 --> 1 2 and 3 4 5
pTmpNodeInfo->pCurPtr = pWordStorage =
pTmpNodeInfo->pBuffer + cbSkip + sizeof(WORD);
// Copy the data on the left to the new node
if (cbByteMoved = leftSize)
{
MEMCPY(pWordStorage, pBuffer + cbSkip + sizeof(WORD), cbByteMoved);
pWordStorage += cbByteMoved;
}
// Emit new data
pWordStorage += PrefixCompressWord (pWordStorage,
pWord, pLastWord, lpipb->occf & OCCF_LENGTH);
if (fIsStemNode)
{
if (fFlag & USE_TEMP_NODE_04)
{
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpTmpNodeInfo[cLevel+1]->nodeOffset);
}
else
{
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpNodeInfo[cLevel+1]->nodeOffset);
}
}
else
{
// Emit field id, topic count, file offset, data size
if (lpipb->occf & OCCF_FIELDID)
pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwFieldId);
pWordStorage += CbBytePack (pWordStorage,
pWordInfo->dwMergeTopicCount);
pWordStorage += CopyFileOffset (pWordStorage, pWordInfo->dataLocation);
pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwDataSize);
}
SETWORD (pTmpNodeInfo->pBuffer + cbSkip,
(WORD)(pTmpNodeInfo->cbLeft = (LONG)(pNodeInfo->dwBlockSize
- (pWordStorage - pTmpNodeInfo ->pBuffer))));
pTmpNodeInfo->pMaxAddress = pWordStorage;
if ((fRet = CreateNewNode (lpipb, cLevel, fIsStemNode,
NEW_NODE_ON_LEFT)) != S_OK)
return(fRet);
// Update the right node
if (leftSize > 0)
{
MEMMOVE(pNodeInfo->pCurPtr = pBuffer + cbSkip + sizeof(WORD),
pInsertPtr, (size_t)(pNodeInfo->pMaxAddress - pInsertPtr));
pNodeInfo->pMaxAddress -= cbByteMoved;
// Reconstruct the 1st word in the node.
if (fFlag & REPLACE_WORD_01)
{
MEMCPY (pTmpNodeInfo->pTmpResult, pWord,
*(LPUW)pWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
}
else
{
MEMCPY (pTmpNodeInfo->pTmpResult, pLastWord,
*(LPUW)pLastWord + sizeof(WORD) + sizeof(WORD)); // erinfox RISC patch
}
}
pInsertPtr = pNodeInfo->pCurPtr;
pInsertPtr = ExtractWord(pTmpNodeInfo->pTmpResult, pTemp = pInsertPtr, &wWLen);
cbByteMoved = (LONG)(pInsertPtr - pTemp);
// Recompress the word using pLastWord of pTmpNodeInfo
wWLen = (WORD) PrefixCompressWord (pTmpNodeInfo->pLastWord,
pTmpNodeInfo->pTmpResult, EmptyWord, fLength);
// Reserve room for the word
pWordStorage = pBuffer + cbSkip + sizeof(WORD);
MEMMOVE (pWordStorage + wWLen, pInsertPtr,
(size_t)(pNodeInfo->pMaxAddress - pInsertPtr));
// Copy down the word
MEMCPY(pWordStorage, pTmpNodeInfo->pLastWord, wWLen);
pNodeInfo->pMaxAddress += wWLen - cbByteMoved;
// Update the right node
SETWORD(pBuffer + cbSkip,
(WORD)(pNodeInfo->cbLeft =(WORD)(dwBlockSize -
(pNodeInfo->pMaxAddress - pBuffer))));
pNodeInfo->fFlag = TO_BE_UPDATE;
#ifdef _DEBUG
MEMSET (pNodeInfo->pMaxAddress, 0, pNodeInfo->cbLeft);
#endif
if (cLevel == 0)
{
GetLastWordInNode (lpipb, pNodeInfo, fIsStemNode);
GetLastWordInNode (lpipb, pTmpNodeInfo, fIsStemNode);
return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
USE_BOTH_NODE_40);
}
return AddRecordToBTree (lpipb, pWord, NULL, cLevel - 1,
USE_TEMP_NODE_04);
return(fRet);
}
VOID GetLastWordInNode (_LPIPB lpipb, PNODEINFO pNodeInfo, BOOL fIsStemNode)
{
LPB pInsertPtr = pNodeInfo->pCurPtr;
LPB pMaxAddress = pNodeInfo->pMaxAddress;
WORD wWLen;
DWORD dwTemp;
MEMCPY (pNodeInfo->pTmpResult, EmptyWord, 4);
while (pInsertPtr < pNodeInfo->pMaxAddress - 1)
{
pInsertPtr = ExtractWord(pNodeInfo->pTmpResult, pInsertPtr, &wWLen);
if (!fIsStemNode)
{
if (lpipb->occf & OCCF_FIELDID)
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
if (lpipb->occf & OCCF_TOPICID)
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);// Topic count
}
pInsertPtr += FOFFSET_SIZE; // FileOffset
if (!fIsStemNode)
pInsertPtr += CbByteUnpack (&dwTemp, pInsertPtr);
}
}
PRIVATE HRESULT PASCAL NEAR CreateNewNode(_LPIPB lpipb, int cLevel,
int fIsStemNode, int fAfter)
{
PNODEINFO pNodeInfo;
PNODEINFO pTmpNodeInfo;
ERRB errb;
LONG dwBlockSize = lpipb->BTreeData.Header.dwBlockSize;
pNodeInfo = lpipb->BTreeData.rgpNodeInfo[cLevel];
pTmpNodeInfo = lpipb->BTreeData.rgpTmpNodeInfo[cLevel];
#ifdef _DEBUG
dwNewNodeSize += dwBlockSize;
#endif
if (!fIsStemNode)
{
// Add the new node into the linked list
if (fAfter)
CopyFileOffset (pTmpNodeInfo->pBuffer, pNodeInfo->nextNodeOffset);
else
CopyFileOffset (pTmpNodeInfo->pBuffer, pNodeInfo->nodeOffset);
}
// Write out the new node
if ((FileSeekWrite(lpipb->hfpbIdxFile, pTmpNodeInfo->pBuffer,
lpipb->foMaxOffset, dwBlockSize, &errb)) != (LONG)dwBlockSize)
return(errb);
// Remember the offset of this node
pTmpNodeInfo->nodeOffset = lpipb->foMaxOffset;
if (!fIsStemNode)
{
if (fAfter)
{
CopyFileOffset (pNodeInfo->pBuffer, lpipb->foMaxOffset);
pNodeInfo->fFlag = TO_BE_UPDATE;
}
else
{
// Update the previous link
if (!FoEquals(pNodeInfo->prevNodeOffset, foNil))
{
BYTE TempBuf[FOFFSET_SIZE + 1];
CopyFileOffset (TempBuf,lpipb->foMaxOffset);
if ((FileSeekWrite(lpipb->hfpbIdxFile, TempBuf,
pNodeInfo->prevNodeOffset, FOFFSET_SIZE,
&errb)) != FOFFSET_SIZE)
return(errb);
}
}
}
lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
return(S_OK);
}
PRIVATE HRESULT PASCAL NEAR WriteNewDataRecord (_LPIPB lpipb, PWORDINFO pWordInfo)
{
PFILEDATA pOutFile = &lpipb->OutFile;
DWORD dwBlockSize;
ERRB errb;
HRESULT fRet;
FREEBLOCK FreeBlock;
// Reset the characteristics of the file
pOutFile->pCurrent = pOutFile->pMem;
pOutFile->cbLeft = pOutFile->dwMax;
pOutFile->ibit = cbitBYTE - 1;
FileSeek (pOutFile->fFile,
pOutFile->foPhysicalOffset = foNil, 0, &errb);
// Write out the data into the temp file
if ((dwBlockSize = WriteDataNode (lpipb,
pWordInfo->dwMergeTopicCount = pWordInfo->dwNewTopicCount, &errb)) == 0)
return errb;
// Write out the output buffer
if (FileWrite (pOutFile->fFile, pOutFile->pMem,
(LONG)(pOutFile->pCurrent - pOutFile->pMem), &errb) !=
(LONG) (pOutFile->pCurrent - pOutFile->pMem))
return(errb);
// if ((errb.err = FileFlush (pOutFile->fFile)) != S_OK)
// return(errb.err);
pWordInfo->dwDataSize = dwBlockSize;
// Find the smallest free block that fits the new data
if (GetFreeBlock (lpipb, &FreeBlock, dwBlockSize) != S_OK)
{
#ifdef _DEBUGFREE
_DPF2("GetFreeBlock failed. Requested %ld bytes, appending to EOF(%ld)\n", dwBlockSize, lpipb->foMaxOffset.dwOffset);
#endif
// There is no free block large enough to store the data
if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
lpipb->foMaxOffset, dwBlockSize)) != S_OK)
return fRet;
pWordInfo->dataLocation = lpipb->foMaxOffset;
lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwBlockSize);
#ifdef _DEBUG
dwNewDataSize += dwBlockSize;
#endif
return(S_OK);
}
// There is a free block large enough to store the data
if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
FreeBlock.foBlockOffset, dwBlockSize)) != S_OK)
return fRet;
pWordInfo->dataLocation = FreeBlock.foBlockOffset;
return S_OK;
}
// erinfox: return a block from the free list if possible
PRIVATE HRESULT GetFreeBlock (_LPIPB lpipb, PFREEBLOCK pFreeBlock,
DWORD dwBlockSize)
{
FILEOFFSET foFreeListOffset;
ERRB errb;
// if it can't find a free block, it returns an error
foFreeListOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwBlockSize,0), &errb);
if (FoIsNil(foFreeListOffset))
{
return errb;
}
pFreeBlock->foBlockOffset = foFreeListOffset;
return S_OK;
}
PRIVATE HRESULT PASCAL NEAR CopyBlockFile (PFILEDATA pFileData, HFPB hfpbDest,
FILEOFFSET foOffset, DWORD dwBlockSize)
{
LONG cbCopied;
ERRB errb;
// Initialize variable
errb = S_OK;
// Seek to the right locations
FileSeek (pFileData->fFile, foNil, 0, &errb);
if (errb != S_OK)
return(errb);
FileSeek (hfpbDest, foOffset, 0, &errb);
if (errb != S_OK)
return(errb);
// Do the copy
while (dwBlockSize)
{
if ((cbCopied = dwBlockSize) > pFileData->dwMax)
cbCopied = pFileData->dwMax;
if (FileRead (pFileData->fFile, pFileData->pMem, cbCopied, &errb) !=
cbCopied)
return(E_FILEREAD);
if (FileWrite(hfpbDest, pFileData->pMem, cbCopied, &errb) != cbCopied)
return(E_FILEWRITE);
dwBlockSize -= cbCopied;
}
return(S_OK);
}
PRIVATE HRESULT PASCAL NEAR UpdateDataNode (_LPIPB lpipb, PWORDINFO pWordInfo)
{
// Local replacement Variables
PBTREEDATA pTreeData = &lpipb->BTreeData;
PFILEDATA pOutFile = &lpipb->OutFile; // Output data structure
PFILEDATA pInFile = &lpipb->InFile; // Input data structure
HFPB fFile = pOutFile->fFile; // Output file handle
PNODEINFO pIndexDataNode = lpipb->pIndexDataNode;
DWORD dwNewDataSize;
ERRB errb;
// Working Variables
DWORD dwEncodedSize = 0; // Size of encoded block
DWORD dwTopicIdDelta; // Really only used for weight values
DWORD dwNewTopicId = 0;
DWORD dwIndexTopicId = 0;
DWORD dwNewTopicCount;
DWORD dwIndexTopicCount;
DWORD dwTopicCount;
FILEOFFSET foStart; // Physical beginning of bit compression block
WORD wWeight = 0; // Only used when IDXF_NORMALIZE is set
DWORD dwTopicId = 0; // Only used when IDXF_NORMALIZE is set
int cbTemp; // Number of compressed bytes consumed by CbByteUnpack
OCCF occf = lpipb->occf;
BYTE fetchOldData;
BYTE fetchNewData;
PIH20 pHeader = &lpipb->BTreeData.Header;
HRESULT fRet;
// Initialize variables
wWeight = 0; // UNDONE: Don't need it
// Reset the file pointer
FileSeek (pOutFile->fFile,
foStart = pOutFile->foPhysicalOffset = foNil, 0, &errb);
pOutFile->pCurrent = pOutFile->pMem;
pOutFile->cbLeft = pOutFile->dwMax;
pOutFile->ibit = cbitBYTE - 1;
dwIndexTopicCount = pWordInfo->dwIndexTopicCount;
dwNewTopicCount = pWordInfo->dwNewTopicCount;
fetchOldData = fetchNewData = TRUE;
pWordInfo->dwOldTopicId = pWordInfo->dwNewTopicId = dwTopicCount = 0;
// Initialize pIndexDataNode structure
pIndexDataNode->nodeOffset = pWordInfo->dataLocation;
pIndexDataNode->dwDataSizeLeft = pWordInfo->dwDataSize;
if ((fRet = ReadNewData(pIndexDataNode)) != S_OK)
return(fRet);
while (dwIndexTopicCount && dwNewTopicCount)
{
// Get the topicId from the new file
if (fetchNewData)
{
if (pInFile->cbLeft < 2 * sizeof (DWORD))
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft,
&errb);
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
cbTemp = CbByteUnpack (&dwTopicIdDelta, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
pWordInfo->dwNewTopicId = (dwNewTopicId += dwTopicIdDelta);
fetchNewData = FALSE;
}
if (fetchOldData)
{
if (pIndexDataNode->ibit < cbitBYTE - 1)
{
pIndexDataNode->ibit = cbitBYTE - 1;
pIndexDataNode->pCurPtr ++;
}
// Get the topicId from the index file
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyTopicId,
&dwTopicIdDelta)) != S_OK)
return fRet;
pWordInfo->dwIndexTopicId = (dwIndexTopicId += dwTopicIdDelta);
fetchOldData = FALSE;
}
if (dwIndexTopicId < dwNewTopicId)
{
if ((fRet = EmitOldData (lpipb, pIndexDataNode,
pWordInfo)) != S_OK)
return(fRet);
fetchOldData = TRUE;
dwTopicCount++;
dwIndexTopicCount --;
}
else if (dwIndexTopicId == dwNewTopicId)
{
DWORD dwTmp;
if (lpipb->idxf & IDXF_NORMALIZE)
{
if ((fRet = FGetBits(pIndexDataNode, &dwTmp,
sizeof (USHORT) * cbitBYTE)) != S_OK)
return fRet;
}
if (occf & OCCF_HAVE_OCCURRENCE)
{
if ((fRet = SkipOldData (lpipb, pIndexDataNode)) != S_OK)
return(fRet);
}
fetchOldData = TRUE;
dwIndexTopicCount --;
if ((fRet = EmitNewData (lpipb, pWordInfo, FALSE)) != S_OK)
return(fRet);
dwNewTopicCount --;
fetchNewData = TRUE;
dwTopicCount++;
}
else
{
if ((fRet = EmitNewData (lpipb, pWordInfo, TRUE)) != S_OK)
return(fRet);
dwNewTopicCount --;
fetchNewData = TRUE;
pWordInfo->dwIndexTopicCount++;
dwTopicCount++;
}
}
while (dwIndexTopicCount)
{
if (fetchOldData)
{
if (pIndexDataNode->ibit < cbitBYTE - 1)
{
pIndexDataNode->ibit = cbitBYTE - 1;
pIndexDataNode->pCurPtr ++;
}
// Get the topicId from the index file
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyTopicId,
&dwTopicIdDelta)) != S_OK)
return fRet;
pWordInfo->dwIndexTopicId = (dwIndexTopicId += dwTopicIdDelta);
fetchOldData = FALSE;
}
if ((fRet = EmitOldData (lpipb, pIndexDataNode,
pWordInfo)) != S_OK)
return(fRet);
fetchOldData = TRUE;
dwIndexTopicCount --;
dwTopicCount++;
}
while (dwNewTopicCount)
{
// Get the topicId from the new file
if (fetchNewData)
{
if (pInFile->cbLeft < 2 * sizeof (DWORD))
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
pInFile->cbLeft += FileRead (pInFile->fFile, pInFile->pMem +
pInFile->cbLeft, pInFile->dwMax - pInFile->cbLeft,
&errb);
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
cbTemp = CbByteUnpack (&dwTopicIdDelta, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
pWordInfo->dwNewTopicId = (dwNewTopicId += dwTopicIdDelta);
fetchNewData = FALSE;
}
if ((fRet = EmitNewData (lpipb, pWordInfo, TRUE)) != S_OK)
return(fRet);
fetchNewData = TRUE;
dwNewTopicCount --;
dwTopicCount++;
pWordInfo->dwIndexTopicCount++;
}
// Adjust for some bits used
if (pOutFile->ibit < cbitBYTE - 1)
{
pOutFile->pCurrent++;
pOutFile->cbLeft--;
pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
}
// Flush the output buffer
if (FileWrite (pOutFile->fFile, pOutFile->pMem,
(LONG)(pOutFile->pCurrent - pOutFile->pMem), &errb) !=
(LONG)(pOutFile->pCurrent - pOutFile->pMem))
return(errb);
dwNewDataSize = DwSubFo(pOutFile->foPhysicalOffset, foStart);
if (pWordInfo->dwDataSize < dwNewDataSize)
{
// ERIC: Find the best fit block here
// - Add the block pointed by pWordInfo into the free list
// - Find a new block in the freelist
// if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
// foNewDataOffset, dwNewDataSize)) != S_OK)
// where foNewDataOffset may be the max offset or the freelist
// block offset
FILEOFFSET foOffset1, foNewDataOffset;
WORD wNumBlocksTemp;
WORD wMaxBlocksTemp;
// Before adding that block to the FreeList,
// check whether the FreeList itself needs to grow first
QFREELIST qFreeList = _GLOBALLOCK(lpipb->hFreeList);
wNumBlocksTemp = qFreeList->flh.wNumBlocks;
wMaxBlocksTemp = qFreeList->flh.wMaxBlocks;
_GLOBALUNLOCK(lpipb->hFreeList);
// we use a count of two in the test below, in case not only the old block is added but
// also an entry for the unused portion of the new block (later).
if (wMaxBlocksTemp < 2 || wNumBlocksTemp >= wMaxBlocksTemp - 2)
{
HFREELIST hFreeListTemp;
// if the free list can't grow, fall through to FreeListAdd, where the
// smallest free entry will be overwritten and re-used
if (wMaxBlocksTemp < MAXWORD - wDefaultFreeListSize)
{
hFreeListTemp = FreeListRealloc(lpipb->hFreeList,
(WORD)(wMaxBlocksTemp + wDefaultFreeListSize),
&errb);
if (errb != S_OK)
return errb;
lpipb->hFreeList = hFreeListTemp;
}
}
FreeListAdd(lpipb->hFreeList, pWordInfo->dataLocation, MakeFo(pWordInfo->dwDataSize,0));
foNewDataOffset = FreeListGetBestFit(lpipb->hFreeList, MakeFo(dwNewDataSize,0), &errb);
if (FoIsNil(foNewDataOffset))
{
#ifdef _DEBUGFREE
_DPF2("UpdateDataNode: Grow from %ld to %ld failed: appending to EOF\n", pWordInfo->dwDataSize,\
dwNewDataSize);
#endif
foNewDataOffset = lpipb->foMaxOffset;
}
else
{
#ifdef _DEBUGFREE
_DPF3("UpdateDataNode: Grow from %ld to %ld uses free block at %ld\n", pWordInfo->dwDataSize,\
dwNewDataSize, foNewDataOffset.dwOffset );
#endif
foOffset1 = FreeListGetBlockAt(lpipb->hFreeList, foNewDataOffset, &errb);
if (FoCompare(foOffset1,MakeFo(sizeof(FREELIST),0)) > 0)
FreeListAdd(lpipb->hFreeList, FoAddDw(foNewDataOffset,dwNewDataSize),
FoSubFo(foOffset1,MakeFo(dwNewDataSize,0)));
}
if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
foNewDataOffset, dwNewDataSize)) != S_OK)
return fRet;
pWordInfo->dataLocation = foNewDataOffset;
//if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
// lpipb->foMaxOffset, dwNewDataSize)) != S_OK)
// return fRet;
//pWordInfo->dataLocation = lpipb->foMaxOffset;
// ERIC: Only increase the size of the file if foMaxOffset is used
if (FoEquals(foNewDataOffset,lpipb->foMaxOffset))
{
lpipb->foMaxOffset = FoAddDw (lpipb->foMaxOffset, dwNewDataSize);
#ifdef _DEBUG
dwOldDataLoss += pWordInfo->dwDataSize;
dwOldDataNeed += dwNewDataSize;
#endif
}
pWordInfo->dwDataSize = dwNewDataSize;
}
else
{
if ((fRet = CopyBlockFile (pOutFile, lpipb->hfpbIdxFile,
pWordInfo->dataLocation, dwNewDataSize)) != S_OK)
return fRet;
}
pWordInfo->dwMergeTopicCount = dwTopicCount;
return(S_OK);
}
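/*************************************************************************
* SkipOldData
*
* Skips one topic's occurrence data (word counts and/or byte offsets,
* depending on lpipb->occf) in the existing index data node without
* emitting anything to the output buffer.
*
* Returns S_OK, or the error code from the failing bit-stream read.
*************************************************************************/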
PUBLIC HRESULT PASCAL FAR SkipOldData (_LPIPB lpipb, PNODEINFO pIndexDataNode)
{
HRESULT fRet;
DWORD dwOccs;
DWORD dwTmp; // Trash variable.
OCCF occf = lpipb->occf;
PIH20 pHeader = &lpipb->BTreeData.Header;
// Get the number of occurrences
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOccCount,
&dwOccs)) != S_OK)
return fRet;
//
// One pass through here for each occurrence in the
// current sub-list.
//
for (; dwOccs; dwOccs--)
{
//
// Keeping word-counts? If so, get it.
//
if (occf & OCCF_COUNT)
{
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyWordCount,
&dwTmp)) != S_OK)
{
return fRet;
}
}
//
// Keeping byte-offsets? If so, get it.
//
if (occf & OCCF_OFFSET)
{
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOffset,
&dwTmp)) != S_OK)
return fRet;
}
}
return S_OK;
}
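/*************************************************************************
* EmitNewData
*
* Emits one topic's worth of data from the new (update) input stream to
* the output buffer: the topic-id delta, an optional 16-bit term weight
* (IDXF_NORMALIZE), and the occurrence list (count, then word counts
* and/or offsets per lpipb->occf). When fnewData is TRUE the occurrence
* count also contributes to the sigma term used for weight normalization.
*
* Returns S_OK, or the error code from the failing read or write.
*************************************************************************/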
PRIVATE HRESULT PASCAL FAR EmitNewData (_LPIPB lpipb, PWORDINFO pWordInfo,
BOOL fnewData)
{
DWORD dwTopicDelta;
DWORD dwOccs = 0;
DWORD dwTemp;
WORD wWeight = 0;
PBTREEDATA pTreeData = &lpipb->BTreeData;
PFILEDATA pInFile = &lpipb->InFile;
PFILEDATA pOutFile = &lpipb->OutFile;
OCCF occf = lpipb->occf;
PIH20 pHeader = &lpipb->BTreeData.Header;
int cbTemp;
ERRB errb;
HRESULT fRet;
// Set the delta
dwTopicDelta = pWordInfo->dwNewTopicId - pWordInfo->dwOldTopicId;
pWordInfo->dwOldTopicId = pWordInfo->dwNewTopicId;
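// Byte-align the output buffer before emitting this topic's data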
if (pOutFile->ibit < cbitBYTE - 1)
{
pOutFile->pCurrent++;
pOutFile->cbLeft--;
pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
pOutFile->ibit = cbitBYTE - 1;
}
FAddDword (pOutFile, dwTopicDelta, pHeader->ckeyTopicId);
if (occf & OCCF_HAVE_OCCURRENCE)
{
// Get number of occ data records for this topic
if (pInFile->cbLeft < 2 * sizeof (DWORD))
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
pInFile->cbLeft += FileRead (pInFile->fFile,
pInFile->pMem + pInFile->cbLeft,
pInFile->dwMax - pInFile->cbLeft, &errb);
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
cbTemp = CbByteUnpack (&dwOccs, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
}
// If we are term weighting we have to calculate the weight
if (lpipb->idxf & IDXF_NORMALIZE)
{
FLOAT rLog;
FLOAT rTerm;
FLOAT rWeight;
FLOAT fOcc;
#ifndef ISBU_IR_CHANGE
rLog = (float) log10(cHundredMillion/(double)pWordInfo->dwIndexTopicCount);
rTerm = rLog*rLog;
if (fnewData)
{
fOcc = (float) min(cTFThreshold, dwOccs);
// Add the new factor into the sigma term
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] *=
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] += fOcc * fOcc * rTerm;
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] =
(float)(sqrt((double)lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]));
}
// NOTE : The following weight computation, until the assignment to wWeight, is
// very similar to the weight computation in WriteDataNode() of permind2.c file.
// Read the explanation there for the hard coded figures and logic appearing below.
rTerm = (float) (8.0 - log10((double)pWordInfo->dwIndexTopicCount));
// In extreme cases, rTerm could be 0 or even negative (when
// pWordInfo->dwIndexTopicCount approaches or exceeds 100,000,000)
if (rTerm <= (float) 0.0)
rTerm = cVerySmallWt; // very small value. == log(100 mil/ 95 mil)
rWeight = ((float) min(cTFThreshold, dwOccs)) * rTerm * rTerm / lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
// without the additional rTerm, we would probably be between 0.0 and 1.0
if (rWeight > rTerm)
wWeight = 0xFFFF;
else
wWeight = (WORD) ((float)0xFFFF * rWeight / rTerm);
#else
rLog = (float)(1.0) / (float)pWordInfo->dwIndexTopicCount;
rTerm = rLog * rLog;
if (fnewData)
{
// Add the new factor into the sigma term
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] *=
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId];
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] +=
dwOccs * rTerm;
lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId] =
(float)(sqrt((double)lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]));
}
rTerm = rTerm * (float)65535.0;
rWeight = (float)dwOccs * rTerm /
(float)(lpipb->wi.hrgsigma[pWordInfo->dwNewTopicId]);
if (rWeight >= 65535.0)
wWeight = 65535;
else
wWeight = (WORD)rWeight;
#endif // ISBU_IR_CHANGE
// Write the weight to the output buffer
if ((fRet = FWriteBits (pOutFile, (DWORD)wWeight,
(BYTE)(sizeof (WORD) * cbitBYTE))) != S_OK)
return fRet;
}
if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
return(S_OK);
// Write the OccCount
FAddDword (pOutFile, dwOccs, pHeader->ckeyOccCount);
// Encode the occ block
for (; dwOccs; dwOccs--)
{
// Make sure input buffer holds enough data
if (pInFile->cbLeft < 5 * sizeof (DWORD))
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
pInFile->cbLeft += FileRead (pInFile->fFile,
pInFile->pMem + pInFile->cbLeft,
pInFile->dwMax - pInFile->cbLeft, &errb);
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
if (occf & OCCF_COUNT)
{
cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
if ((fRet = FAddDword (pOutFile, dwTemp, pHeader->ckeyWordCount))
!= S_OK)
return(fRet);
}
if (occf & OCCF_OFFSET)
{
cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
if ((fRet = FAddDword (pOutFile, dwTemp, pHeader->ckeyOffset))
!= S_OK)
return(fRet);
}
}
return(S_OK);
}
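/*************************************************************************
* EmitOldData
*
* Copies one topic's worth of data from the existing index data node to
* the output buffer: the topic-id delta, the existing 16-bit term weight
* (when IDXF_NORMALIZE is set), and the occurrence list (count, then
* word counts and/or offsets per lpipb->occf).
*
* Returns S_OK, or the error code from the failing read or write.
*************************************************************************/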
PRIVATE HRESULT PASCAL FAR EmitOldData (_LPIPB lpipb, PNODEINFO pIndexDataNode,
PWORDINFO pWordInfo)
{
DWORD dwTopicDelta;
DWORD dwOccs;
DWORD dwTmp;
WORD wWeight = 0;
PFILEDATA pOutFile = &lpipb->OutFile;
OCCF occf = lpipb->occf;
HRESULT fRet;
PIH20 pHeader = &lpipb->BTreeData.Header;
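// Byte-align the output buffer before emitting this topic's data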
if (pOutFile->ibit < cbitBYTE - 1)
{
pOutFile->pCurrent++;
pOutFile->cbLeft--;
pOutFile->foPhysicalOffset = FoAddDw (pOutFile->foPhysicalOffset, 1);
pOutFile->ibit = cbitBYTE - 1;
}
// Set the delta
dwTopicDelta = pWordInfo->dwIndexTopicId - pWordInfo->dwOldTopicId;
pWordInfo->dwOldTopicId = pWordInfo->dwIndexTopicId;
if ((fRet = FAddDword (pOutFile, dwTopicDelta,
pHeader->ckeyTopicId)) != S_OK)
return(fRet);
// If we are term weighting we have to copy the weight through
if (lpipb->idxf & IDXF_NORMALIZE)
{
if ((fRet = FGetBits(pIndexDataNode, &dwTmp, sizeof (USHORT) * cbitBYTE))
!= S_OK)
return(fRet);
// Write the existing weight back to the output buffer
wWeight = (WORD)dwTmp;
if ((fRet = FWriteBits (pOutFile, (DWORD)wWeight,
(BYTE)(sizeof (WORD) * cbitBYTE))) != S_OK)
return(fRet);
}
// Don't do anything else if there is nothing else to do!!!
if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
return S_OK;
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOccCount,
&dwOccs)) != S_OK)
return fRet;
// Write the OccCount
if ((fRet = FAddDword (pOutFile, dwOccs,
pHeader->ckeyOccCount)) != S_OK)
return(fRet);
// Encode the occ block
for (; dwOccs; dwOccs--)
{
if (occf & OCCF_COUNT)
{
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyWordCount,
&dwTmp)) != S_OK)
return fRet;
if ((fRet = FAddDword (pOutFile, dwTmp, pHeader->ckeyWordCount))
!= S_OK)
return(fRet);
}
if (occf & OCCF_OFFSET)
{
if ((fRet = FGetDword(pIndexDataNode, pHeader->ckeyOffset,
&dwTmp)) != S_OK)
return fRet;
if ((fRet = FAddDword (pOutFile, dwTmp, pHeader->ckeyOffset))
!= S_OK)
return(fRet);
}
}
return(S_OK);
}
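/*************************************************************************
* CopyNewDataToStemNode
*
* Writes the first entry of a fresh stem node: the word (prefix
* compressed against pLastWord) followed by the file offset of the
* child node one level down, taken from rgpTmpNodeInfo or rgpNodeInfo
* depending on the USE_TEMP_NODE_04 flag, then updates the node's
* free-space word.
*
* Returns the number of bytes used in the node buffer.
*************************************************************************/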
PRIVATE int PASCAL NEAR CopyNewDataToStemNode (_LPIPB lpipb,
PNODEINFO pTmpNode, LPB pWord, LPB pLastWord, int cLevel, int fFlag)
{
LPB pWordStorage;
/************************************************
* Emit the word data to the temp block
************************************************/
pWordStorage = pTmpNode->pBuffer + sizeof(WORD);
pWordStorage += PrefixCompressWord (pWordStorage,
pWord, pLastWord, lpipb->occf & OCCF_LENGTH);
// Emit fileoffset
if (fFlag & USE_TEMP_NODE_04)
{
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpTmpNodeInfo[cLevel+1]->nodeOffset);
}
else
{
pWordStorage += CopyFileOffset (pWordStorage,
lpipb->BTreeData.rgpNodeInfo[cLevel+1]->nodeOffset);
}
pTmpNode->pCurPtr = pWordStorage;
SETWORD (pTmpNode->pBuffer, (WORD)(lpipb->BTreeData.Header.dwBlockSize
- (pWordStorage - pTmpNode->pBuffer)));
return (int)(pWordStorage - pTmpNode->pBuffer);
}
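/*************************************************************************
* CopyNewDataToLeafNode
*
* Writes the first entry of a fresh leaf node: the word (prefix
* compressed against pLastWord) followed by the optional field id, the
* merged topic count, the data block file offset and the data block
* size from pWordInfo, then updates the node's free-space word and
* cbLeft.
*
* Returns the number of bytes used in the node buffer.
*************************************************************************/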
PRIVATE int PASCAL NEAR CopyNewDataToLeafNode (_LPIPB lpipb, PNODEINFO pTmpNode,
PWORDINFO pWordInfo, LPB pWord, LPB pLastWord)
{
LPB pWordStorage;
/************************************************
* Emit the word data to the temp block
************************************************/
pWordStorage = pTmpNode->pBuffer + FOFFSET_SIZE + sizeof(WORD);
pWordStorage += PrefixCompressWord (pWordStorage,
pWord, pLastWord, lpipb->occf & OCCF_LENGTH);
// Emit field id, topic count, file offset, data size
if (lpipb->occf & OCCF_FIELDID)
pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwFieldId);
pWordStorage += CbBytePack (pWordStorage,
pWordInfo->dwMergeTopicCount);
pWordStorage += CopyFileOffset (pWordStorage, pWordInfo->dataLocation);
pWordStorage += CbBytePack (pWordStorage, pWordInfo->dwDataSize);
pTmpNode->pCurPtr = pWordStorage;
SETWORD (pTmpNode->pBuffer + FOFFSET_SIZE,
(WORD)(pTmpNode->cbLeft = (LONG)(lpipb->BTreeData.Header.dwBlockSize
- (pWordStorage - pTmpNode->pBuffer))));
return (int)(pWordStorage - pTmpNode->pBuffer);
}
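/*************************************************************************
* SkipNewData
*
* Skips one topic's occurrence data (count, then word counts and/or
* byte offsets per lpipb->occf) in the new (update) input stream,
* refilling the input buffer from disk as needed. Does nothing if no
* occurrence data is kept.
*
* Always returns S_OK.
*************************************************************************/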
PRIVATE HRESULT PASCAL FAR SkipNewData (_LPIPB lpipb, PWORDINFO pWordInfo)
{
DWORD dwOccs;
DWORD dwTemp;
PBTREEDATA pTreeData = &lpipb->BTreeData;
PFILEDATA pInFile = &lpipb->InFile;
PFILEDATA pOutFile = &lpipb->OutFile;
OCCF occf = lpipb->occf;
PIH20 pHeader = &lpipb->BTreeData.Header;
int cbTemp;
ERRB errb;
// Don't do anything else if there is nothing else to do!!!
if ((occf & OCCF_HAVE_OCCURRENCE) == 0)
return S_OK;
// Get number of occ data records for this topic
if (pInFile->cbLeft < 2 * sizeof (DWORD))
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
pInFile->cbLeft += FileRead (pInFile->fFile,
pInFile->pMem + pInFile->cbLeft,
pInFile->dwMax - pInFile->cbLeft, &errb);
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
cbTemp = CbByteUnpack (&dwOccs, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
// Encode the occ block
for (; dwOccs; dwOccs--)
{
// Make sure input buffer holds enough data
if (pInFile->cbLeft < 5 * sizeof (DWORD))
{
MEMMOVE (pInFile->pMem, pInFile->pCurrent, pInFile->cbLeft);
pInFile->cbLeft += FileRead (pInFile->fFile,
pInFile->pMem + pInFile->cbLeft,
pInFile->dwMax - pInFile->cbLeft, &errb);
pInFile->dwMax = pInFile->cbLeft;
pInFile->pCurrent = pInFile->pMem;
}
if (occf & OCCF_COUNT)
{
cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
}
if (occf & OCCF_OFFSET)
{
cbTemp = CbByteUnpack (&dwTemp, pInFile->pCurrent);
pInFile->pCurrent += cbTemp;
pInFile->cbLeft -= cbTemp;
}
}
return(S_OK);
}
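// Scratch word buffers used by the debug-only node validators below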
BYTE CurrentWord [1000];
BYTE LastWord [1000];
#if 0
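// Debug-only sanity checks: walk a stem or leaf node and verify that its
// words are stored in non-descending order.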
HRESULT CheckStemNode (PNODEINFO pNodeInfo)
{
LPB lpCurPtr;
WORD wWlen;
LPB lpMaxAddress = pNodeInfo->pMaxAddress;
FILEOFFSET nodeOffset;
lpCurPtr = pNodeInfo->pBuffer + sizeof(WORD);
// Reset the last word
*(LPWORD)LastWord = 0;
do
{
lpCurPtr = ExtractWord(CurrentWord, lpCurPtr, &wWlen);
if (StrCmpPascal2(LastWord, CurrentWord) > 0)
{
// _asm int 3;
return(SetErrCode (NULL, ERR_FAILED));
}
lpCurPtr += ReadFileOffset (&nodeOffset, lpCurPtr);
MEMCPY(LastWord, CurrentWord, wWlen + 2);
} while (lpCurPtr < lpMaxAddress);
return(S_OK);
}
HRESULT CheckLeafNode (PNODEINFO pNodeInfo, int occf)
{
LPB lpCurPtr;
WORD wWlen;
LPB lpMaxAddress = pNodeInfo->pMaxAddress;
FILEOFFSET nodeOffset;
DWORD dwTmp;
lpCurPtr = pNodeInfo->pBuffer + sizeof(WORD) + FOFFSET_SIZE;
// Reset the last word
*(LPWORD)LastWord = 0;
do
{
lpCurPtr = ExtractWord(CurrentWord, lpCurPtr, &wWlen);
if (StrCmpPascal2(LastWord, CurrentWord) > 0)
{
// _asm int 3;
return(SetErrCode (NULL, ERR_FAILED));
}
MEMCPY(LastWord, CurrentWord, wWlen + 2);
// Get fieldid and topic count
if (occf & OCCF_FIELDID)
lpCurPtr += CbByteUnpack (&dwTmp, lpCurPtr);
lpCurPtr += CbByteUnpack (&dwTmp, lpCurPtr);
// Get the data location and size
lpCurPtr += ReadFileOffset (&nodeOffset, lpCurPtr);
lpCurPtr += CbByteUnpack(&dwTmp, lpCurPtr);
} while (lpCurPtr < lpMaxAddress);
return(S_OK);
}
#endif