779 lines
21 KiB
C++
779 lines
21 KiB
C++
/*************************************************************************
|
|
* @doc SHROOM INTERNAL API *
|
|
* *
|
|
* IDXOBR.CPP *
|
|
* *
|
|
* Copyright (C) Microsoft Corporation 1997 *
|
|
* All Rights reserved. *
|
|
* *
|
|
* This file contains the implementation of CITIndexObjBridge, *
|
|
* which is a class used by CITIndexLocal to allow the old .c *
|
|
* search internals to call the new COM-based breaker and stemmer *
|
|
* objects.
|
|
* *
|
|
* *
|
|
**************************************************************************
|
|
* *
|
|
* Written By : Bill Aloof *
|
|
* Current Owner: billa *
|
|
* *
|
|
**************************************************************************/
|
|
#include <mvopsys.h>
|
|
|
|
#ifdef _DEBUG
|
|
static char s_aszModule[] = __FILE__; /* For error report */
|
|
#endif
|
|
|
|
|
|
#include <atlinc.h>
|
|
|
|
// MediaView (InfoTech) includes
|
|
#include <orkin.h>
|
|
#include <groups.h>
|
|
|
|
#include <itquery.h>
|
|
#include <itcat.h>
|
|
#include <itwbrk.h>
|
|
#include <itwbrkid.h>
|
|
#include "indeximp.h"
|
|
#include "queryimp.h"
|
|
#include "mvsearch.h"
|
|
#include "idxobr.h"
|
|
#include "common.h"
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Constructor and Destructor
|
|
//---------------------------------------------------------------------------
|
|
|
|
|
|
// Puts the bridge into its empty state: zero reference count, no COM
// interfaces, no conversion buffers and no query term-hit list.
CITIndexObjBridge::CITIndexObjBridge()
{
    // Reference count and per-call state.
    m_cRef = 0;
    m_fNormWord = FALSE;
    m_pexbrkpm = NULL;
    m_dwCodePageID = 0;

    // COM interfaces; these get filled in by SetWordBreaker.
    m_piwbrk = NULL;
    m_piwbrkc = NULL;
    m_pistem = NULL;
    m_piitstwdl = NULL;

    // Conversion buffers and their current sizes.
    m_hmemSrc = NULL;
    m_hmemDestNorm = NULL;
    m_hmemDestRaw = NULL;
    m_cbBufSrcCur = 0;
    m_cbBufDestNormCur = 0;
    m_cbBufDestRawCur = 0;

    // Term list built by AddQueryResultTerm.
    m_lpsipbTermHit = NULL;
}
|
|
|
|
// Frees the three conversion buffers, releases every COM interface that is
// still held and disposes of the query term-hit list.
CITIndexObjBridge::~CITIndexObjBridge()
{
    // Assert if references are still outstanding at destruction time.
    if (m_cRef > 0)
    {
        ITASSERT(FALSE);
    }

    // --- Conversion buffers ---
    if (NULL != m_hmemSrc)
    {
        _GLOBALFREE(m_hmemSrc);
        m_cbBufSrcCur = 0;
        m_hmemSrc = NULL;
    }

    if (NULL != m_hmemDestNorm)
    {
        _GLOBALFREE(m_hmemDestNorm);
        m_cbBufDestNormCur = 0;
        m_hmemDestNorm = NULL;
    }

    if (NULL != m_hmemDestRaw)
    {
        _GLOBALFREE(m_hmemDestRaw);
        m_cbBufDestRawCur = 0;
        m_hmemDestRaw = NULL;
    }

    // --- COM interfaces obtained in SetWordBreaker ---
    if (NULL != m_piwbrk)
    {
        m_piwbrk->Release();
        m_piwbrk = NULL;
    }

    if (NULL != m_piwbrkc)
    {
        m_piwbrkc->Release();
        m_piwbrkc = NULL;
    }

    if (NULL != m_pistem)
    {
        m_pistem->Release();
        m_pistem = NULL;
    }

    if (NULL != m_piitstwdl)
    {
        m_piitstwdl->Release();
        m_piitstwdl = NULL;
    }

    // --- Query term-hit list ---
    MVStopListDispose(m_lpsipbTermHit);
}
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// IUnknown Method Implementations
|
|
//---------------------------------------------------------------------------
|
|
|
|
// NOTE: This implementation of IUnknown assumes that this object is used
|
|
// only in a local context, meaning that no piece of code will hold onto
|
|
// an IUnknown pointer obtained via QueryInterface beyond the scope that
|
|
// an instance of this object was created in. For example, this object
|
|
// will very likely be created/destroyed in the same method. That's why
|
|
// there's no controlling IUnknown for us to forward AddRef's and Release's
|
|
// to. It is also the reason that IUnknown::Release doesn't call the
|
|
// class's destructor when the ref count goes to 0.
|
|
|
|
// Returns the requested interface on this object.  Supported interfaces are
// IWordSink, IStemSink and IUnknown (the latter via the IWordSink base).
//
//   riid    - identifier of the interface being requested.
//   ppvObj  - receives the interface pointer; set to NULL on failure.
//
// Returns S_OK on success, E_POINTER if ppvObj is NULL, or E_NOINTERFACE
// if riid is not one of the supported interfaces.
STDMETHODIMP
CITIndexObjBridge::QueryInterface(REFIID riid, LPVOID *ppvObj)
{
    if (ppvObj == NULL)
        return (SetErrReturn(E_POINTER));

    // COM requires the out-param to be NULL whenever the call fails.
    *ppvObj = NULL;

    if (riid == IID_IWordSink)
        *ppvObj = (void *)((PIWRDSNK) this);
    else if (riid == IID_IStemSink)
        *ppvObj = (void *)((PISTEMSNK) this);
    else if (riid == IID_IUnknown)
        *ppvObj = (void *)((IUnknown *) ((PIWRDSNK) this));
    else
        return (E_NOINTERFACE);

    // A successful QueryInterface hands out a counted reference.  Without
    // this, a caller that follows COM rules and calls Release on the pointer
    // it obtained here would drive the count below zero and trip the assert
    // in Release.
    AddRef();

    return (S_OK);
}
|
|
|
|
|
|
// Increments the reference count and returns the new value.
STDMETHODIMP_(ULONG)
CITIndexObjBridge::AddRef(void)
{
    m_cRef += 1;
    return (m_cRef);
}
|
|
|
|
|
|
// Decrements the reference count and returns the new value.  The object is
// NOT destroyed when the count reaches zero.  Releasing with a count that is
// already zero asserts and leaves the count untouched.
STDMETHODIMP_(ULONG)
CITIndexObjBridge::Release(void)
{
    if (m_cRef > 0)
    {
        return (--m_cRef);
    }

    // Unbalanced Release.
    ITASSERT(FALSE);
    return (m_cRef);
}
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// IWordSink Method Implementations
|
|
//---------------------------------------------------------------------------
|
|
|
|
|
|
/*****************************************************************
|
|
* @method STDMETHODIMP | IWordSink | PutWord |
|
|
* This method notifies IWordSink of a new word.
|
|
*
|
|
* @parm WCHAR const | *pwcInBuf | Pointer to the word to add.
|
|
* @parm ULONG |cwc | Count of characters in the word.
|
|
* @parm ULONG |cwcSrcLen | count of characters in pTextSource buffer
|
|
* (see <om IWordBreaker.BreakText>) that corresponds to the output word
|
|
* @parm ULONG |cwcSrcPos | the position of the word in pTextSource
|
|
* buffer that corresponds to the output word
|
|
*
|
|
* @rvalue S_OK | The operation completed successfully.
|
|
* @rvalue E_POINTER | Input buffer is NULL.
|
|
*
|
|
* @comm The values of <p cwcSrcLen> and <p cwcSrcPos> are used by the
|
|
* ISearch interface, which given a query and a text source, will highlight
|
|
* all hits within the text source that match the query. The location
|
|
* of the text to be highlighted is computed from <p cwcSrcLen> and
|
|
* <p cwcSrcPos>. Since <p pwcInBuf> is constant and should not be
|
|
* modified by PutWord, it can point directly into <p pTextSource>.
|
|
* Values of cwc larger than the ulMaxTokenSize specified in
|
|
* <om IWordBreaker.Init> will result in LANGUAGE_S_LARGE_WORD.
|
|
* @comm Text sent to PutWord should match the source text as closely
|
|
* as possible, including capitalization and accents.
|
|
*
|
|
* @comm You need to call this method for every word retrieved from
|
|
* <p pTextSource> except those for which the <om .PutAltWord> call
|
|
* has been made. The word sink automatically adds an end of word break
|
|
* (EOW) after this token.
|
|
*
|
|
****************************************************************/
|
|
|
|
STDMETHODIMP
CITIndexObjBridge::PutWord(WCHAR const *pwcInBuf, ULONG cwc,
                           ULONG cwcSrcLen, ULONG cwcSrcPos)
{
    HRESULT hr;
    DWORD   cbAnsi;
    char   *lpchBufRaw;

    if (pwcInBuf == NULL)
        return (E_POINTER);

    // Size the destination at sizeof(WCHAR) bytes per input character plus
    // room for the WORD length prefix.
    cbAnsi = (sizeof(WCHAR) * cwc) + sizeof(WORD);

    if (FAILED(hr =
            ReallocBuffer(&m_hmemDestRaw, &m_cbBufDestRawCur, cbAnsi)))
        return (hr);

    // When no alternate (normalized) word is pending, the raw word is copied
    // into the normalized buffer below, so that buffer has to exist and be
    // big enough.  When an alternate word IS pending, PutAltWord has already
    // allocated the buffer and its contents must not be disturbed.
    if (!m_fNormWord &&
        FAILED(hr =
            ReallocBuffer(&m_hmemDestNorm, &m_cbBufDestNormCur, cbAnsi)))
        return (hr);

    lpchBufRaw = (char *) _GLOBALLOCK(m_hmemDestRaw);

    // Convert the raw word to MBCS, storing the byte count as a WORD prefix.
    *((WORD *)lpchBufRaw) = (WORD)
        WideCharToMultiByte(m_dwCodePageID, 0, pwcInBuf, cwc,
                            lpchBufRaw + sizeof(WORD), cbAnsi - sizeof(WORD),
                            NULL, NULL);

    if (*((WORD *)lpchBufRaw) > 0)
    {
        char *lpchBufNorm;

        lpchBufNorm = (char *) _GLOBALLOCK(m_hmemDestNorm);

        // No alternate word was supplied: the normalized form is the raw form.
        if (!m_fNormWord)
            MEMCPY(lpchBufNorm, lpchBufRaw,
                   *((WORD *)lpchBufRaw) + sizeof(WORD));

        ITASSERT(m_pexbrkpm != NULL);
        if (m_pexbrkpm->lpfnOutWord != NULL)
        {
            DWORD  ibAnsiOffset;
            WCHAR *lpwchSrc;

            lpwchSrc = (WCHAR *) _GLOBALLOCK(m_hmemSrc);

            // Compute the ANSI offset of the beginning of the raw word.
            // The ANSI buffer we pass won't get written to (the output size
            // is 0) - we just pass a pointer in case the routine requires a
            // non-NULL value for that param.
            ibAnsiOffset = WideCharToMultiByte(m_dwCodePageID, 0,
                                               lpwchSrc, cwcSrcPos,
                                               lpchBufRaw, 0, NULL, NULL);

            // Call the supplied word callback function.
            hr = m_pexbrkpm->lpfnOutWord((LPBYTE)lpchBufRaw,
                                         (LPBYTE)lpchBufNorm,
                                         ibAnsiOffset, m_pexbrkpm->lpvUser);

            _GLOBALUNLOCK(m_hmemSrc);
        }

        _GLOBALUNLOCK(m_hmemDestNorm);

        // The pending alternate word (if any) belonged to this position and
        // has now been consumed; don't let it leak into the next word.
        m_fNormWord = FALSE;
    }
    else
        hr = E_UNEXPECTED;

    _GLOBALUNLOCK(m_hmemDestRaw);

    return (hr);
}
|
|
|
|
/****************************************************************
|
|
* @method STDMETHODIMP | IWordSink | PutAltWord |
|
|
* Allows the word breaker to put more than one word in the same place.
|
|
* @parm WCHAR const | *pwcInBuf | Pointer to the word to add.
|
|
* @parm ULONG |cwc | Count of characters in the word.
|
|
* @parm ULONG |cwcSrcLen | count of characters in pTextSource buffer
|
|
* (see <om IWordBreaker.BreakText>) that corresponds to the output word.
|
|
* @parm ULONG |cwcSrcPos | the position of the word in pTextSource
|
|
* buffer that corresponds to the output word
|
|
*
|
|
* @rvalue S_OK | The operation completed successfully.
|
|
* @rvalue E_POINTER | Input buffer is NULL.
|
|
*
|
|
* @comm
|
|
* When you need to add more than one word in the same place, use
|
|
* PutAltWord for all alternative words except the last one. Use
|
|
* PutWord for the final alternative, indicating movement to the next position.
|
|
* @ex The phrase "Where is Kyle's document" would be stored as: |
|
|
* pWSink->PutWord( L"Where", 5, 5, 0 );
|
|
* pWSink->PutWord( L"is", 2, 2, 6 );
|
|
* pWSink->PutAltWord( L"Kyle", 4, 6, 9 );
|
|
* pWSink->PutWord( L"Kyle's", 6, 6, 9 );
|
|
* pWSink->PutWord( L"document", 8, 8, 16 );
|
|
*
|
|
*
|
|
***************************************************************/
|
|
STDMETHODIMP
CITIndexObjBridge::PutAltWord(WCHAR const *pwcInBuf, ULONG cwc,
                              ULONG cwcSrcLen, ULONG cwcSrcPos)
{
    HRESULT hr;
    DWORD   cbNeeded;
    char   *lpchNorm;
    WORD    cbConverted;

    if (pwcInBuf == NULL)
        return (E_POINTER);

    // sizeof(WCHAR) bytes per input character plus the WORD length prefix.
    cbNeeded = (sizeof(WCHAR) * cwc) + sizeof(WORD);

    hr = ReallocBuffer(&m_hmemDestNorm, &m_cbBufDestNormCur, cbNeeded);
    if (FAILED(hr))
        return (hr);

    lpchNorm = (char *) _GLOBALLOCK(m_hmemDestNorm);

    // Convert into the normalized-word buffer, just past the length prefix,
    // then record the converted byte count in the prefix.
    cbConverted = (WORD)
        WideCharToMultiByte(m_dwCodePageID, NULL, pwcInBuf, cwc,
                            lpchNorm + sizeof(WORD), cbNeeded - sizeof(WORD),
                            NULL, NULL);
    *((WORD *)lpchNorm) = cbConverted;

    if (cbConverted > 0)
    {
        // Tell the next IWordSink::PutWord that a normalized word is waiting.
        m_fNormWord = TRUE;
    }
    else
    {
        hr = E_UNEXPECTED;
    }

    _GLOBALUNLOCK(m_hmemDestNorm);

    return (hr);
}
|
|
|
|
|
|
/****************************************************************
|
|
* @method STDMETHODIMP | IWordSink | StartAltPhrase |
|
|
* This method is not implemented.
|
|
***************************************************************/
|
|
STDMETHODIMP
CITIndexObjBridge::StartAltPhrase(void)
{
    // Alternate phrases are not supported by this sink.
    return E_NOTIMPL;
}
|
|
|
|
|
|
/****************************************************************
|
|
* @method STDMETHODIMP | IWordSink | EndAltPhrase|
|
|
* This method is not implemented.
|
|
***************************************************************/
|
|
STDMETHODIMP
CITIndexObjBridge::EndAltPhrase(void)
{
    // Alternate phrases are not supported by this sink.
    return E_NOTIMPL;
}
|
|
|
|
|
|
/****************************************************************
|
|
* @method STDMETHODIMP | IWordSink | PutBreak |
|
|
* This method is not implemented.
|
|
*
|
|
* @parm WORDREP_BREAK_TYPE | breakType | Specifies break type
|
|
*
|
|
*
|
|
***************************************************************/
|
|
STDMETHODIMP
CITIndexObjBridge::PutBreak(WORDREP_BREAK_TYPE breakType)
{
    // Break notifications are not supported; breakType is ignored.
    return E_NOTIMPL;
}
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// IStemSink Method Implementations
|
|
//---------------------------------------------------------------------------
|
|
|
|
|
|
/****************************************************************
|
|
* @method STDMETHODIMP | IStemSink | PutWord |
|
|
* Notifies IStemSink of a word that is similar to the input word
|
|
* of <om IStemmer.StemWord> method.
|
|
*
|
|
* @parm WCHAR const | *pwcInBuf | Pointer to the word
|
|
* @parm ULONG | cwc | Number of characters in the word
|
|
*
|
|
* @rvalue E_POINTER | The input buffer is NULL.
|
|
*
|
|
***************************************************************/
|
|
STDMETHODIMP
CITIndexObjBridge::PutWord(WCHAR const *pwcInBuf, ULONG cwc)
{
    HRESULT hr;
    DWORD   cbNeeded;
    char   *lpchStem;
    WORD    cbConverted;

    if (pwcInBuf == NULL)
        return (E_POINTER);

    // sizeof(WCHAR) bytes per input character plus the WORD length prefix.
    cbNeeded = (sizeof(WCHAR) * cwc) + sizeof(WORD);

    hr = ReallocBuffer(&m_hmemDestNorm, &m_cbBufDestNormCur, cbNeeded);
    if (FAILED(hr))
        return (hr);

    lpchStem = (char *) _GLOBALLOCK(m_hmemDestNorm);

    // Convert the stemmed word into the normalized-word buffer (where
    // StemWord picks it up) and store its byte count in the WORD prefix.
    cbConverted = (WORD)
        WideCharToMultiByte(m_dwCodePageID, NULL, pwcInBuf, cwc,
                            lpchStem + sizeof(WORD), cbNeeded - sizeof(WORD),
                            NULL, NULL);
    *((WORD *)lpchStem) = cbConverted;

    if (cbConverted == 0)
        hr = E_UNEXPECTED;

    _GLOBALUNLOCK(m_hmemDestNorm);

    return (hr);
}
|
|
|
|
|
|
/****************************************************************
|
|
* @method STDMETHODIMP | IStemSink | PutAltWord |
|
|
* Notifies IStemSink of a word that is similar to the input word
|
|
* of <om IStemmer.StemWord> method.
|
|
* @parm WCHAR const | *pwcInBuf | Pointer to the word
|
|
* @parm ULONG | cwc | Number of characters in the word
|
|
*
|
|
* @rvalue S_OK | This method always returns success.
|
|
*
|
|
* @comm
|
|
* InfoTech Search only supports getting back one stemmed version
|
|
* of the raw word. Any others are ignored.
|
|
* @xref <om .PutWord>
|
|
***************************************************************/
|
|
STDMETHODIMP
CITIndexObjBridge::PutAltWord(WCHAR const *pwcInBuf, ULONG cwc)
{
    // Only a single stemmed form per raw word is supported (delivered via
    // IStemSink::PutWord); any alternates are accepted and discarded.
    return S_OK;
}
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Other Public Method Implementations
|
|
//---------------------------------------------------------------------------
|
|
|
|
|
|
// By the time this method is called, we assume the breaker has been fully
|
|
// initialized via IWordBreakerConfig (if present) and via IWordBreaker::Init.
|
|
STDMETHODIMP
CITIndexObjBridge::SetWordBreaker(PIWBRK piwbrk)
{
    LCID    lcidIgnored;
    HRESULT hrConfig;

    if (piwbrk == NULL)
        return (SetErrReturn(E_POINTER));

    // A breaker may be set only once per bridge instance.
    if (m_piwbrk != NULL)
        return (SetErrReturn(E_ALREADYINIT));

    // Pick up IWordBreakerConfig if it's there, otherwise we'll go without
    // it.  If we did get it, ask it for the stemmer as well (which may also
    // be absent).
    hrConfig = piwbrk->QueryInterface(IID_IWordBreakerConfig,
                                      (LPVOID *) &m_piwbrkc);
    if (SUCCEEDED(hrConfig))
    {
        m_piwbrkc->GetWordStemmer(&m_pistem);
    }

    // Pick up IITStopWordList if it's there, otherwise we'll go without it.
    piwbrk->QueryInterface(IID_IITStopWordList, (LPVOID *) &m_piitstwdl);

    // Take the code page from the breaker configuration when possible;
    // fall back to the system ANSI code page otherwise.
    if (m_piwbrkc == NULL)
    {
        m_dwCodePageID = GetACP();
    }
    else if (FAILED(m_piwbrkc->GetLocaleInfo(&m_dwCodePageID, &lcidIgnored)))
    {
        m_dwCodePageID = GetACP();
    }

    // Hold our own reference on the breaker.
    m_piwbrk = piwbrk;
    m_piwbrk->AddRef();

    return (S_OK);
}
|
|
|
|
|
|
// NOTE: If CITIndexObjBridge::BreakText was going to provide more than
|
|
// one buffer's worth of text to the COM breaker, then the very first members of
|
|
// CITIndexObjBridge would be made to match those of TEXT_SOURCE so that
|
|
// FillTextSource callback could call back into us (by casting the TEXT_SOURCE
|
|
// param passed to it). Otherwise, we would have no way of providing
|
|
// object-oriented breaking - we would have to resort to using globals.
|
|
|
|
// TEXT_SOURCE refill callback handed to the breaker by BreakText.  There is
// never any additional text to supply, so it unconditionally fails.
SCODE __stdcall FillTextSource(TEXT_SOURCE *pTextSource)
{
    // Failure is how "no more text" is signalled.
    return (E_FAIL);
}
|
|
|
|
|
|
// Converts the MBCS text in pexbrkpm->lpbBuf to Unicode and runs it through
// the COM word breaker set via SetWordBreaker, with this object acting as
// the IWordSink.
//
// Returns E_UNEXPECTED if no breaker has been set or the conversion to
// Unicode produces nothing, E_POINTER / E_INVALIDARG for a NULL parameter
// block / NULL text buffer, E_NOTSUPPORTED if a non-text break type is
// requested without IWordBreakerConfig, or whatever the breaker, its
// configuration interface, or the buffer allocation returns.
STDMETHODIMP
CITIndexObjBridge::BreakText(PEXBRKPM pexbrkpm)
{
    HRESULT hr = S_OK;
    DWORD   cwchText;

    if (m_piwbrk == NULL)
        return (E_UNEXPECTED);

    if (pexbrkpm == NULL)
        return (SetErrReturn(E_POINTER));

    if (pexbrkpm->lpbBuf == NULL)
        return (SetErrReturn(E_INVALIDARG));

    if (m_piwbrkc == NULL)
    {
        // No IWordBreakerConfig: only the default (text) break type can be
        // honored.
        if (pexbrkpm->dwBreakWordType != IITWBC_BREAKTYPE_TEXT)
            hr = E_NOTSUPPORTED;
    }
    else
    {
        // Configure the breaker: break type first, then the wildcard flag.
        DWORD grfBreak;

        hr = m_piwbrkc->SetBreakWordType(pexbrkpm->dwBreakWordType);
        if (SUCCEEDED(hr))
            hr = m_piwbrkc->GetControlInfo(&grfBreak, NULL);

        if (SUCCEEDED(hr))
        {
            SetGrfFlag(&grfBreak, IITWBC_BREAK_ACCEPT_WILDCARDS,
                       (pexbrkpm->fFlags & ACCEPT_WILDCARD));
            hr = m_piwbrkc->SetControlInfo(grfBreak, NULL);
        }
    }

    if (FAILED(hr))
        return (hr);

    // Per-call state consulted by our IWordSink methods.
    m_fNormWord = FALSE;
    m_pexbrkpm = pexbrkpm;
    cwchText = pexbrkpm->cbBufCount;

    hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur, sizeof(WCHAR) * cwchText);
    if (SUCCEEDED(hr))
    {
        WCHAR *lpwchText = (WCHAR *) _GLOBALLOCK(m_hmemSrc);

        // Convert the text source buffer to Unicode.
        cwchText = MultiByteToWideChar(m_dwCodePageID, NULL,
                                       (LPCSTR) pexbrkpm->lpbBuf,
                                       pexbrkpm->cbBufCount,
                                       lpwchText, cwchText);
        if (cwchText > 0)
        {
            TEXT_SOURCE txtsrc;

            txtsrc.pfnFillTextBuffer = FillTextSource;
            txtsrc.awcBuffer = lpwchText;
            txtsrc.iCur = 0;
            txtsrc.iEnd = cwchText;

            // Send the Unicode text buffer to the breaker.
            hr = m_piwbrk->BreakText(&txtsrc, (PIWRDSNK) this, NULL);
        }
        else
        {
            hr = E_UNEXPECTED;
        }

        _GLOBALUNLOCK(m_hmemSrc);
    }

    // The caller's parameter block is only valid for the duration of the call.
    m_pexbrkpm = NULL;

    return (hr);
}
|
|
|
|
|
|
// The stop word is in WORD length prefix format.
|
|
STDMETHODIMP
CITIndexObjBridge::LookupStopWord(LPBYTE lpbStopWord)
{
    HRESULT hr;
    DWORD   cbWord;
    DWORD   cwchWord;
    WCHAR  *lpwchWord;

    if (lpbStopWord == NULL)
        return (SetErrReturn(E_POINTER));

    // Without a stop word list interface there is nothing to look up in.
    if (m_piitstwdl == NULL)
        return (SetErrReturn(E_NOTIMPL));

    // The WORD prefix gives the MBCS byte count; it is also used as the
    // capacity, in characters, of the Unicode buffer.
    cbWord = (DWORD)(*((WORD *)lpbStopWord));
    cwchWord = cbWord;

    hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur, sizeof(WCHAR) * cwchWord);
    if (FAILED(hr))
        return (hr);

    lpwchWord = (WCHAR *) _GLOBALLOCK(m_hmemSrc);

    // Convert the stop word to Unicode.
    cwchWord = MultiByteToWideChar(m_dwCodePageID, NULL,
                                   (LPCSTR)lpbStopWord + sizeof(WORD), cbWord,
                                   lpwchWord, cwchWord);
    if (cwchWord > 0)
    {
        // Lookup the stop word; the list's result is returned as-is.
        hr = m_piitstwdl->LookupWord(lpwchWord, cwchWord);
    }
    else
    {
        hr = E_UNEXPECTED;
    }

    _GLOBALUNLOCK(m_hmemSrc);

    return (hr);
}
|
|
|
|
|
|
// Stem the raw word and return result in lpbStemWord.
|
|
// Both word buffers are in WORD length prefix format.
|
|
STDMETHODIMP
CITIndexObjBridge::StemWord(LPBYTE lpbStemWord, LPBYTE lpbRawWord)
{
    HRESULT hr;
    DWORD   cwch;
    DWORD   cbAnsi;

    if (lpbStemWord == NULL || lpbRawWord == NULL)
        return (SetErrReturn(E_POINTER));

    if (m_pistem == NULL)
        return (SetErrReturn(E_NOSTEMMER));

    // The WORD prefix gives the MBCS byte count of the raw word; it is also
    // used as the capacity, in characters, of the Unicode buffer.
    cwch = cbAnsi = (DWORD)(*((WORD *)lpbRawWord));

    if (SUCCEEDED(hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur,
                                     sizeof(WCHAR) * cwch)))
    {
        WCHAR *lpwchBuf;

        lpwchBuf = (WCHAR *) _GLOBALLOCK(m_hmemSrc);

        // Convert the word to be stemmed to Unicode.
        if ((cwch = MultiByteToWideChar(m_dwCodePageID, 0,
                                        (LPCSTR)lpbRawWord + sizeof(WORD),
                                        cbAnsi, lpwchBuf, cwch)) > 0)
        {
            // Stem the raw word.  The stemmer reports its result through our
            // IStemSink::PutWord, which leaves it in the normalized buffer.
            if (SUCCEEDED(hr =
                    m_pistem->StemWord(lpwchBuf, cwch, (PISTEMSNK) this)))
            {
                char *lpchStemBuf = NULL;

                // The normalized buffer only exists if a sink method has
                // allocated it.  A stemmer that succeeds without ever calling
                // IStemSink::PutWord can leave it unallocated, so guard
                // against locking/dereferencing a NULL handle.
                // NOTE(review): if the buffer exists but the stemmer did not
                // call PutWord this time, its contents are stale; that cannot
                // be detected here without extra state.
                if (m_hmemDestNorm != NULL)
                    lpchStemBuf = (char *) _GLOBALLOCK(m_hmemDestNorm);

                if (lpchStemBuf == NULL)
                {
                    hr = E_UNEXPECTED;
                }
                else
                {
                    WORD cbStemWord = *((WORD *)lpchStemBuf);

                    // Copy the stem word (prefix included) to lpbStemWord as
                    // long as it is not longer than the raw word.
                    if (cbStemWord <= cbAnsi)
                        MEMCPY(lpbStemWord, lpchStemBuf,
                               cbStemWord + sizeof(WORD));
                    else
                        hr = E_WORDTOOLONG;

                    _GLOBALUNLOCK(m_hmemDestNorm);
                }
            }
        }
        else
            hr = E_UNEXPECTED;

        _GLOBALUNLOCK(m_hmemSrc);
    }

    return (hr);
}
|
|
|
|
|
|
// On entry, lpbTermHit is a WORD-prefixed MBCS string.
|
|
// On exit, *ppvTermHit is a WORD-prefixed Unicode string.
|
|
STDMETHODIMP
CITIndexObjBridge::AddQueryResultTerm(LPBYTE lpbTermHit, LPVOID *ppvTermHit)
{
    HRESULT hr = S_OK;
    DWORD   cbTerm;
    DWORD   cwchTerm;
    WCHAR  *lpwchTerm;

    if (lpbTermHit == NULL || ppvTermHit == NULL)
        return (SetErrReturn(E_POINTER));

    // A zero code page is treated as "SetWordBreaker was never called".
    if (m_dwCodePageID == 0)
        return (SetErrReturn(E_NOTINIT));

    cbTerm = (DWORD)(*((WORD *)lpbTermHit));
    cwchTerm = cbTerm;

    // Create the term list on first use.
    if (m_lpsipbTermHit == NULL)
    {
        m_lpsipbTermHit = MVStopListInitiate(IDXOBR_TERMHASH_SIZE, &hr);
        if (m_lpsipbTermHit == NULL)
            return (hr);
    }

    // When allocating the buffer, add 1 char to leave room for the
    // Unicode string's WORD prefix.
    hr = ReallocBuffer(&m_hmemSrc, &m_cbBufSrcCur,
                       sizeof(WCHAR) * (cwchTerm + 1));
    if (FAILED(hr))
        return (hr);

    lpwchTerm = (WCHAR *) _GLOBALLOCK(m_hmemSrc);

    // Convert lpbTermHit to Unicode before searching or storing it;
    // the first WCHAR slot is reserved for the WORD length prefix.
    cwchTerm = MultiByteToWideChar(m_dwCodePageID, NULL,
                                   (LPCSTR)lpbTermHit + sizeof(WORD), cbTerm,
                                   lpwchTerm + 1, cwchTerm);
    if (cwchTerm > 0)
    {
        // Store the Unicode string length, but restate it in bytes
        // since the stopword list lookup code assumes MBCS.
        *lpwchTerm = (WORD)cwchTerm * sizeof(WCHAR);

        // Add the word to the list and then get a pointer to the stored copy.
        hr = MVStopListAddWord(m_lpsipbTermHit, (LPBYTE) lpwchTerm);
        if (SUCCEEDED(hr))
        {
            hr = MVStopListFindWordPtr(m_lpsipbTermHit,
                                       (LST)lpwchTerm, (LST *)ppvTermHit);
        }
    }
    else
    {
        hr = E_UNEXPECTED;
    }

    _GLOBALUNLOCK(m_hmemSrc);

    return (hr);
}
|
|
|
|
|
|
// This method should only be called after a query term hit list has been
|
|
// completely built. It will iterate over all the terms and reduce the
|
|
// length prefixes from byte-based to WCHAR-based - i.e. the lengths
|
|
// will be divided by two. Once this method has been called, it will
|
|
// no longer be possible to search for terms in the term list.
|
|
// We do this so that the direct pointer refs to terms that end up in the
|
|
// query result list point to correct WCHAR-based length prefixes.
|
|
STDMETHODIMP
CITIndexObjBridge::AdjustQueryResultTerms(void)
{
    LST     lstTerm;
    LONG    lEnumInfo = -1L;
    LPVOID  pvEnumInfo = NULL;

    // Nothing to adjust if no term was ever added.
    if (m_lpsipbTermHit == NULL)
        return (S_OK);

    // Walk every stored term and convert its length prefix from a byte
    // count to a WCHAR count.
    for (;;)
    {
        if (!SUCCEEDED(MVStopListEnumWords(m_lpsipbTermHit, &lstTerm,
                                           &lEnumInfo, &pvEnumInfo)))
            break;

        ITASSERT(*((WORD *)lstTerm) % sizeof(WCHAR) == 0);
        *((WORD *)lstTerm) /= sizeof(WCHAR);
    }

    return (S_OK);
}
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// Private Method Implementations
|
|
//---------------------------------------------------------------------------
|
|
|
|
// Thin wrapper over ReallocBufferHmem that never requests fewer than
// cbConvBufInit bytes.
//
//   phmemBuf   - handle of the buffer to (re)allocate.
//   pcbBufCur  - current size of that buffer.
//   cbBufNew   - number of bytes the caller needs.
//
// Returns whatever ReallocBufferHmem returns.
HRESULT
CITIndexObjBridge::ReallocBuffer(HGLOBAL *phmemBuf, DWORD *pcbBufCur,
                                 DWORD cbBufNew)
{
    return (ReallocBufferHmem(phmemBuf, pcbBufCur,
                (((cbBufNew) > (cbConvBufInit)) ? (cbBufNew) : (cbConvBufInit))));
}
|
|
|
|
|