/**************************************************************** * @doc SHROOM EXTERNAL API * * A Legsdin added autodoc headers for IITBuildCollect Interface * ****************************************************************/ // ftuMain.CPP: Implementation of CITIndexBuild #include #ifdef _DEBUG static char s_aszModule[] = __FILE__; /* For error report */ #endif #include #ifdef IA64 #include #endif #include #include #include #include #include #include #include #include <_mvutil.h> #include #include #include "..\svWrdSnk.h" #include "ftuMain.h" #define ULMAXTOKENSIZE 1024 #define OCCF_DEFAULT OCCF_TOPICID | OCCF_FIELDID | OCCF_COUNT HRESULT __stdcall FillText(TEXT_SOURCE * pTextSource) { return E_FAIL;//WBREAK_E_END_OF_TEXT; } CITIndexBuild::CITIndexBuild() { m_fInitialized = FALSE; m_fIsDirty = FALSE; m_piWordSink = NULL; m_piwb = NULL; m_piwbConfig = NULL; m_lpipb = NULL; m_dwUID = m_dwVFLD = m_dwDType = m_dwWordCount = m_dwCodePage = 0; m_lpbfText = NULL; m_dwOccFlags = OCCF_DEFAULT; } CITIndexBuild::~CITIndexBuild() { (void)Close(); } /************************************************************************ * @method STDMETHODIMP | IITBuildCollect | GetTypeString | * Returns a prefix to use when the storage or stream object is created. * * @parm LPWSTR | pPrefix | Pointer to a buffer in which to copy the prefix * @parm DWORD | *pLen | Length of the buffer * * @rvalue S_OK | The operation completed successfully * * * @comm If you are creating a new build object, you need to decide on a * unique prefix to identify that object. Word wheels use $WW, for example. 
* ************************************************************************/ STDMETHODIMP CITIndexBuild::GetTypeString(LPWSTR pPrefix, DWORD *pLen) { DWORD dwLen = (DWORD) WSTRLEN (SZ_GP_STORAGE) + 1; if (NULL == pPrefix) { *pLen = dwLen; return S_OK; } if (pLen && *pLen < dwLen) { *pLen = dwLen; return S_OK; } if (pLen) *pLen = dwLen; WSTRCPY (pPrefix, SZ_FI_STREAM); return S_OK; } /* GetTypeString */ /**************************************************************** * @method STDMETHODIMP | IITBuildCollect | SetConfigInfo | * Passes initialization parameters to a build object. * * @parm IITDatabase | *piitdb | Pointer to database * @parm VARARG | vaParams | Configuration parameters * * @rvalue S_OK | The operation completed successfully. * @comm Call this method before calling InitHelperInstance. * ****************************************************************/ // This must be called before InitHelperInstance! STDMETHODIMP CITIndexBuild::SetConfigInfo (IITDatabase *piitdb, VARARG vaParams) { if(vaParams.dwArgc) { m_dwOccFlags = 0; // Work through params backwards // If we add more params we may need to scan forward for (int loop = vaParams.dwArgc; loop; --loop) { LPWSTR pwstr = (LPWSTR)vaParams.Argv[loop - 1]; if(!WSTRICMP(pwstr, L"OCC_VFLD")) m_dwOccFlags |= OCCF_FIELDID; else if(!WSTRICMP(pwstr, L"OCC_UID")) m_dwOccFlags |= OCCF_TOPICID; else if(!WSTRICMP(pwstr, L"OCC_COUNT")) m_dwOccFlags |= OCCF_COUNT; else if(!WSTRICMP(pwstr, L"OCC_LENGTH")) m_dwOccFlags |= OCCF_LENGTH; else if(!WSTRICMP(pwstr, L"OCC_OFFSET")) m_dwOccFlags |= OCCF_OFFSET; else if(!WSTRICMP(pwstr, L"OCC_NONE")) { m_dwOccFlags = 0; break; } } } return S_OK; } /* SetConfigInfo */ /******************************************************************** * @method HRESULT WINAPI | IITBuildCollect | InitHelperInstance | * Allows you to configure a helper object used by a * build object (such as sort objects for a word wheel, or breaker * objects for a full-text index). 
* * @parm DWORD | dwHelperObjInstance | Helper object instance ID. * @parm IITDatabase | *pITDatabase | Pointer to database. * @parm DWORD | dwCodePage | Code page identifier. * @parm LCID | lcid | Locale identifier. * @parm VARARG | vaDword | Flags you want to use to configure the object. * @parm VARARG | vaString | String parameters you want to use to * configure the object. * * @rvalue E_FAIL | The object is already initialized or file create failed * ********************************************************************/ STDMETHODIMP CITIndexBuild::InitHelperInstance( DWORD dwHelperObjInstance, IITDatabase *pITDatabase, DWORD dwCodePage, LCID lcid, VARARG vaDword, VARARG vaString ) { if (TRUE == m_fInitialized) return SetErrReturn(E_ALREADYINIT); HRESULT hr = S_OK; BOOL fLicense; IPersistStreamInit *piipstm; m_dwCodePage = dwCodePage; // Open nested indexer INDEXINFO IndexInfo; IndexInfo.dwMemSize = 0x100000; IndexInfo.Occf = m_dwOccFlags; IndexInfo.Idxf = 0; IndexInfo.dwBlockSize = 0; // Use default IndexInfo.dwBreakerInstID = dwHelperObjInstance; IndexInfo.dwCodePageID = dwCodePage; IndexInfo.lcid = lcid; if (NULL == (m_lpipb = MVIndexInitiate(&IndexInfo, &hr))) SetErrCode(&hr, E_FAIL); // Set up the helper (breaker) if (SUCCEEDED(hr)) { // Get the Breaker hr = pITDatabase->GetObject (dwHelperObjInstance, IID_IWordBreaker, (void **)&m_piwb); } // Config the breaker if it is supported if (SUCCEEDED(hr) && SUCCEEDED(hr = m_piwb->Init(FALSE, ULMAXTOKENSIZE, &fLicense))) { if (SUCCEEDED(pITDatabase->GetObject (dwHelperObjInstance, IID_IWordBreakerConfig, (void **)&m_piwbConfig))) { // We don't really care if these fail hr = m_piwbConfig->SetLocaleInfo(dwCodePage, lcid); hr = m_piwbConfig->SetBreakWordType(IITWBC_BREAKTYPE_TEXT); if (vaDword.dwArgc >= 1) { hr = m_piwbConfig->SetControlInfo(*(LPDWORD)vaDword.Argv, 0); } IFSStorage *pifsstg = NULL; IStream *piistm; if (vaString.dwArgc) { // Create ITSS stuff hr = CoCreateInstance(CLSID_IFSStorage, NULL, 
CLSCTX_INPROC_SERVER, IID_IFSStorage, (VOID **)&pifsstg); ITASSERT(SUCCEEDED(hr)); } if(vaString.dwArgc >= 1 && *(LPWSTR)vaString.Argv[0]) { if(SUCCEEDED(pifsstg->FSOpenStream((LPWSTR)vaString.Argv[0], STGM_SHARE_DENY_WRITE | STGM_READWRITE, &piistm))) { hr = m_piwbConfig->LoadExternalBreakerData (piistm, IITWBC_EXTDATA_CHARTABLE); piistm->Release(); } } if (vaString.dwArgc >= 2 && *(LPWSTR)vaString.Argv[1]) { if (SUCCEEDED(pifsstg->FSOpenStream((LPWSTR)vaString.Argv[1], STGM_SHARE_DENY_WRITE | STGM_READWRITE, &piistm))) { hr = m_piwbConfig->LoadExternalBreakerData (piistm, IITWBC_EXTDATA_STOPWORDLIST); piistm->Release(); } } if (vaString.dwArgc >= 3 && *(LPWSTR)vaString.Argv[2]) { // Get the CLSID and instantiate the stemmer CLSID clsid; IStemmer *pStemmer; hr = CLSIDFromProgID((LPWSTR)vaString.Argv[2], &clsid); if(SUCCEEDED(hr)) hr = CoCreateInstance(clsid, NULL, CLSCTX_INPROC_SERVER, IID_IStemmer, (VOID **)&pStemmer); if (SUCCEEDED(hr)) { if(SUCCEEDED(hr = pStemmer->QueryInterface (IID_IPersistStreamInit, (void **)&piipstm))) { piipstm->InitNew(); piipstm->Release(); } (void)pStemmer->Init(ULMAXTOKENSIZE, &fLicense); // Check for IStemmerConfig interface IStemmerConfig *pistemConfig; hr = pStemmer->QueryInterface (IID_IStemmerConfig, (void **)&pistemConfig); if (SUCCEEDED(hr)) { hr = pistemConfig->SetLocaleInfo(dwCodePage, lcid); pistemConfig->Release(); } hr = m_piwbConfig->SetWordStemmer(clsid, pStemmer); pStemmer->Release(); } } if (pifsstg) pifsstg->Release(); hr = S_OK; } } if(SUCCEEDED(hr) && SUCCEEDED(hr = CoCreateInstance(CLSID_IITWordSink, NULL, CLSCTX_INPROC_SERVER, IID_IWordSink, (LPVOID *)&m_piWordSink)) && SUCCEEDED(hr = ((CDefWordSink *)m_piWordSink)->SetLocaleInfo(dwCodePage, lcid)) && SUCCEEDED(hr = ((CDefWordSink *)m_piWordSink)->SetIPB(m_lpipb))) { m_fInitialized = TRUE; } return hr; } /* InitHelperInstance */ /**************************************************************** * @method STDMETHODIMP | IITBuildCollect | SetEntry | * Sets 
properties for a build object. * * * @parm LPCWSTR | szDest | Property destination * @parm IITPropList | *pPropList | Pointer to property list * * @comm Like CSvDoc::AddObjectEntry, this method is called * several times for all the properties that you need to set. ****************************************************************/ STDMETHODIMP CITIndexBuild::SetEntry(LPCWSTR szDest, IITPropList *pPropList) { if (FALSE == m_fInitialized) return SetErrReturn(E_NOTINIT); m_fIsDirty = TRUE; CProperty cProp; HRESULT hr; LPWSTR pwstrIndexText; BOOL fTerm = FALSE; if(SUCCEEDED(hr = pPropList->Get(STDPROP_INDEX_BREAK, cProp))) { SendTextToBreaker(); return S_OK; } // Check for REQUIRED text (can be either INDEX_TEXT or INDEX_TERM) if(FAILED(hr = pPropList->Get(STDPROP_INDEX_TEXT, cProp))) { if(SUCCEEDED(hr = pPropList->Get(STDPROP_INDEX_TERM, cProp))) fTerm = TRUE; } if(SUCCEEDED(hr)) pwstrIndexText = (LPWSTR)cProp.lpszwData; // Check for REQUIRED UID if (SUCCEEDED(hr) && SUCCEEDED(hr = pPropList->Get(STDPROP_UID, cProp)) && m_dwUID != cProp.dwValue) { SendTextToBreaker(); m_dwUID = cProp.dwValue; m_dwWordCount = 0; } // Check for OPTIONAL VFLD if (SUCCEEDED(hr) && SUCCEEDED(pPropList->Get(STDPROP_INDEX_VFLD, cProp)) && m_dwVFLD != cProp.dwValue) { SendTextToBreaker(); m_dwVFLD = cProp.dwValue; } // Check for OPTIONAL DTYPE if (SUCCEEDED(hr) && m_piwbConfig && SUCCEEDED(pPropList->Get(STDPROP_INDEX_DTYPE, cProp)) && m_dwDType != cProp.dwValue) { SendTextToBreaker(); hr = m_piwbConfig->SetBreakWordType(cProp.dwValue); } DWORD cchText; if (SUCCEEDED(pPropList->Get(STDPROP_INDEX_LENGTH, cProp))) cchText = (WORD)cProp.dwValue; else cchText = (DWORD) WSTRLEN(pwstrIndexText); if (SUCCEEDED(hr)) { if (fTerm) { // Get actual index term length // Fill-ou occurrence info OCC occ; occ.dwFieldId = m_dwVFLD; occ.dwTopicID = m_dwUID; occ.dwCount = m_dwWordCount++; // Is there a diffrerent highlite length? 
if (SUCCEEDED(pPropList->Get(STDPROP_INDEX_TERM_RAW_LENGTH, cProp))) occ.wWordLen = (WORD)cProp.dwValue; else occ.wWordLen = (WORD)cchText; if (cchText > 255) return SetErrReturn(E_UNEXPECTED); char strTerm[256 + sizeof(WORD)]; if(!WideCharToMultiByte(m_dwCodePage, 0, pwstrIndexText, cchText, strTerm + sizeof(WORD), 255, NULL, NULL)) { // The conversion failed! -- very bad return SetErrReturn(E_UNEXPECTED); } *(LPWORD)strTerm = (SHORT)cchText; hr = MVIndexAddWord(m_lpipb, (LPB)strTerm, &occ); } else { // Accumulate text until we need to send it along if (!DynBufferAppend (m_lpbfText, (LPBYTE)pwstrIndexText, cchText * sizeof (WCHAR))) SetErr(&hr, E_OUTOFMEMORY); } } return hr; } /* SetEntry */ STDMETHODIMP CITIndexBuild::SendTextToBreaker(void) { HRESULT hr; // TODO: Call these only for our own word sink hr = ((CDefWordSink *)m_piWordSink)->SetDocID(m_dwUID); hr = ((CDefWordSink *)m_piWordSink)->SetVFLD(m_dwVFLD); // TODO: We can set TYPE here, so we can use the same breaker instance for // multiple FTI and they will not interfere with each other. This would be // different than current behavior, however, so I have left it out for now. TEXT_SOURCE tsText; tsText.pfnFillTextBuffer = FillText; tsText.awcBuffer = (LPWSTR)DynBufferPtr(m_lpbfText); tsText.iEnd = DynBufferLen(m_lpbfText) / sizeof (WCHAR); tsText.iCur = 0; hr = m_piwb->BreakText(&tsText, m_piWordSink, NULL); DynBufferReset(m_lpbfText); return hr; } /* SendTextToBreaker */ /***************************************************************** * @method STDMETHODIMP | IITBuildCollect | Close | * Closes the build object and frees memory. * * @Rvalue E_NOTINIT | Object has not been initialized. * @comm Calling this method is optional, but the build object must * implement it. Any object that implements IITBuildCollect interface * must support the Close method. 
*
 ****************************************************************/
STDMETHODIMP CITIndexBuild::Close(void)
{
    const BOOL fWasInitialized = m_fInitialized;

    // Helper objects are released even when the object is not marked
    // initialized: InitHelperInstance can fail after acquiring some of them,
    // and nothing else would ever free those.  The word sink goes before the
    // index it was handed through SetIPB.
    if (m_piwb)
    {
        m_piwb->Release();
        m_piwb = NULL;
    }
    if (m_piWordSink)
    {
        m_piWordSink->Release();
        m_piWordSink = NULL;
    }
    if (m_piwbConfig)
    {
        m_piwbConfig->Release();
        m_piwbConfig = NULL;
    }
    if (m_lpipb)
    {
        MVIndexDispose(m_lpipb);
        m_lpipb = NULL;
    }

    // Configuration (occurrence flags) and the text buffer from InitNew are
    // left alone in this case so a later InitHelperInstance still sees them.
    if (FALSE == fWasInitialized)
        return SetErrReturn(E_NOTINIT);

    m_fInitialized = FALSE;
    m_fIsDirty = FALSE;
    m_dwUID = m_dwVFLD = m_dwDType = m_dwWordCount = m_dwCodePage = 0;

    if (m_lpbfText)
    {
        DynBufferFree (m_lpbfText);
        m_lpbfText = NULL;
    }

    // Reset the occurrence flags to the default
    m_dwOccFlags = OCCF_DEFAULT;

    return S_OK;
} /* Close */


// IPersistStreamInit::InitNew -- allocates the buffer in which SetEntry
// accumulates text for the breaker.
// Returns E_OUTOFMEMORY when the allocation fails.
STDMETHODIMP CITIndexBuild::InitNew(void)
{
    // A repeated call must not orphan the previous buffer
    if (m_lpbfText)
    {
        DynBufferFree (m_lpbfText);
        m_lpbfText = NULL;
    }

    if (NULL == (m_lpbfText = DynBufferAlloc (0x4000)))
        return SetErrReturn(E_OUTOFMEMORY);

    return S_OK;
} /* IPersistStreamInit::InitNew */


// Copies CLSID_IITIndexBuild to *pClsID.
// Returns E_INVALIDARG when pClsID is NULL or not writable.
STDMETHODIMP CITIndexBuild::GetClassID(CLSID *pClsID)
{
    if (NULL == pClsID
        || IsBadWritePtr(pClsID, sizeof(CLSID)))
        return SetErrReturn(E_INVALIDARG);

    *pClsID = CLSID_IITIndexBuild;
    return S_OK;
} /* GetClassID */


// Returns S_OK when SetEntry has run since the flag was last cleared,
// S_FALSE otherwise.
inline STDMETHODIMP CITIndexBuild::IsDirty(void)
{
    return m_fIsDirty ? S_OK : S_FALSE;
} /* IsDirty */


// Not supported: a build object cannot be loaded from a stream.
STDMETHODIMP CITIndexBuild::Load(IStream *piistm)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistStreamInit::Load */


// Flushes pending text to the breaker, builds the index into the given
// stream and disposes of the in-memory index.
// Returns E_NOTINIT when the object is not initialized or its index has
// already been consumed by an earlier Save; otherwise the result of
// FpbFromHf / MVIndexBuild.
STDMETHODIMP CITIndexBuild::Save(IStream *piistm, BOOL fClearDirty)
{
    if (FALSE == m_fInitialized)
        return SetErrReturn(E_NOTINIT);

    // A successful stream open below disposes of the index, so there is
    // nothing left to build on a second call.
    if (NULL == m_lpipb)
        return SetErrReturn(E_NOTINIT);

    // Best effort, as before: a breaker failure does not abort the save
    (void)SendTextToBreaker();

    // Pre-set in case FpbFromHf only writes the code on failure
    HRESULT hr = S_OK;
    HFPB hfpbSave = FpbFromHf(piistm, &hr);
    if (SUCCEEDED(hr))
    {
        hr = MVIndexBuild (0, m_lpipb, hfpbSave, NULL);
        MVIndexDispose (m_lpipb);
        m_lpipb = NULL;
        if (fClearDirty)
            m_fIsDirty = FALSE;
        FreeHfpb(hfpbSave);
    }

    return hr;
} /* IPersistStreamInit::Save */


// Not supported.
STDMETHODIMP CITIndexBuild::GetSizeMax(ULARGE_INTEGER *pcbSize)
{
    return SetErrReturn(E_NOTIMPL);
} /* GetSizeMax */


// ********************* IPersistFile Methods *********************
// None of the file-based persistence methods is supported.

STDMETHODIMP CITIndexBuild::Load(LPCWSTR pszFileName, DWORD dwMode)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::Load */


STDMETHODIMP CITIndexBuild::Save(LPCWSTR pszFileName, BOOL fRemember)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::Save */


STDMETHODIMP CITIndexBuild::SaveCompleted(LPCWSTR pszFileName)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::SaveCompleted */


STDMETHODIMP CITIndexBuild::GetCurFile(LPWSTR *ppszFileName)
{
    return SetErrReturn(E_NOTIMPL);
} /* IPersistFile::GetCurFile */