/************************************************************************* * @doc SHROOM EXTERNAL API * * * * INDEXIMP.CPP * * * * Copyright (C) Microsoft Corporation 1997 * * All Rights reserved. * * * * This file contains the implementation of the index object * * * * * ************************************************************************** * * * Written By : Erin Foxford * * Current Owner: erinfox * * * **************************************************************************/ #include #ifdef _DEBUG static char s_aszModule[] = __FILE__; /* For error report */ #endif #include // MediaView (InfoTech) includes #include #include #include #include #include #include #include #include "indeximp.h" #include "queryimp.h" #include "mvsearch.h" #include #include // for STDPROP_UID def. #include // for IITResultSet def. #include #define QUERYRESULT_GROUPCREATE 0x0800 //---------------------------------------------------------------------- // REVIEW (billa): Need to add critical section locking to all methods // that reference member variables. //---------------------------------------------------------------------- /******************************************************************** * @method STDMETHODIMP | IITIndex | Open | * Opens a full-text index, which can reside in the database's * storage or as a Win32 file. * * @parm IITDatabase* | pITDB | Pointer to database associated with * full-text index * @parm LPCWSTR | lpszIndexMoniker | Name of full-text index to open. * If index resides outside database (as a file), this should include * the full path to the index. * @parm BOOL | fInside | If TRUE, index resides inside of database; * otherwise, index resides outside of database. 
* * @rvalue S_OK | The index was successfully opened ********************************************************************/ STDMETHODIMP CITIndexLocal::Open(IITDatabase* pITDB, LPCWSTR lpszIndexMoniker, BOOL fInside) { HFPB hfpb = NULL; HRESULT hr; INDEXINFO indexinfo; char szFileName[_MAX_PATH + 1] = SZ_FI_STREAM_A; if (m_idx) return (SetErrReturn(E_ALREADYINIT)); // We have to have a database for charmap (and stoplist and // operator table eventually) if (NULL == pITDB || NULL == lpszIndexMoniker) return (SetErrReturn(E_INVALIDARG)); m_cs.Lock(); // if index is inside storage, need to get hfpb if (fInside) { WCHAR rgwch[1]; IStorage *pStorageDBRoot = NULL; // Get root storage from database. rgwch[0] = (WCHAR) NULL; if (FAILED(hr = pITDB->GetObjectPersistence(rgwch, IITDB_OBJINST_NULL, (LPVOID *)&pStorageDBRoot, FALSE)) || (hfpb = (HFPB)FpbFromHfs(pStorageDBRoot, &hr)) == NULL) { if (pStorageDBRoot != NULL) pStorageDBRoot->Release(); m_cs.Unlock(); return (hr); } } // TODO: make MVIndexOpen take Unicode file name. This might take a little // work because it depends on FileOpen, which has a call to one of the Fm // functions... DWORD dwSize = (DWORD) STRLEN(szFileName); WideCharToMultiByte(CP_ACP, 0, lpszIndexMoniker, -1, szFileName + dwSize, _MAX_PATH + 1 - dwSize, NULL, NULL); if (NULL == (m_idx = MVIndexOpen(hfpb, (LSZ) szFileName, &hr))) goto cleanup; MVGetIndexInfoLpidx(m_idx, &indexinfo); if (SUCCEEDED(hr = pITDB->GetObject(indexinfo.dwBreakerInstID, IID_IWordBreaker, (LPVOID *) &m_piwbrk))) { BOOL fLicense; hr = m_piwbrk->Init(TRUE, CB_MAX_WORD_LEN, &fLicense); } if (FAILED(hr)) goto cleanup; // Open catalog object - we only need one instance // TODO (evaluate): how bad of hit is this going to be? hr = CoCreateInstance(CLSID_IITCatalogLocal, NULL, CLSCTX_INPROC_SERVER, IID_IITCatalog, (VOID **) &m_pCatalog); if (FAILED(hr)) goto cleanup; // if it fails, there is no catalog which we can run without. 
if (FAILED(m_pCatalog->Open(pITDB))) { m_pCatalog->Release(); m_pCatalog = NULL; } cleanup: if (FAILED(hr)) Close(); // If we have an HFPB for the DB's root storage, we need to release the // storage pointer and free the HFPB. FileClose takes care of everything. if (hfpb) FileClose(hfpb); m_cs.Unlock(); return hr; } /******************************************************************** * @method STDMETHODIMP | IITIndex | CreateQueryInstance | * Creates a query object * * @parm IITQuery** | ppITQuery | Indirect pointer to query object * * @rvalue S_OK | The query object was successfully returned * ********************************************************************/ STDMETHODIMP CITIndexLocal::CreateQueryInstance(IITQuery** ppITQuery) { // TODO: possible optimization in case where user specifies multiple // query objects... get class factory pointer once; then call CreateInstance // Free CF when all done w/ index object. return CoCreateInstance(CLSID_IITQuery, NULL, CLSCTX_INPROC_SERVER, IID_IITQuery, (VOID **) ppITQuery); } /******************************************************************** * @method STDMETHODIMP | IITIndex | Search | * Performs a full-text search on the open index, returning the * results in a result set object. * * @parm IITQuery* | pITQuery | Pointer to query object * @parm IITResultSet* | pITResult | Pointer to result set object * containing search results. Caller is responsible for initializing * the result set with the properties to be returned. * * @rvalue S_FALSE | The search was successful, but returned no hits. * @rvalue S_OK | The search was successfully performed. * @rvalue E_NOTOPEN | The index object is not open. * @rvalue E_INVALIDARG | One or both parameters is NULL. * @rvalue E_OUTOFMEMORY | There was not enough memory to perform this function. * @rvalue E_NULLQUERY | The query consisted of no terms, or is all stopwords. * @rvalue E_STOPWORD | A stopword was one of the terms in the query. 
* @rvalue E_* | An error occurred during the search. Check iterror.h for the possible error codes. * * @comm The caller is responsible for setting the proper options * through the query object before calling this function. ********************************************************************/ STDMETHODIMP CITIndexLocal::Search(IITQuery* pITQuery, IITResultSet* pITResult) { HRESULT hr; CITIndexObjBridge *pidxobr = NULL; LPQT pQueryTree = NULL; // Pointer to query tree LPHL pHitList = NULL; // Pointer to hit list IITGroup* piitGroup = NULL; _LPGROUP lpGroup; if (NULL == pITQuery || NULL == pITResult) return (SetErrReturn(E_INVALIDARG)); if (m_idx == NULL) return (SetErrReturn(E_NOTOPEN)); if ((pidxobr = new CITIndexObjBridge) != NULL) { pidxobr->AddRef(); hr = pidxobr->SetWordBreaker(m_piwbrk); } else hr = E_OUTOFMEMORY; if (SUCCEEDED(hr) && SUCCEEDED(hr = QueryParse(pITQuery, &pQueryTree, pidxobr))) { SRCHINFO SrchInfo; // Search parameters SrchInfo.dwMemAllowed = 0; pITQuery->GetResultCount((LONG &)SrchInfo.dwTopicCount); pITQuery->GetOptions(SrchInfo.Flag); SrchInfo.dwValue = 0; SrchInfo.dwTopicFullCalc = 0; SrchInfo.lpvIndexObjBridge = (LPVOID) pidxobr; pITQuery->GetGroup(&piitGroup); if (piitGroup) lpGroup = (_LPGROUP)piitGroup->GetLocalImageOfGroup(); else lpGroup = NULL; // Perform search pHitList = MVIndexSearch(m_idx, pQueryTree, &SrchInfo, lpGroup, &hr); // Massage hitlist into a result set. if (pHitList) { hr = HitListToResultSet(pHitList, pITResult, pidxobr); MVHitListDispose(pHitList); } } if (pQueryTree) MVQueryFree(pQueryTree); // We don't want to delete pidxobr if HitListToResultSet AddRef'ed it // so that the result set can hold onto a term string heap via pidxobr. if (pidxobr && pidxobr->Release() == 0) delete pidxobr; return hr; } /******************************************************************** * @method STDMETHODIMP | IITIndex | Search | * Performs a full-text search on the open index, returning the * results in a group object. 
* * @parm IITQuery* | pITQuery | Pointer to query object * @parm IITGroup* | pITGroup | Pointer to group object. The caller * is responsible for initializing this object before passing it. * * @rvalue S_OK | The search was successfully performed * * @comm The caller is responsible for setting the proper options * through the query object before calling this function. ********************************************************************/ STDMETHODIMP CITIndexLocal::Search(IITQuery* pITQuery, IITGroup* pITGroup) { HRESULT hr = S_OK; CITIndexObjBridge *pidxobr = NULL; LPQT pQueryTree = NULL; // Pointer to query tree LPHL pHitList = NULL; // Pointer to hit list if (NULL == pITQuery || NULL == pITGroup) return (SetErrReturn(E_INVALIDARG)); if (m_idx == NULL) return (SetErrReturn(E_NOTOPEN)); // TODO: MVIndexSearch would take IITGroup*, not _LPGROUP _LPGROUP lpGroup = (_LPGROUP) pITGroup->GetLocalImageOfGroup(); if ((pidxobr = new CITIndexObjBridge) != NULL) hr = pidxobr->SetWordBreaker(m_piwbrk); else hr = E_OUTOFMEMORY; if (SUCCEEDED(hr) && SUCCEEDED(hr = QueryParse(pITQuery, &pQueryTree, pidxobr))) { SRCHINFO SrchInfo; // Search parameters SrchInfo.dwMemAllowed = 0; pITQuery->GetResultCount((LONG &)SrchInfo.dwTopicCount); pITQuery->GetOptions(SrchInfo.Flag); SrchInfo.Flag |= QUERYRESULT_GROUPCREATE; SrchInfo.dwValue = 0; SrchInfo.dwTopicFullCalc = 0; SrchInfo.lpvIndexObjBridge = (LPVOID) pidxobr; // Perform search - if pHitList comes back NULL, we will return hr if (pHitList = MVIndexSearch(m_idx, pQueryTree, &SrchInfo, lpGroup, &hr)) MVHitListDispose(pHitList); } if (pQueryTree) MVQueryFree(pQueryTree); if (pidxobr) delete pidxobr; return hr; } // This is private - it encapsulates the query parsing needed // in all searches STDMETHODIMP CITIndexLocal::QueryParse(IITQuery* pITQuery, LPQT* pQueryTree, CITIndexObjBridge *pidxobr) { HRESULT hr = S_OK; EXBRKPM exbrkpm; PARSE_PARMS ParseParm; ITASSERT(pITQuery != NULL && pQueryTree != NULL && pidxobr != NULL); // Fill 
PARSE_PARMS structure DWORD dwFlags; pITQuery->GetOptions(dwFlags); if (dwFlags & QUERYRESULT_SKIPOCCINFO) m_fSkipOcc = TRUE; ParseParm.cDefOp = (WORD)(dwFlags & IMPLICIT_OR); ParseParm.wCompoundWord = (WORD)(dwFlags & COMPOUNDWORD_PHRASE); pITQuery->GetProximity(ParseParm.cProxDist); IITGroup* ITGroup; pITQuery->GetGroup(&ITGroup); if (ITGroup) { _LPGROUP lpGroup = (_LPGROUP) ITGroup->GetLocalImageOfGroup(); ParseParm.lpGroup = lpGroup; } else ParseParm.lpGroup = NULL; // Breaker bridge setup exbrkpm.lpvIndexObjBridge = (LPVOID)pidxobr; ParseParm.pexbrkpm = &exbrkpm; // TODO: provide the right stuff ParseParm.lpOpTab = NULL; LPSTR lpszQuery = NULL; // Pointer to query buffer DWORD cbQuery; // Query buffer's length DWORD dwCodePageID; LCID lcid; if (FAILED(GetLocaleInfo(&dwCodePageID, &lcid))) { ITASSERT(FALSE); dwCodePageID = CP_ACP; } LPCWSTR lpszwQuery; pITQuery->GetCommand(lpszwQuery); if (NULL == lpszwQuery) return E_NULLQUERY; // Query comes in as Unicode, but the FTI still uses MBCS. cbQuery = WideCharToMultiByte (dwCodePageID, 0, lpszwQuery, -1, NULL, 0, NULL, NULL); if ((lpszQuery = new char[cbQuery]) != NULL) { WideCharToMultiByte(dwCodePageID, 0, lpszwQuery, -1, lpszQuery, cbQuery, NULL, NULL); ParseParm.cbQuery = cbQuery - 1; ParseParm.lpbQuery = (const char*) lpszQuery; } else hr = E_OUTOFMEMORY; // Parse query if (SUCCEEDED(hr)) { FCALLBACK_MSG fcbkmsg; *pQueryTree = MVQueryParse (&ParseParm, &hr); if (SUCCEEDED(hr) && SUCCEEDED(pITQuery->GetResultCallback(&fcbkmsg))) MVSearchSetCallback(*pQueryTree, &fcbkmsg); } if (lpszQuery) delete lpszQuery; return hr; } /******************************************************************** * @method STDMETHODIMP | IITIndex | Close | * Closes the full-text index. 
* * @rvalue S_OK | The index was successfully closed * ********************************************************************/ STDMETHODIMP CITIndexLocal::Close() { m_cs.Lock(); if (m_idx) { MVIndexClose(m_idx); m_idx = NULL; } if (m_pCatalog) { m_pCatalog->Close(); m_pCatalog->Release(); m_pCatalog = NULL; } if (m_piwbrk != NULL) { m_piwbrk->Release(); m_piwbrk = NULL; } m_cs.Unlock(); return S_OK; } /******************************************************************** * @method STDMETHODIMP | IITIndex | GetLocaleInfo | * Gets locale info that the full text index was built with. * @parm DWORD* | pdwCodePageID | On exit, pointer to code page ID. * @parm LCID* | plcid | On exit, pointer to locale ID. * * @rvalue S_OK | The locale info was successfully retrieved. * ********************************************************************/ STDMETHODIMP CITIndexLocal::GetLocaleInfo(DWORD *pdwCodePageID, LCID *plcid) { INDEXINFO indexinfo; if (pdwCodePageID == NULL || plcid == NULL) return (SetErrReturn(E_POINTER)); if (m_idx == NULL) return (SetErrReturn(E_NOTOPEN)); MVGetIndexInfoLpidx(m_idx, &indexinfo); *pdwCodePageID = indexinfo.dwCodePageID; *plcid = indexinfo.lcid; return (S_OK); } /******************************************************************** * @method STDMETHODIMP | IITIndex | GetWordBreakerInstance | * Gets the ID of the word breaker instance that the full text * index was built with. * @parm DWORD* | pdwObjInstance | On exit, pointer to word breaker instance. * * @rvalue S_OK | The word breaker instance ID was successfully retrieved. 
* ********************************************************************/ STDMETHODIMP CITIndexLocal::GetWordBreakerInstance(DWORD *pdwObjInstance) { INDEXINFO indexinfo; if (pdwObjInstance == NULL) return (SetErrReturn(E_POINTER)); if (m_idx == NULL) return (SetErrReturn(E_NOTOPEN)); MVGetIndexInfoLpidx(m_idx, &indexinfo); *pdwObjInstance = indexinfo.dwBreakerInstID; return (S_OK); } // Private function - passed as a parameter by // CITIndexLocal::HitListToResultSet. SCODE __stdcall FreeRSColumnHeap(LPVOID lpvIndexObjBridge) { CITIndexObjBridge *pidxobr; if (lpvIndexObjBridge == NULL) return (SetErrReturn(E_POINTER)); pidxobr = (CITIndexObjBridge *) lpvIndexObjBridge; pidxobr->Release(); delete pidxobr; return (S_OK); } // Private function - one grand hack to provide a result set from a hit list STDMETHODIMP CITIndexLocal::HitListToResultSet(LPHL pHitList, IITResultSet* pRS, CITIndexObjBridge *pidxobr) { DWORD cEntry; // Number of entries HIT HitInfo; TOPICINFO TopicInfo; LONG lColumnUID = -1; LONG lColumnOccInfo[5]; DWORD iTopic, iHit, iColumn; // Loop indices LONG lRow = 0; HRESULT hr; ITASSERT(pRS != NULL && pidxobr != NULL); // Number of entries in hit list - if 0, just return FALSE if (0 == (cEntry = MVHitListEntries(pHitList))) return S_FALSE; hr = pRS->GetColumnFromPropID(STDPROP_UID, lColumnUID); if (!m_fSkipOcc) { for (iColumn = 0; iColumn < 5; iColumn++) lColumnOccInfo[iColumn] = -1; pRS->GetColumnFromPropID(STDPROP_FIELD, lColumnOccInfo[0]); pRS->GetColumnFromPropID(STDPROP_LENGTH, lColumnOccInfo[1]); pRS->GetColumnFromPropID(STDPROP_COUNT, lColumnOccInfo[2]); pRS->GetColumnFromPropID(STDPROP_OFFSET, lColumnOccInfo[3]); pRS->GetColumnFromPropID(STDPROP_TERM_UNICODE_ST, lColumnOccInfo[4]); } // Loop over all the topics in the hit list for (iTopic = 0; iTopic < cEntry; iTopic++) { hr = MVHitListGetTopic(pHitList, iTopic, &TopicInfo); if (FAILED(hr)) return hr; // or do we continue? 
if (m_fSkipOcc) { // No occurrence info if (-1 != lColumnUID) pRS->Set(lRow, lColumnUID, TopicInfo.dwTopicId); lRow++; } else { // Requested occurence info, so loop // over all the hits in this topic for (iHit = 0; iHit < TopicInfo.lcHits; iHit++) { if (-1 != lColumnUID) pRS->Set(lRow, lColumnUID, TopicInfo.dwTopicId); hr = MVHitListGetHit(pHitList, &TopicInfo, iHit, &HitInfo); if (FAILED(hr)) continue; if (-1 != lColumnOccInfo[0]) pRS->Set(lRow, lColumnOccInfo[0], HitInfo.dwFieldId); if (-1 != lColumnOccInfo[1]) pRS->Set(lRow, lColumnOccInfo[1], HitInfo.dwLength); if (-1 != lColumnOccInfo[2]) pRS->Set(lRow, lColumnOccInfo[2], HitInfo.dwCount); if (-1 != lColumnOccInfo[3]) pRS->Set(lRow, lColumnOccInfo[3], HitInfo.dwOffset); if (-1 != lColumnOccInfo[4]) pRS->Set(lRow, lColumnOccInfo[4], (DWORD_PTR) HitInfo.lpvTerm); lRow++; } } } // Fill in rest of properties from catalog (like IITWordWheel::GetData) if (m_pCatalog) { hr = m_pCatalog->Lookup(pRS); if (S_FALSE == hr) hr = S_OK; // don't report S_FALSE } // If the caller requested Unicode term STs, then we need to give the result // set the string heap and adjust the string lengths in the heap. Otherwise, // we will just let the heap get freed whenever pidxobr gets deleted. if (-1 != lColumnOccInfo[4]) { pidxobr->AdjustQueryResultTerms(); pRS->SetColumnHeap(lColumnOccInfo[4], (LPVOID) pidxobr, FreeRSColumnHeap); // Tell our caller not to delete pidxobr because the result set is // holding onto it. pidxobr->AddRef(); } return S_OK; } // Need to export these without decoration to the linker so they can be called // from the old .c files. 
extern "C" {

// Each wrapper below receives the bridge object as an opaque pointer,
// rejects NULL arguments with E_POINTER, and forwards the call to the
// CITIndexObjBridge method of the matching name.

// Forwards to CITIndexObjBridge::BreakText on the bridge stored in pexbrkpm.
PUBLIC HRESULT EXPORT_API FAR PASCAL ExtBreakText(PEXBRKPM pexbrkpm)
{
    if (NULL == pexbrkpm || NULL == pexbrkpm->lpvIndexObjBridge)
        return (SetErrReturn(E_POINTER));

    CITIndexObjBridge *pBridge =
        static_cast<CITIndexObjBridge *>(pexbrkpm->lpvIndexObjBridge);

    return (pBridge->BreakText(pexbrkpm));
}


// Forwards to CITIndexObjBridge::StemWord.
PUBLIC HRESULT EXPORT_API FAR PASCAL ExtStemWord(LPVOID lpvIndexObjBridge,
                                                 LPBYTE lpbStemWord,
                                                 LPBYTE lpbRawWord)
{
    if (NULL == lpvIndexObjBridge || NULL == lpbStemWord || NULL == lpbRawWord)
        return (SetErrReturn(E_POINTER));

    CITIndexObjBridge *pBridge =
        static_cast<CITIndexObjBridge *>(lpvIndexObjBridge);

    return (pBridge->StemWord(lpbStemWord, lpbRawWord));
}


// Forwards to CITIndexObjBridge::LookupStopWord.
PUBLIC HRESULT EXPORT_API FAR PASCAL ExtLookupStopWord(LPVOID lpvIndexObjBridge,
                                                       LPBYTE lpbStopWord)
{
    if (NULL == lpvIndexObjBridge || NULL == lpbStopWord)
        return (SetErrReturn(E_POINTER));

    CITIndexObjBridge *pBridge =
        static_cast<CITIndexObjBridge *>(lpvIndexObjBridge);

    return (pBridge->LookupStopWord(lpbStopWord));
}


// Forwards to CITIndexObjBridge::AddQueryResultTerm.
PUBLIC HRESULT EXPORT_API FAR PASCAL ExtAddQueryResultTerm(LPVOID lpvIndexObjBridge,
                                                           LPBYTE lpbTermHit,
                                                           LPVOID *ppvTermHit)
{
    if (NULL == lpvIndexObjBridge || NULL == lpbTermHit || NULL == ppvTermHit)
        return (SetErrReturn(E_POINTER));

    CITIndexObjBridge *pBridge =
        static_cast<CITIndexObjBridge *>(lpvIndexObjBridge);

    return (pBridge->AddQueryResultTerm(lpbTermHit, ppvTermHit));
}

}   // End extern "C"