// STDBRKR.H: Definition of CITStdBreaker breaker object implementation. #ifndef __STDBRKR_H__ #define __STDBRKR_H__ #include #include #include "verinfo.h" #define VERSION_STDBRKR (MAKELONG(MAKEWORD(0, rapFile), MAKEWORD(rmmFile, rmjFile))) // Group of flags that indicate what data has been persisted to the // breaker's stream. #define ITSTDBRK_PERSISTED_BRKCTL 0x00000001 #define ITSTDBRK_PERSISTED_CHARTABLE 0x00000002 #define ITSTDBRK_PERSISTED_STOPWORDLIST 0x00000004 #define ITSTDBRK_PERSISTED_STEMMER 0x00000008 // Max number of stop words allowed. #define ITSTDBRK_STOPHASH_SIZE 211 // A good prime number for supporting // up to about 2000 stop words. // Breaker control structure that contains information that can // vary how text words are interpreted and broken. typedef struct _brkctl { DWORD dwCodePageID; LCID lcid; DWORD dwBreakWordType; DWORD grfBreakFlags; } BRKCTL; // Word callback function param struct that is passed to StdBreakerWordFunc, // which wraps the IWordSink implementation as far as the internal // word breaking functions are concerned. typedef struct _wrdfnpm { PIWRDSNK piwrdsnk; DWORD dwCodePageID; HGLOBAL hmemUnicode; DWORD cbBufUnicodeCur; LPBYTE lpbBuf; // MBCS text buffer. } WRDFNPM; class CITStdBreaker : public IWordBreaker, public IWordBreakerConfig, public IPersistStreamInit, public IITStopWordList, public CComObjectRootEx, public CComCoClass { public: CITStdBreaker(); virtual ~CITStdBreaker(); BEGIN_COM_MAP(CITStdBreaker) COM_INTERFACE_ENTRY(IWordBreaker) COM_INTERFACE_ENTRY(IWordBreakerConfig) COM_INTERFACE_ENTRY(IPersistStreamInit) COM_INTERFACE_ENTRY(IITStopWordList) END_COM_MAP() DECLARE_REGISTRY(CLSID_ITStdBreaker, "ITIR.StdWordBreaker.4", "ITIR.StdWordBreaker", 0, THREADFLAGS_BOTH ) // IWordBreaker methods STDMETHOD(Init)(BOOL fQuery, ULONG ulMaxTokenSize, BOOL *pfLicense); STDMETHOD(BreakText)(TEXT_SOURCE *pTextSource, IWordSink *pWordSink, IPhraseSink *pPhraseSink); STDMETHOD(ComposePhrase)(WCHAR const *pwcNoun, ULONG cwcNoun, WCHAR const *pwcModifier, ULONG cwcModifier, ULONG ulAttachmentType, WCHAR *pwcPhrase, ULONG *pcwcPhrase); STDMETHOD(GetLicenseToUse)(WCHAR const **ppwcsLicense); // IWordBreakerConfig methods STDMETHOD(SetLocaleInfo)(DWORD dwCodePageID, LCID lcid); STDMETHOD(GetLocaleInfo)(DWORD *pdwCodePageID, LCID *plcid); STDMETHOD(SetBreakWordType)(DWORD dwBreakWordType); STDMETHOD(GetBreakWordType)(DWORD *pdwBreakWordType); STDMETHOD(SetControlInfo)(DWORD grfBreakFlags, DWORD dwReserved); STDMETHOD(GetControlInfo)(DWORD *pgrfBreakFlags, DWORD *pdwReserved); STDMETHOD(LoadExternalBreakerData)(IStream *pStream, DWORD dwExtDataType); STDMETHOD(SetWordStemmer)(REFCLSID rclsid, IStemmer *pStemmer); STDMETHOD(GetWordStemmer)(IStemmer **ppStemmer); // IITStopWordList methods. STDMETHOD(AddWord)(WCHAR const *pwcInBuf, ULONG cwc); STDMETHOD(LookupWord)(WCHAR const *pwcInBuf, ULONG cwc); // IPersistStreamInit methods STDMETHOD(GetClassID)(CLSID *pclsid); STDMETHOD(IsDirty)(void); STDMETHOD(Load)(IStream *pStream); STDMETHOD(Save)(IStream *pStream, BOOL fClearDirty); STDMETHOD(GetSizeMax)(ULARGE_INTEGER *pcbSizeMax); STDMETHOD(InitNew)(void); private: // Private methods HRESULT StopListOp(WCHAR const *pwcInBuf, ULONG cwc, BOOL fAddWord); HRESULT ReallocBuffer(HGLOBAL *phmemBuf, DWORD *cbBufCur, DWORD cbBufNew); void ClearMembers(void); void InitBrkCtl(void); void Close(void); // Private data members BOOL m_fInitialized; BOOL m_fDirty; BOOL m_fQueryContext; DWORD m_grfPersistedItems; BRKCTL m_brkctl; HGLOBAL m_hmemAnsi; DWORD m_cbBufAnsiCur; LPCTAB m_lpctab; LPSIPB m_lpsipb; PISTEM m_pistem; CLSID m_clsidStemmer; _ThreadModel::AutoCriticalSection m_cs; // Critical section obj. }; // Initial size of Ansi string buffers. #define cbAnsiBufInit 256 #endif // __STDBRKR_H__