windows-nt/Source/XPSP1/NT/enduser/stuff/hhdump/itwbrk.h
2020-09-26 16:20:57 +08:00

213 lines
6.5 KiB
C

// ITWBRK.H: (from Tripoli) IWordBreaker, IWordSink, IPhraseSink, IStem
// (from InfoTech) IWordBreakerConfig
// (from Tripoli and InfoTech) Supporting definitions.
#ifndef __ITWBRK_H__
#define __ITWBRK_H__
#include <comdef.h>
#include <itstem.h>
// Forward declarations for the interfaces declared in this header.
// Each typedef is wrapped in its own __<Name>_FWD_DEFINED__ guard so it is
// skipped if that guard macro has already been defined before this point.
#ifndef __IPhraseSink_FWD_DEFINED__
#define __IPhraseSink_FWD_DEFINED__
typedef interface IPhraseSink IPhraseSink;
#endif /* __IPhraseSink_FWD_DEFINED__ */
#ifndef __IWordSink_FWD_DEFINED__
#define __IWordSink_FWD_DEFINED__
typedef interface IWordSink IWordSink;
#endif /* __IWordSink_FWD_DEFINED__ */
#ifndef __IWordBreaker_FWD_DEFINED__
#define __IWordBreaker_FWD_DEFINED__
typedef interface IWordBreaker IWordBreaker;
#endif /* __IWordBreaker_FWD_DEFINED__ */
#ifndef __IWordBreakerConfig_FWD_DEFINED__
#define __IWordBreakerConfig_FWD_DEFINED__
typedef interface IWordBreakerConfig IWordBreakerConfig;
#endif /* __IWordBreakerConfig_FWD_DEFINED__ */
#ifndef __IITStopWordList_FWD_DEFINED__
#define __IITStopWordList_FWD_DEFINED__
typedef interface IITStopWordList IITStopWordList;
#endif /* __IITStopWordList_FWD_DEFINED__ */
// Supporting definitions for IWordBreaker.
//
// TEXT_SOURCE is the buffered character source handed to
// IWordBreaker::BreakText.  The type name is introduced first so that the
// refill callback type can refer to it; the structure body follows.
typedef struct tagTEXT_SOURCE TEXT_SOURCE;

// Callback that refills a TEXT_SOURCE.  WBreakGetWChar invokes it when
// iCur has reached iEnd and treats a FAILED() result as "no more text".
typedef SCODE (__stdcall *PFNFILLTEXTBUFFER)(TEXT_SOURCE *pTextSource);

// The tag is defined without repeating the TEXT_SOURCE typedef: redeclaring
// a typedef name in the same scope is only valid from C11 onwards (and in
// C++), so the single typedef above keeps this header usable by older C
// compilers as well.  Field order and types are unchanged.
struct tagTEXT_SOURCE
{
    PFNFILLTEXTBUFFER pfnFillTextBuffer; // Refill callback (see above).
    WCHAR *awcBuffer;                    // Character buffer read via iCur.
    ULONG iEnd;                          // Index at which a refill is needed.
    ULONG iCur;                          // Index of the next character to read.
};
// IWordBreaker: COM interface, derived from IUnknown, implemented by word
// breakers.  The methods are declared in vtable order; do not reorder, add
// or remove entries.
//
// NOTE(review): this header does not state the contract of the individual
// methods; the per-method notes below are inferred from the signatures and
// should be confirmed against the IWordBreaker documentation.
DECLARE_INTERFACE_(IWordBreaker, IUnknown)
{
// Initializes the breaker.  Presumably fQuery distinguishes query-time
// from index-time use, ulMaxTokenSize bounds the token length, and
// *pfLicense is an output flag -- TODO confirm.
STDMETHOD(Init)(BOOL fQuery, ULONG ulMaxTokenSize, BOOL *pfLicense) PURE;
// Takes the text source to read from together with a word sink and a
// phrase sink (presumably the receivers of the breaker's output).
STDMETHOD(BreakText)(TEXT_SOURCE *pTextSource, IWordSink *pWordSink,
IPhraseSink *pPhraseSink) PURE;
// Takes a noun and a modifier (each as pointer + character count) and an
// attachment type; pwcPhrase / pcwcPhrase are presumably the output
// buffer and its in/out length -- TODO confirm.
STDMETHOD(ComposePhrase)(WCHAR const *pwcNoun, ULONG cwcNoun,
WCHAR const *pwcModifier, ULONG cwcModifier,
ULONG ulAttachmentType, WCHAR *pwcPhrase,
ULONG *pcwcPhrase) PURE;
// Hands back a pointer to a constant wide-character string through
// ppwcsLicense (by name, the license text).
STDMETHOD(GetLicenseToUse)(WCHAR const **ppwcsLicense) PURE;
};
// Pointer alias for IWordBreaker.
typedef IWordBreaker *PIWBRK;
// ---- IWordBreakerConfig::SetBreakWordType -------------------------------
// Word categories a caller can select as the break word type.
#define IITWBC_BREAKTYPE_TEXT           ((DWORD) 0)
#define IITWBC_BREAKTYPE_NUMBER         ((DWORD) 1)
#define IITWBC_BREAKTYPE_DATE           ((DWORD) 2)
#define IITWBC_BREAKTYPE_TIME           ((DWORD) 3)
#define IITWBC_BREAKTYPE_EPOCH          ((DWORD) 4)

// ---- IWordBreakerConfig::SetControlInfo ---------------------------------
// Bit flags that may be combined in grfBreakFlags.

// Treat wildcard characters as wildcards.
#define IITWBC_BREAK_ACCEPT_WILDCARDS   0x00000001

// Run the stemmer over words once they have been broken out.
#define IITWBC_BREAK_AND_STEM           0x00000002

// ---- IWordBreakerConfig::LoadExternalBreakerData ------------------------
// Kinds of external data that can be named in dwExtDataType.
#define IITWBC_EXTDATA_CHARTABLE        ((DWORD) 0)
#define IITWBC_EXTDATA_STOPWORDLIST     ((DWORD) 1)
// IWordBreakerConfig: COM interface, derived from IUnknown, used to
// configure a word breaker (locale, word type, control flags, external
// data and an associated stemmer).  The methods are declared in vtable
// order; do not reorder, add or remove entries.
DECLARE_INTERFACE_(IWordBreakerConfig, IUnknown)
{
// Sets/gets locale info that will affect the word breaking
// behavior of IWordBreaker::BreakText.
// Returns S_OK if locale described by params is supported
// by the breaker object; E_INVALIDARG otherwise.
STDMETHOD(SetLocaleInfo)(DWORD dwCodePageID, LCID lcid) PURE;
STDMETHOD(GetLocaleInfo)(DWORD *pdwCodePageID, LCID *plcid) PURE;
// Sets/gets the type of words the breaker should expect
// to see in all subsequent calls to IWordBreaker::BreakText.
// Returns S_OK if the type is understood by the breaker
// object; E_INVALIDARG otherwise.
// The defined types are the IITWBC_BREAKTYPE_* constants.
STDMETHOD(SetBreakWordType)(DWORD dwBreakWordType) PURE;
STDMETHOD(GetBreakWordType)(DWORD *pdwBreakWordType) PURE;
// Sets/gets info that controls certain aspects of word breaking.
// This method currently accepts only the following set of flags
// in grfBreakFlags:
// IITWBC_BREAK_ACCEPT_WILDCARDS
// IITWBC_BREAK_AND_STEM
// In the future, additional information may be passed in through
// dwReserved.
STDMETHOD(SetControlInfo)(DWORD grfBreakFlags, DWORD dwReserved) PURE;
STDMETHOD(GetControlInfo)(DWORD *pgrfBreakFlags, DWORD *pdwReserved) PURE;
// Will load external breaker data, such as a table containing
// char-by-char break information or a list of stop words.
// Although the format of the data in the stream is entirely
// implementation-specific, this interface does define a couple
// of general types for that data which can be passed in
// dwExtDataType:
// IITWBC_EXTDATA_CHARTABLE
// IITWBC_EXTDATA_STOPWORDLIST
STDMETHOD(LoadExternalBreakerData)(IStream *pStream,
DWORD dwExtDataType) PURE;
// These methods allow a stemmer to be associated with the breaker. The
// breaker will take responsibility for calling
// IPersistStreamInit::Load/Save when it is loaded/saved if the stemmer
// supports that interface.
// NOTE(review): rclsid is presumably the class ID of the stemmer being
// attached -- confirm against an implementation.
STDMETHOD(SetWordStemmer)(REFCLSID rclsid, IStemmer *pStemmer) PURE;
STDMETHOD(GetWordStemmer)(IStemmer **ppStemmer) PURE;
};
// Pointer alias for IWordBreakerConfig.
typedef IWordBreakerConfig *PIWBRKC;
// Supporting definitions for IWordSink.
//
// WORDREP_BREAK_TYPE is the argument type of IWordSink::PutBreak.  The
// numeric values are spelled out because they are part of the binary
// contract between breaker and sink.
// NOTE(review): by name, EOW/EOS/EOP/EOC presumably stand for end of
// word / sentence / paragraph / chapter -- confirm against the
// WORDREP_BREAK_TYPE documentation.
typedef enum tagWORDREP_BREAK_TYPE
{
    WORDREP_BREAK_EOW = 0,
    WORDREP_BREAK_EOS = 1,
    WORDREP_BREAK_EOP = 2,
    WORDREP_BREAK_EOC = 3
} WORDREP_BREAK_TYPE;
// IWordSink: COM interface, derived from IUnknown, that is passed to
// IWordBreaker::BreakText alongside the text source.  The methods are
// declared in vtable order; do not reorder, add or remove entries.
//
// NOTE(review): the method contracts are not stated in this header; the
// notes below are inferred from the signatures -- confirm against the
// IWordSink documentation.
DECLARE_INTERFACE_(IWordSink, IUnknown)
{
// Takes a word as pointer + character count (pwcInBuf, cwc); cwcSrcLen
// and cwcSrcPos presumably give the word's length and position in the
// source text.
STDMETHOD(PutWord)(WCHAR const *pwcInBuf, ULONG cwc,
ULONG cwcSrcLen, ULONG cwcSrcPos) PURE;
// Same parameter list as PutWord; by name, supplies an alternative form
// of a word.
STDMETHOD(PutAltWord)(WCHAR const *pwcInBuf, ULONG cwc,
ULONG cwcSrcLen, ULONG cwcSrcPos) PURE;
// By name, bracket a run of alternative-phrase output.
STDMETHOD(StartAltPhrase)(void) PURE;
STDMETHOD(EndAltPhrase)(void) PURE;
// Takes one of the WORDREP_BREAK_TYPE values.
STDMETHOD(PutBreak)(WORDREP_BREAK_TYPE breakType) PURE;
};
// Pointer alias for IWordSink.
typedef IWordSink *PIWRDSNK;
// IPhraseSink: COM interface, derived from IUnknown, that is passed to
// IWordBreaker::BreakText alongside the text source.  The methods are
// declared in vtable order; do not reorder, add or remove entries.
//
// NOTE(review): the method contracts are not stated in this header; the
// notes below are inferred from the signatures -- confirm against the
// IPhraseSink documentation.
DECLARE_INTERFACE_(IPhraseSink, IUnknown)
{
// Takes a noun and a modifier (each as pointer + character count) plus an
// attachment type; the leading parameters mirror those of
// IWordBreaker::ComposePhrase.
STDMETHOD(PutSmallPhrase)(WCHAR const *pwcNoun, ULONG cwcNoun,
WCHAR const *pwcModifier,
ULONG cwcModifier,
ULONG ulAttachmentType) PURE;
// Takes a complete phrase as pointer + character count.
STDMETHOD(PutPhrase)(WCHAR const *pwcPhrase, ULONG cwcPhrase) PURE;
};
// Pointer alias for IPhraseSink.
typedef IPhraseSink *PIPHRSNK;
// WBreakGetWChar: function (C++) or macro (C) that a breaker implementation
// can use to pull characters from the caller's text source.
//
// If the read index has reached iEnd, the source's pfnFillTextBuffer
// callback is invoked first; when that call reports FAILED(), 0xFFFF is
// returned as the end-of-text marker.  Otherwise the character at
// awcBuffer[iCur] is returned and iCur is advanced by one.
//
// pTextSource must point to a valid TEXT_SOURCE; it is not checked here.
#ifdef __cplusplus
inline WCHAR WBreakGetWChar(TEXT_SOURCE *pTextSource)
{
    if (pTextSource->iCur == pTextSource->iEnd)
    {
        // Buffer exhausted: ask the caller for more text.
        if (FAILED(pTextSource->pfnFillTextBuffer(pTextSource)))
            return 0xFFFF; // End-of-text marker.
    }
    return pTextSource->awcBuffer[pTextSource->iCur++];
}
#else
// Macro form for C callers.  Every use of the argument and the expansion
// as a whole are parenthesized, so the macro behaves as a single operand
// inside larger expressions (e.g. "WBreakGetWChar(p) != 0xFFFF") and
// accepts arguments such as "&source".
// The argument is evaluated more than once: pass an expression without
// side effects.
#define WBreakGetWChar(pTextSource) \
    (((pTextSource)->iCur == (pTextSource)->iEnd) \
        ? (FAILED((pTextSource)->pfnFillTextBuffer(pTextSource)) \
            ? 0xFFFF \
            : (pTextSource)->awcBuffer[(pTextSource)->iCur++]) \
        : (pTextSource)->awcBuffer[(pTextSource)->iCur++])
#endif
// IITStopWordList: COM interface, derived from IUnknown, for a list of stop
// words.  The methods are declared in vtable order; do not reorder, add or
// remove entries.
DECLARE_INTERFACE_(IITStopWordList, IUnknown)
{
// Takes a word as pointer + character count (pwcInBuf, cwc) to add to the
// list.
STDMETHOD(AddWord)(WCHAR const *pwcInBuf, ULONG cwc) PURE;
// Takes a word as pointer + character count to look up in the list.
// NOTE(review): the HRESULT values used to signal "found" versus
// "not found" are not visible in this header -- confirm against an
// implementation.
STDMETHOD(LookupWord)(WCHAR const *pwcInBuf, ULONG cwc) PURE;
};
// Pointer alias for IITStopWordList.
typedef IITStopWordList *PIITSTWDL;
#endif // __ITWBRK_H__