windows-nt/Source/XPSP1/NT/inetsrv/intlwb/chs/thammer.h

//+--------------------------------------------------------------------------
//
//  Copyright (C) 1994, 1995, 1996 Microsoft Corporation.  All Rights Reserved.
//
//  File:       thammer.h
//
//  This include file declares the exported APIs and their callbacks that provide
//  word-breaking functionality for non-spaced Asian languages (Japanese, Chinese)
//
//  Summary of exports:
//      EnumSelectionOffsets - This function returns the offsets for the
//          selection chunks as specified in the Selection Profile (set at compile-time)
//      EnumSummarizationOffsets - This function returns the offsets for the
//          prefix (if any), the stem, and bound morphemes (fuzokugo).
//      EnumStemOffsets - This function returns the offsets for the stem only.
//          Offsets corresponding to any prefix or postfix characters will not
//          be returned.
//
//  History:                    pathal          Created.
//              25-Jun-97       pathal          Add TH_ERROR_INIT_FAILED
//              05-Jul-97       pathal          Add EnumSentenceOffsets, etc.
//---------------------------------------------------------------------------

// Return errors: the following error codes can be returned from any of
// T-Hammer's exported APIs (EnumSelectionOffsets, EnumSummarizationOffsets,
// and EnumStemOffsets)
//

// The call completed successfully.
#define TH_ERROR_SUCCESS 0
// No hard phrase breaks (HPBs) were found.
#define TH_ERROR_NOHPBS 1
// The input text buffer was bad.
#define TH_ERROR_INVALID_INPUT 2
// The client-supplied callback was bad.
#define TH_ERROR_INVALID_CALLBACK 3
// NOTE(review): not described elsewhere in this header; presumably T-Hammer
// failed to initialize -- confirm against the implementation.
#define TH_ERROR_INIT_FAILED 4
// NOTE(review): not described elsewhere in this header; presumably the
// requested functionality is not implemented -- confirm.
#define TH_ERROR_NOT_IMPLEMENTED 5

// Offset delimiter: the following value is used to mark the end of a list of
// token offsets passed to one of the Enum* callback routines.  It is a
// sentinel offset, not an error code.

#define TH_SELECTION_INVALID_OFFSET 0xFFFFFFFF

// TOKENIZE_MODE: Begin and End HPB Modes
//
// Begin and End HPB modes signify that a hard phrase break (HPB) comes before
// the first character in the string and/or follows the last character in the
// string.  If these flags are not set, then the default behavior of EnumTokens
// is to start enumerating tokens to the right of the leftmost HPB, which
// probably won't be at the first character (unless it is a punctuation symbol),
// and to conclude enumeration at the rightmost HPB, which likely will not be
// the true end of the string.  So, these flags in effect force HPBs at the 0th
// and nth offsets, where n is the number of characters in the input buffer.
//
// WARNING: Since Tokenize operates in batch mode, it assumes that the
// start and end of the input buffer are HPBs.  These flags are only used for
// EnumTokens.
//
// NOTE(review): EnumTokens and Tokenize are not declared in this header; the
// names presumably refer to older or internal entry points -- confirm.
//
#define TOKENIZE_MODE_BEGIN_HPB             0x00000001
#define TOKENIZE_MODE_END_HPB             0x00000002

// Note on HPBs:  HPB = hard phrase break.
// HPBs are statistically determined from analyzing a tagged corpus.
// Roughly, they correspond to places where you can break with 100%
// precision (=confidence). Mostly this is around punctuation characters
// and certain conspicuous [case markers | character type] bigrams.


// When the Hide Punctuation mode is set in the tokenize flag parameter,
// T-Hammer strips punctuation out of the offsets passed to the Stem Offsets
// and Summarization Offsets callbacks.
//
#define TOKENIZE_MODE_HIDE_PUNCTUATION    0x00000004

//+--------------------------------------------------------------------------
//  Routine:    EnumSelectionOffsetsCallback
//
//  Synopsis: client-side callback that receives a list of offsets for selection chunks
//
//  Parameters:
//      pichOffsets - pointer to first element in an array of offsets into client
//          text buffer. NOTE: callback is not allowed to stash pichOffsets for
//          later processing.  pichOffsets will not persist between successive
//          callbacks.  If the callback wants to use the data pointed to by
//          pichOffsets it must copy it to its own store
//      cOffsets - number of offsets passed to client (always > 1)
//      lpData - client defined data
//
// Return:
//  TRUE - to abort token enumeration
//  FALSE - to continue
//---------------------------------------------------------------------------
// Sample callback prototype (matches the typedef below):
// BOOL
// EnumSelectionOffsetsCallback (
//    IN CONST DWORD *pichOffsets,
//    IN CONST DWORD cOffsets,
//    IN OUT LPARAM lpData);

typedef BOOL (CALLBACK * ENUM_SELECTION_OFFSETS_CALLBACK)(
    IN CONST DWORD *pichOffsets,
    IN CONST DWORD cOffsets,
    IN OUT LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumSelectionOffsets
//
//  Synopsis:  This is the main entry point for tokenizing text.  Sends the
//  offsets of selection chunks to the callback.
//  NOTE(review): the original comment said tokens "can either be offsets or
//  zero delimited strings", but the callback type declared in this header
//  only receives offsets -- confirm whether a string mode still exists.
//
//  Parameters:
//  pwszText - pointer to wide-character text buffer to be tokenized,
//  cchText - count of characters in text buffer,
//  fBeginEndHPBMode - flag describing the callback mode (see the
//     TOKENIZE_MODE_* flags above),
//  pcbEnumSelectionOffsets - pointer to callback procedure handling token
//     enumeration,
//  lpData - client defined data
//
//  Returns:
//      TH_ERROR_SUCCESS - if the call completed successfully
//      TH_ERROR_NOHPBS - if there were no HPBs
//      TH_ERROR_INVALID_INPUT - if the input buffer was bad
//      TH_ERROR_INVALID_CALLBACK - if the input callback was bad
//---------------------------------------------------------------------------
INT
APIENTRY
EnumSelectionOffsets(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_SELECTION_OFFSETS_CALLBACK pcbEnumSelectionOffsets,
    IN LPARAM lpData);

// Function-pointer type with the same signature as EnumSelectionOffsets.
typedef INT (APIENTRY *LP_ENUM_SELECTION_OFFSETS)(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_SELECTION_OFFSETS_CALLBACK pcbEnumSelectionOffsets,
    IN LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumSummarizationOffsetsCallback
//
//  Synopsis: client-side callback that receives a list of offsets for each stem
//      in the free morpheme (jiritsugo) phrase.   The last offset always covers
//      the complete string of bound morphemes (fuzokugo).  For example,
//      for "kaisan shite nai sou desu", offsets are returned for "kaisan" and
//      "shite nai sou desu".  So, counting the initial offset, there are three
//      offsets.
//
//  Parameters:
//      pichOffsets - pointer to first element in an array of offsets into client
//          text buffer. NOTE: callback is not allowed to stash pichOffsets for
//          later processing.  pichOffsets will not persist between successive
//          callbacks.  If the callback wants to use the data pointed to by
//          pichOffsets it must copy it to its own store
//      cOffsets - number of offsets passed to client (always > 1)
//      lpData - client defined data
//
// Return:
//  TRUE - to abort token enumeration
//  FALSE - to continue
//---------------------------------------------------------------------------
// Sample callback prototype (matches the typedef below):
// BOOL
// EnumSummarizationOffsetsCallback (
//    IN CONST DWORD *pichOffsets,
//    IN CONST DWORD cOffsets,
//    IN OUT LPARAM lpData);

typedef BOOL (CALLBACK * ENUM_SUMMARIZATION_OFFSETS_CALLBACK)(
    IN CONST DWORD *pichOffsets,
    IN CONST DWORD cOffsets,
    IN OUT LPARAM lpData);

// Extended summarization callback: takes the same pichOffsets / cOffsets /
// lpData parameters as ENUM_SUMMARIZATION_OFFSETS_CALLBACK plus two string
// parameters.
//   pwzPOS  - NOTE(review): presumably a string containing POS
//             (part-of-speech) info, as for EnumStemInfoCallback -- confirm
//   pwzMCat - NOTE(review): presumably a string containing MCat info, as for
//             EnumStemInfoCallback -- confirm
// NOTE(review): the return convention is not documented here; presumably
// TRUE aborts enumeration and FALSE continues, as for the other callbacks.
typedef BOOL (CALLBACK * ENUM_SUMMARIZATION_OFFSETS_EX1_CALLBACK)(
    IN CONST DWORD *pichOffsets,
    IN CONST DWORD cOffsets,
    IN PCWSTR pwzPOS,
    IN PCWSTR pwzMCat,
    IN OUT LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumSummarizationOffsets
//
//  Synopsis:  This is the entry point for returning offsets for tokens used
//  in summarization.   These tokens correspond to stems and bound morphemes
//  (fuzokugo) in the text.  A list of offsets (and a count) is sent to the
//  EnumSummarizationOffsets callback (see above)
//
//  Parameters:
//  pwszText - pointer to wide-character text buffer to be tokenized,
//  cchText - count of characters in text buffer,
//  fBeginEndHPBMode - flag describing the callback mode (see the
//     TOKENIZE_MODE_* flags above),
//  pcbEnumSummarizationOffsets - pointer to callback procedure handling token
//     enumeration,
//  lpData - client defined data
//
//  Returns:
//      TH_ERROR_SUCCESS - if the call completed successfully
//      TH_ERROR_NOHPBS - if there were no HPBs
//      TH_ERROR_INVALID_INPUT - if the input buffer was bad
//      TH_ERROR_INVALID_CALLBACK - if the input callback was bad
//---------------------------------------------------------------------------
INT
APIENTRY
EnumSummarizationOffsets(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_SUMMARIZATION_OFFSETS_CALLBACK pcbEnumSummarizationOffsets,
    IN LPARAM lpData);

// Variant of EnumSummarizationOffsets whose callback is an
// ENUM_SUMMARIZATION_OFFSETS_EX1_CALLBACK, i.e. one that additionally
// receives the pwzPOS and pwzMCat strings.  The other parameters have the
// same names and types as those of EnumSummarizationOffsets.
// NOTE(review): return codes are not documented for this variant; presumably
// the same TH_ERROR_* values as EnumSummarizationOffsets -- confirm.
INT
APIENTRY
EnumSummarizationOffsetsEx1(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_SUMMARIZATION_OFFSETS_EX1_CALLBACK pcbEnumSummarizationOffsetsEx1,
    IN LPARAM lpData);

// Function-pointer type with the same signature as EnumSummarizationOffsets.
typedef INT (APIENTRY *LP_ENUM_SUMMARIZATION_OFFSETS)(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_SUMMARIZATION_OFFSETS_CALLBACK pcbEnumSummarizationOffsets,
    IN LPARAM lpData);

// Function-pointer type with the same signature as EnumSummarizationOffsetsEx1.
typedef INT (APIENTRY *LP_ENUM_SUMMARIZATION_OFFSETS_EX1)(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_SUMMARIZATION_OFFSETS_EX1_CALLBACK pcbEnumSummarizationOffsetsEx1,
    IN LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumStemOffsetsCallback
//
//  Synopsis: client-side callback that receives a list of offsets for stems
//
//  NOTE(review): the previous comment described a callback taking a
//  zero-terminated stem string (pwszStem) per SPB.  The typedef below takes
//  an offsets array instead, so the parameters are documented from the
//  declared signature; descriptions follow the sibling offsets callbacks that
//  use the same parameter names -- confirm against the implementation.
//
//  Parameters:
//      pichOffsets - pointer to first element in an array of offsets
//          (presumably offsets into the client text buffer that must be
//          copied if needed after the callback returns -- confirm)
//      cOffsets - number of offsets passed to client
//      lpData - client defined data
//
// Return:
//  TRUE - to abort token enumeration
//  FALSE - to continue
//---------------------------------------------------------------------------
// Sample callback prototype (matches the typedef below):
// BOOL
// EnumStemOffsetsCallback (
//    IN CONST DWORD *pichOffsets,
//    IN CONST DWORD cOffsets,
//    IN OUT LPARAM lpData);

typedef BOOL (CALLBACK * ENUM_STEM_OFFSETS_CALLBACK)(
    IN CONST DWORD *pichOffsets,
    IN CONST DWORD cOffsets,
    IN OUT LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumStemOffsets
//
//  Synopsis:  This is the entry point for tokenizing stems.  Sends offsets
//  for stems to the EnumStemOffsets callback (see above)
//
//  Parameters:
//  pwszText - pointer to wide-character text buffer to be tokenized,
//  cchText - count of characters in text buffer,
//  fBeginEndHPBMode - flag describing the callback mode (see the
//     TOKENIZE_MODE_* flags above),
//  pcbEnumStemOffsets - pointer to callback procedure handling token
//     enumeration,
//  pcchTextProcessed - NOTE(review): not previously documented; presumably
//     receives the count of characters of pwszText that were processed --
//     confirm, including what the IN value means,
//  lpData - client defined data
//
//  Returns:
//      TH_ERROR_SUCCESS - if the call completed successfully
//      TH_ERROR_NOHPBS - if there were no HPBs
//      TH_ERROR_INVALID_INPUT - if the input buffer was bad
//      TH_ERROR_INVALID_CALLBACK - if the input callback was bad
//---------------------------------------------------------------------------
INT
APIENTRY
EnumStemOffsets(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_STEM_OFFSETS_CALLBACK pcbEnumStemOffsets,
    IN OUT DWORD *pcchTextProcessed,
    IN LPARAM lpData);

// Function-pointer type with the same signature as EnumStemOffsets.
typedef INT (APIENTRY *LP_ENUM_STEM_OFFSETS)(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_STEM_OFFSETS_CALLBACK pcbEnumStemOffsets,
    IN OUT DWORD *pcchTextProcessed,
    IN LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumStemInfoCallback
//
//  Synopsis: client-side callback that receives offsets and stem information
//
//  Parameters:
//      ichOffset - offset to first character in stem
//      cchLen - length of the stem
//      pwszPOS - string containing POS (part-of-speech) info
//      pwszMCat - string containing MCat info
//      pwszDictionaryForm - string containing Dictionary Form
//      lpData - client defined data
//
// Return:
//  TRUE - to abort token enumeration
//  FALSE - to continue
//---------------------------------------------------------------------------
// Sample callback prototype (matches the typedef below):
// BOOL
// EnumStemInfoCallback (
//    IN CONST DWORD ichOffset,
//    IN CONST DWORD cchLen,
//    IN PCWSTR pwszPOS,
//    IN PCWSTR pwszMCat,
//    IN PCWSTR pwszDictionaryForm,
//    IN OUT LPARAM lpData);

typedef BOOL (CALLBACK * ENUM_STEM_INFO_CALLBACK)(
    IN CONST DWORD ichOffset,
    IN CONST DWORD cchLen,
    IN PCWSTR pwszPOS,
    IN PCWSTR pwszMCat,
    IN PCWSTR pwszDictionaryForm,
    IN OUT LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumStemInfo
//
//  Synopsis:  Call this routine to get information about stems.
//  For example, if you want the dictionary form, part-of-speech or
//  MCat information for a stem, then this is the API for you
//
//  Parameters:
//  pwszText - pointer to wide-character text buffer to be tokenized,
//  cchText - count of characters in text buffer,
//  fBeginEndHPBMode - flag describing the callback mode (see the
//      TOKENIZE_MODE_* flags above),
//  pcbEnumStemInfo - pointer to callback procedure handling stem info
//      enumeration
//  pcchTextProcessed - NOTE(review): not previously documented; presumably
//      receives the count of characters of pwszText that were processed --
//      confirm, including what the IN value means
//  lpData - client defined data
//
//  Returns:
//      TH_ERROR_SUCCESS - if the call completed successfully
//      TH_ERROR_NOHPBS - if there were no HPBs
//      TH_ERROR_INVALID_INPUT - if the input buffer was bad
//      TH_ERROR_INVALID_CALLBACK - if the input callback was bad
//---------------------------------------------------------------------------
INT
APIENTRY
EnumStemInfo(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_STEM_INFO_CALLBACK pcbEnumStemInfo,
    IN OUT DWORD *pcchTextProcessed,
    IN LPARAM lpData);

// Function-pointer type with the same signature as EnumStemInfo.
typedef INT (APIENTRY *LP_ENUM_STEM_INFO)(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fBeginEndHPBMode,
    IN ENUM_STEM_INFO_CALLBACK pcbEnumStemInfo,
    IN OUT DWORD *pcchTextProcessed,
    IN LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumSentenceOffsetsCallback
//
//  Synopsis: client-side callback that receives the start and end offsets of
//      one sentence
//
//  Parameters:
//      ichOffsetStart - offset to start of sentence
//      ichOffsetEnd - offset to end of sentence (includes terminating punctuation)
//      lpData - client defined data
//
// Return:
//  TRUE - to abort token enumeration
//  FALSE - to continue
//---------------------------------------------------------------------------
// Sample callback prototype (matches the typedef below):
// BOOL
// EnumSentenceOffsetsCallback (
//    IN DWORD ichOffsetStart,
//    IN DWORD ichOffsetEnd,
//    IN OUT LPARAM lpData);

typedef BOOL (CALLBACK * ENUM_SENTENCE_OFFSETS_CALLBACK)(
    IN DWORD ichOffsetStart,
    IN DWORD ichOffsetEnd,
    IN OUT LPARAM lpData);

//+--------------------------------------------------------------------------
//  Routine:    EnumSentenceOffsets
//
//  Synopsis:  This is the main entry point for breaking sentences.
//      Sends offsets delimiting sentences to the callback.
//
//  Parameters:
//  pwszText - pointer to wide-character text buffer to be tokenized,
//  cchText - count of characters in text buffer,
//  fTokenizeMode - not used.  later this will be used to control how
//      partial sentences are handled.
//  pcbEnumSentenceOffsets - pointer to callback procedure handling offsets
//  lpData - client defined data
//
//  Returns:
//      TH_ERROR_SUCCESS - if the call completed successfully
//      TH_ERROR_NOHPBS - if there were no HPBs
//      TH_ERROR_INVALID_INPUT - if the input buffer was bad
//      TH_ERROR_INVALID_CALLBACK - if the input callback was bad
//---------------------------------------------------------------------------
INT
APIENTRY
EnumSentenceOffsets(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fTokenizeMode,
    IN ENUM_SENTENCE_OFFSETS_CALLBACK pcbEnumSentenceOffsets,
    IN LPARAM lpData);

// Function-pointer type with the same signature as EnumSentenceOffsets.
typedef INT (APIENTRY *LP_ENUM_SENTENCE_OFFSETS)(
    IN PCWSTR pwszText,
    IN DWORD cchText,
    IN DWORD fTokenizeMode,
    IN ENUM_SENTENCE_OFFSETS_CALLBACK pcbEnumSentenceOffsets,
    IN LPARAM lpData);


//+--------------------------------------------------------------------------
//  Routine:    FEMorphCallback
//
//  Synopsis:  The callback that gets a text stream from T-Hammer.
//
//  Parameters:
//      pwszMorphRecs - a pointer to wide character text stream,
//                   which contains morphological analyses of a given sentence
//      pvData - pointer to private data
//
//  Returns:
//      TRUE if no more analysis is needed
//---------------------------------------------------------------------------
// Sample callback prototype (matches the typedef below):
// BOOL
// FEMorphCallback(
//    IN PWSTR pwszMorphRecs,
//    IN VOID *pvData);

typedef BOOL (CALLBACK * FEMORPH_CALLBACK)(
    IN PWSTR pwszMorphRecs,
    IN VOID *pvData);

//+--------------------------------------------------------------------------
//  Routine:    FEMorph
//
//  Synopsis:  This is the entry point for NLPWIN morpheme analysis.
//  Sends a morpheme record string back to the lex callback in NLPWIN
//
//  Parameters:
//  pwszText - pointer to wide-character text buffer to be tokenized
//      (NOTE(review): no length parameter is taken, so the buffer is
//      presumably zero-terminated -- confirm),
//  pcbFEMorphCB - pointer to callback procedure handling morph rec enumeration
//  pvData - pointer to private data
//
//  Returns:
//      NOTE(review): not documented in the original header; presumably one of
//      the TH_ERROR_* codes, as for the Enum* entry points -- confirm.
//---------------------------------------------------------------------------
INT
APIENTRY
FEMorph(
    IN PCWSTR pwszText,
    IN FEMORPH_CALLBACK pcbFEMorphCB,
    IN VOID *pvData);

// Function-pointer type with the same signature as FEMorph.
typedef INT (APIENTRY *LP_FEMORPH)(
    IN PCWSTR pwszText,
    IN FEMORPH_CALLBACK pcbFEMorphCB,
    IN VOID *pvData);
Add source files 2020-09-26 03:20:57 -05:00			`//+--------------------------------------------------------------------------`
			`//`
			`// Copyright (C) 1994, 1995, 1996 Microsoft Corporation. All Rights Reserved.`
			`//`
			`// File: thammer.h`
			`//`
			`// This include file defines 3 exported APIs and their callbacks that export`
			`// word-breaking functionality for non-spaced Asian languages (Japanese, Chinese)`
			`//`
			`// Summary of exports:`
			`// EnumSelectionOffsets - This function returns the offsets for the`
			`// selection chunks as specified in the Selection Profile (set at compile-time)`
			`// EnumSummarizationOffsets - This function returns the offsets for the`
			`// prefix (if any), the stem, and bound morphemes (fuzokugo).`
			`// EnumStemOffsets - This function returns the offsets for the stem only.`
			`// Offsets corresponding to any prefix or postfix characters will not`
			`// be returned.`
			`//`
			`// History: pathal Created.`
			`// 25-Jun-97 pathal Add TH_ERROR_INIT_FAILED`
			`// 05-Jul-97 pathal Add EnumSentenceOffsets, etc.`
			`//---------------------------------------------------------------------------`

			`// Return errors: the following error codes can be returned from any of`
			`// T-Hammer's exported APIs (EnumSelectionOffsets, EnumSummarizationOffsets,`
			`// and EnumStemOffsets)`
			`//`

			`#define TH_ERROR_SUCCESS 0`
			`#define TH_ERROR_NOHPBS 1`
			`#define TH_ERROR_INVALID_INPUT 2`
			`#define TH_ERROR_INVALID_CALLBACK 3`
			`#define TH_ERROR_INIT_FAILED 4`
			`#define TH_ERROR_NOT_IMPLEMENTED 5`

			`// Offset delimiter: the following code is used to delimit the end of a list of`
			`// token offsets returned to one of the Enum* callback routines. This is not`
			`// an error code.`

			`#define TH_SELECTION_INVALID_OFFSET 0xFFFFFFFF`

			`// TOKENIZE_MODE: Begin and End HPB Modes`
			`//`
			`// Begin and End HPB modes signify that a hard phrase break comes before the`
			`// first character in the string and/or follows after the last character in the string`
			`// If these flags are not set, then the default behavior of EnumTokens is to start`
			`// enumerating tokens to the right of the leftmost HPB, which probably won't`
			`// be at the first character (unless it is a punctuation symbol) and to conclude`
			`// enumeration at the rightmost HPB, which likely will not be the true end of the`
			`// string. So, these flags in affect force HPBs at the 0th and nth offsets, where`
			`// n is the number of characters in the input buffer`
			`//`
			`// WARNNIG: Since Tokenize operates in batch mode, it assumes that the`
			`// start and end of the input buffer are HPBs. These flags are only used for`
			`// EnumTokens`
			`//`
			`#define TOKENIZE_MODE_BEGIN_HPB 0x00000001`
			`#define TOKENIZE_MODE_END_HPB 0x00000002`

			`// Note on HPBs: HPB = hard phrase break.`
			`// HPBs are statistically determined from analyzing a tagged corpora.`
			`// Roughly, they cor-respond to places where you csn break with 100%`
			`// precision (=confidence). Mostly this is around punctuation characters`
			`// and certain conspicuous [case markers \| character type] bigrams.`


			`// When the Hide Punctuation mode is set in the tokenize flag parameter`
			`// T-Hammer strips punctuation out of the Stem Offsets and Summarization Offsets`
			`// callback`
			`//`
			`#define TOKENIZE_MODE_HIDE_PUNCTUATION 0x00000004`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumSelectionOffsetsCallback`
			`//`
			`// Synopsis: client-side callback that receives a list of offsets for selection chunks`
			`//`
			`// Parameters:`
			`// pichOffsets - pointer to first element in an array of offsets into client`
			`// text buffer. NOTE: callback is not allowed to stash pichChunks for`
			`// later processing. pichChunks will not persist between successive`
			`// callbacks. If the callback wants to use the data pointed to by pich`
			`// it must copy it to its own store`
			`// cOffsets - number of offsets passed to client (always > 1)`
			`// lpData - client defined data`
			`//`
			`// Return:`
			`// TRUE - to abort token enumeration`
			`// FALSE - to continue`
			`//---------------------------------------------------------------------------`
			`// BOOL`
			`// EnumSelectionOffsetsCallback (`
			`// IN CONST DWORD *pichOffsets,`
			`// IN DWORD cOffsets,`
			`// IN OUT LPARAM lpData);`

			`typedef BOOL (CALLBACK * ENUM_SELECTION_OFFSETS_CALLBACK)(`
			`IN CONST DWORD *pichOffsets,`
			`IN CONST DWORD cOffsets,`
			`IN OUT LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumSelectionOffsets`
			`//`
			`// Synopsis: This is the main entry point for tokenizing text. Sends tokens,`
			`// which can either be offsets or zero delimited strings to callback.`
			`//`
			`// Parameters:`
			`// pwszText - pointer to wide-character text buffer to be tokenized,`
			`// cchText - count of characters in text buffer,`
			`// fBeginEndHPBMode - flag describing the callback mode (see above),`
			`// pcbEnumSelectionOffsets - pointer to callback procedure handling token`
			`// enumeration,`
			`// lpData - client defined data`
			`//`
			`// Returns:`
			`// TH_ERROR_SUCCESS - if the call completed successfully`
			`// TH_ERROR_NOHPBS - if there were no HPBs`
			`// TH_ERROR_INVALID_INPUT - if the input buffer was bad`
			`// TH_ERROR_INVALID_CALLBACK - if the input callback was bad`
			`//---------------------------------------------------------------------------`
			`INT`
			`APIENTRY`
			`EnumSelectionOffsets(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_SELECTION_OFFSETS_CALLBACK pcbEnumSelectionOffsets,`
			`IN LPARAM lpData);`

			`typedef INT (APIENTRY *LP_ENUM_SELECTION_OFFSETS)(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_SELECTION_OFFSETS_CALLBACK pcbEnumSelectionOffsets,`
			`IN LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumSummarizationOffsetsCallback`
			`//`
			`// Synopsis: client-side callback that receives a list of offsets for each stem`
			`// in the free morpheme (jiritsugo) phrase. Last offset is always contains`
			`// the complete string of bound morphemes (fuzokugo). For example,`
			`// for "kaisan shite nai sou desu", offsets are returned for "kaisan" and`
			`// "shite nai sou desu". So, counting the first initial offset, there are three`
			`// offsets.`
			`//`
			`// Parameters:`
			`// pichOffsets - pointer to first element in an array of offsets into client`
			`// text buffer. NOTE: callback is not allowed to stash pichOffsets for`
			`// later processing. pichOffsets will not persist between successive`
			`// callbacks. If the callback wants to use the data pointed to by pich`
			`// it must copy it to its own store`
			`// cOffsets - number of offsets passed to client (always > 1)`
			`// lpData - client defined data`
			`//`
			`// Return:`
			`// TRUE - to abort token enumeration`
			`// FALSE - to continue`
			`//---------------------------------------------------------------------------`
			`// BOOL`
			`// EnumSummarizationOffsets (`
			`// IN CONST DWORD *pichOffsets,`
			`// IN DWORD cOffsets,`
			`// IN OUT LPARAM lpData);`

			`typedef BOOL (CALLBACK * ENUM_SUMMARIZATION_OFFSETS_CALLBACK)(`
			`IN CONST DWORD *pichOffsets,`
			`IN CONST DWORD cOffsets,`
			`IN OUT LPARAM lpData);`

			`typedef BOOL (CALLBACK * ENUM_SUMMARIZATION_OFFSETS_EX1_CALLBACK)(`
			`IN CONST DWORD *pichOffsets,`
			`IN CONST DWORD cOffsets,`
			`IN PCWSTR pwzPOS,`
			`IN PCWSTR pwzMCat,`
			`IN OUT LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumSummarizationOffsets`
			`//`
			`// Synopsis: This is the entry point for returning offsets for tokens used`
			`// in summarization. These tokens correspond to stems and bound morphemes`
			`// (fuzokugo) in the text. A list of offsets (and a count) is sent to the`
			`// EnumSummarizationOffsets callback (see above)`
			`//`
			`// Parameters:`
			`// pwszText - pointer to wide-character text buffer to be tokenized,`
			`// cchText - count of characters in text buffer,`
			`// fTokenizeMode - flag describing the callback mode (see above),`
			`// pEnumTokOutputProc - pointer to callback procedure handling token`
			`// enumeration,`
			`// lpData - client defined data`
			`//`
			`// Returns:`
			`// TH_ERROR_SUCCESS - if the call completed successfully`
			`// TH_ERROR_NOHPBS - if there were no HPBs`
			`// TH_ERROR_INVALID_INPUT - if the input buffer was bad`
			`// TH_ERROR_INVALID_CALLBACK - if the input callback was bad`
			`//---------------------------------------------------------------------------`
			`INT`
			`APIENTRY`
			`EnumSummarizationOffsets(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_SUMMARIZATION_OFFSETS_CALLBACK pcbEnumSummarizationOffsets,`
			`IN LPARAM lpData);`

			`INT`
			`APIENTRY`
			`EnumSummarizationOffsetsEx1(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_SUMMARIZATION_OFFSETS_EX1_CALLBACK pcbEnumSummarizationOffsetsEx1,`
			`IN LPARAM lpData);`

			`typedef INT (APIENTRY *LP_ENUM_SUMMARIZATION_OFFSETS)(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_SUMMARIZATION_OFFSETS_CALLBACK pcbEnumSummarizationOffsets,`
			`IN LPARAM lpData);`

			`typedef INT (APIENTRY *LP_ENUM_SUMMARIZATION_OFFSETS_EX1)(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_SUMMARIZATION_OFFSETS_EX1_CALLBACK pcbEnumSummarizationOffsetsEx1,`
			`IN LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumStemOffsetsCallback`
			`//`
			`// Synopsis: client-side callback that receives a zero--terminated stem per SPB`
			`//`
			`// Parameters:`
			`// pwszStem - zero terminated stem string`
			`// lpData - client defined data`
			`//`
			`// Return:`
			`// TRUE - to abort token enumeration`
			`// FALSE - to continue`
			`//---------------------------------------------------------------------------`
			`// BOOL`
			`// EnumStemOffsetsCallback (`
			`// IN WCHAR *pwszStem,`
			`// IN OUT LPARAM lpData);`

			`typedef BOOL (CALLBACK * ENUM_STEM_OFFSETS_CALLBACK)(`
			`IN CONST DWORD *pichOffsets,`
			`IN CONST DWORD cOffsets,`
			`IN OUT LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumStemOffsets`
			`//`
			`// Synopsis: This is the entry point for tokenizing stems. Sends offsets,`
			`// for stems to the EnumStemOffsets callback (see above)`
			`//`
			`// Parameters:`
			`// pwszText - pointer to wide-character text buffer to be tokenized,`
			`// cchText - count of characters in text buffer,`
			`// fTokenizeMode - flag describing the callback mode (see above),`
			`// pEnumTokOutputProc - pointer to callback procedure handling token`
			`// enumeration,`
			`// lpData - client defined data`
			`//`
			`// Returns:`
			`// TH_ERROR_SUCCESS - if the call completed successfully`
			`// TH_ERROR_NOHPBS - if there were no HPBs`
			`// TH_ERROR_INVALID_INPUT - if the input buffer was bad`
			`// TH_ERROR_INVALID_CALLBACK - if the input callback was bad`
			`//---------------------------------------------------------------------------`
			`INT`
			`APIENTRY`
			`EnumStemOffsets(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_STEM_OFFSETS_CALLBACK pcbEnumStemOffsets,`
			`IN OUT DWORD *pcchTextProcessed,`
			`IN LPARAM lpData);`

			`typedef INT (APIENTRY *LP_ENUM_STEM_OFFSETS)(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_STEM_OFFSETS_CALLBACK pcbEnumStemOffsets,`
			`IN OUT DWORD *pcchTextProcessed,`
			`IN LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumStemInfoCallback`
			`//`
			`// Synopsis: client-side callback that receives offsets and stem information`
			`//`
			`// Parameters:`
			`// ichOffset - offset to first character in stem`
			`// cchLen - length of the stem`
			`// pwszPOS - string containing POS info`
			`// pwszMCat - string containing MCat info`
			`// pwszDictionaryForm - string containing Dictionary Form`
			`// lpData - client defined data`
			`//`
			`// Return:`
			`// TRUE - to abort token enumeration`
			`// FALSE - to continue`
			`//---------------------------------------------------------------------------`
			`// BOOL`
			`// EnumStemInfoCallback (`
			`// IN CONST DWORD ichOffset,`
			`// IN CONST DWORD cchLen,`
			`// IN PCWSTR pwszPOS,`
			`// IN PCWSTR pwszMCat,`
			`// IN PCWSTR pwszDictionaryForm,`
			`// IN OUT LPARAM lpData);`

			`typedef BOOL (CALLBACK * ENUM_STEM_INFO_CALLBACK)(`
			`IN CONST DWORD ichOffset,`
			`IN CONST DWORD cchLen,`
			`IN PCWSTR pwszPOS,`
			`IN PCWSTR pwszMCat,`
			`IN PCWSTR pwszDictionaryForm,`
			`IN OUT LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumStemInfo (LP_ENUM_STEM_INFO is the matching function-pointer type)`
			`//`
			`// Synopsis: Call this routine to get information about stems.`
			`// For example, if you want the dictionary form, part-of-speech or`
			`// MCat information for a stem, then this is the API for you.`
			`//`
			`// Parameters:`
			`// pwszText - pointer to wide-character text buffer to be tokenized,`
			`// cchText - count of characters in text buffer,`
			`// fBeginEndHPBMode - Begin/End HPB mode flag (see TOKENIZE_MODE above),`
			`// pcbEnumStemInfo - pointer to callback procedure handling stem info enumeration,`
			`// pcchTextProcessed - in/out character count; presumably the number of characters processed (TODO confirm),`
			`// lpData - client defined data`
			`//`
			`// Returns:`
			`// TH_ERROR_SUCCESS - if the call completed successfully`
			`// TH_ERROR_NOHPBS - if there were no HPBs`
			`// TH_ERROR_INVALID_INPUT - if the input buffer was bad`
			`// TH_ERROR_INVALID_CALLBACK - if the input callback was bad`
			`//---------------------------------------------------------------------------`
			`INT`
			`APIENTRY`
			`EnumStemInfo(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_STEM_INFO_CALLBACK pcbEnumStemInfo,`
			`IN OUT DWORD *pcchTextProcessed,`
			`IN LPARAM lpData);`

			`typedef INT (APIENTRY *LP_ENUM_STEM_INFO)(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fBeginEndHPBMode,`
			`IN ENUM_STEM_INFO_CALLBACK pcbEnumStemInfo,`
			`IN OUT DWORD *pcchTextProcessed,`
			`IN LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumSentenceOffsetsCallback`
			`//`
			`// Synopsis: client-side callback that receives the start and end offsets of a sentence`
			`//`
			`// Parameters:`
			`// ichOffsetStart - offset to start of sentence`
			`// ichOffsetEnd - offset to end of sentence (includes terminating punctuation)`
			`// lpData - client defined data`
			`//`
			`// Return:`
			`// TRUE - to abort sentence enumeration`
			`// FALSE - to continue`
			`//---------------------------------------------------------------------------`
			`// BOOL CALLBACK`
			`// EnumSentenceOffsetsCallback (`
			`// IN DWORD ichOffsetStart,`
			`// IN DWORD ichOffsetEnd,`
			`// IN OUT LPARAM lpData);`

			`typedef BOOL (CALLBACK * ENUM_SENTENCE_OFFSETS_CALLBACK)(`
			`IN DWORD ichOffsetStart,`
			`IN DWORD ichOffsetEnd,`
			`IN OUT LPARAM lpData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: EnumSentenceOffsets (LP_ENUM_SENTENCE_OFFSETS is the matching function-pointer type)`
			`//`
			`// Synopsis: This is the main entry point for breaking sentences.`
			`// Sends offsets delimiting sentences to the callback.`
			`//`
			`// Parameters:`
			`// pwszText - pointer to wide-character text buffer to be tokenized,`
			`// cchText - count of characters in text buffer,`
			`// fTokenizeMode - not used. Later this will be used to control how`
			`// partial sentences are handled.`
			`// pcbEnumSentenceOffsets - pointer to callback procedure handling offsets,`
			`// lpData - client defined data`
			`//`
			`// Returns:`
			`// TH_ERROR_SUCCESS - if the call completed successfully`
			`// TH_ERROR_NOHPBS - if there were no HPBs`
			`// TH_ERROR_INVALID_INPUT - if the input buffer was bad`
			`// TH_ERROR_INVALID_CALLBACK - if the input callback was bad`
			`//---------------------------------------------------------------------------`
			`INT`
			`APIENTRY`
			`EnumSentenceOffsets(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fTokenizeMode,`
			`IN ENUM_SENTENCE_OFFSETS_CALLBACK pcbEnumSentenceOffsets,`
			`IN LPARAM lpData);`

			`typedef INT (APIENTRY *LP_ENUM_SENTENCE_OFFSETS)(`
			`IN PCWSTR pwszText,`
			`IN DWORD cchText,`
			`IN DWORD fTokenizeMode,`
			`IN ENUM_SENTENCE_OFFSETS_CALLBACK pcbEnumSentenceOffsets,`
			`IN LPARAM lpData);`


			`//+--------------------------------------------------------------------------`
			`// Routine: FEMorphCallback`
			`//`
			`// Synopsis: The callback that gets a text stream from T-Hammer.`
			`//`
			`// Parameters:`
			`// pwszMorphRecs - a pointer to wide character text stream,`
			`// which contains morphological analyses of a given sentence`
			`// pvData - pointer to private data`
			`//`
			`// Returns:`
			`// TRUE if no more analysis is needed`
			`//---------------------------------------------------------------------------`
			`// BOOL CALLBACK`
			`// FEMorphCallback(`
			`// IN PWSTR pwszMorphRecs, IN VOID *pvData);`

			`typedef BOOL (CALLBACK * FEMORPH_CALLBACK)(`
			`IN PWSTR pwszMorphRecs,`
			`IN VOID *pvData);`

			`//+--------------------------------------------------------------------------`
			`// Routine: FEMorph (LP_FEMORPH is the matching function-pointer type)`
			`//`
			`// Synopsis: This is the entry point for NLPWIN morpheme analysis.`
			`// Sends a morpheme record string back to the lex callback in NLPWIN`
			`//`
			`// Parameters:`
			`// pwszText - pointer to wide-character text buffer to be tokenized (no length is passed; presumably NUL-terminated - TODO confirm),`
			`// pcbFEMorphCB - pointer to callback procedure handling morph rec enumeration`
			`// pvData - pointer to private data`
			`//`
			`// Returns: INT status; values are not documented here (presumably a TH_ERROR_* code - TODO confirm)`
			`//---------------------------------------------------------------------------`
			`INT`
			`APIENTRY`
			`FEMorph(`
			`IN PCWSTR pwszText,`
			`IN FEMORPH_CALLBACK pcbFEMorphCB,`
			`IN VOID *pvData);`

			`typedef INT (APIENTRY *LP_FEMORPH)(`
			`IN PCWSTR pwszText,`
			`IN FEMORPH_CALLBACK pcbFEMorphCB,`
			`IN VOID *pvData);`