/* * Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved. * * STEMKOR.H - API entry header file for Korean Stemmer API * * See korstem.doc for details * Date - 1996 Jan. made by dhyu */ #ifndef STEMKOR_H #define STEMKOR_H typedef ULONG_PTR HSTM; /* Stemmer ID type */ typedef unsigned int UINT; typedef unsigned int SRC; /* stemmer return code */ typedef unsigned short USHORT; /* Major Options */ #define SO_NOUNPHRASE 0x00000001 #define SO_PREDICATE 0x00000002 #define SO_ALONE 0x00000004 #define SO_AUXILIARY 0x0000000a /* SO_PREDICATE | 0x00000008 */ #define SO_COMPOUND 0x00000011 /* SO_NOUNPHRASE | 0x00000010 */ #define SO_SUFFIX 0x00000021 /* SO_NOUNPHRASE | 0x00000020 */ /* Minor Options : If major options are not set, this options don't have no meaning. Some minor options also can be inserted anytime. /* If SO_NOUNPHRASE is not defined, the following four have no meaning. */ #define SO_NP_NOUN 0x00000100 #define SO_NP_PRONOUN 0x00000200 #define SO_NP_NUMBER 0x00000400 #define SO_NP_DEPENDENT 0x00000800 #define SO_NP_PROPER 0x00001000 /* If SO_SUFFIX is not define, the following have no meaning. In future, thease can be inserted. I don't know which suffix is inserted yet. */ #define SO_SUFFIX_JEOG 0x00002000 typedef struct tagDecomposeOutBuffer { LPSTR wordlist; /* pointer to the result format : word\0word_info\0word\0word_info\0 ... */ unsigned short num; /* the number of saperated words */ unsigned short sch; /* total space of chars in wordlist application should assign this value */ unsigned short len; /* returned byte contains the result */ }DOB; typedef struct tagDecomposeOutBufferW { LPWSTR wordlist; unsigned short num; unsigned short sch; unsigned short len; }WDOB; typedef DOB * LPDOB; typedef WDOB * LPWDOB; typedef struct tagComposeInputBuffer { LPSTR silsa; LPSTR heosa; WORD pos; }CIB; typedef struct tagComposeInputBufferW { LPWSTR silsa; LPWSTR heosa; WORD pos; }WCIB; #ifdef _UNICODE #define LPTDOB LPWDOB #define TCIB WCIB #define TDOB WDOB #else #define LPTDOB LPDOB #define TCIB CIB #define TDOB DOB #endif typedef WORD FAR PASCAL FNDECOMPOSE (LPDOB); typedef FNDECOMPOSE FAR *LPFNDECOMPOSE; typedef WORD FAR PASCAL FNDECOMPOSEW (LPWDOB); typedef FNDECOMPOSEW FAR *LPFNDECOMPOSEW; // Word Info : two byte /* Word Info : most left 4 bits of high byte */ #define wtINVALID 0xffff #define wtSilsa 0x8000 #define wtHeosa 0x0000 /* general POS (a part of speech) info : right 4 bits of high byte */ #define POS_NOUN 0x0100 #define POS_VERB 0x0200 #define POS_ADJECTIVE 0x0300 #define POS_PRONOUN 0x0400 #define POS_TOSSI 0x0500 #define POS_ENDING 0x0600 #define POS_ADVERB 0x0700 #define POS_SUFFIX 0x0800 #define POS_AUXVERB 0x0900 #define POS_AUXADJ 0x0a00 #define POS_SPECIFIER 0x0b00 #define POS_NUMBER 0x0c00 #define POS_PREFIX 0x0d00 #define POS_OTHERS 0x0f00 /* low byte : more detail POS info --- more word infos will be inserted in the near future */ #define DEOL_SUFFIX 0x0001 #define COPULA_OTHERS 0x0002 #define PROPER_NOUN 0x0003 /* Flag define for StemmerIsEnding */ #define IS_ENDING 0x0001 #define IS_TOSSI 0x0002 /* return code : Low Byte SRC */ #define srcOOM 1 #define srcInvalid 2 /* Unknown word */ #define srcModuleError 3 /* Something wrong with parameters, or state of stemmer module */ #define srcIOErrorMdr 4 #define srcIOErrorUdr 5 #define srcNoMoreResult 6 #define srcComposeError 7 /* Minor Error Codes. Not set unless major code also set. */ /* High Byte of SRC word var. */ #define srcModuleAlreadyBusy (128<<16) /* For non-reentrant code */ #define srcInvalidID (129<<16) /* Not yet inited or already terminated.*/ #define srcExcessBuffer (130<<16) /* return buffer size is smaller than needed */ #define srcInvalidMdr (131<<16) /* Mdr not registered with spell session */ #define srcInvalidUdr (132<<16) /* Udr not registered with spell session */ #define srcInvalidMainDict (134<<16) /* Specified dictionary not correct format */ #define srcOperNotMatchedUserDict (135<<16) /* Illegal operation for user dictionary type. */ #define srcFileReadError (136<<16) /* Generic read error */ #define srcFileWriteError (137<<16) /* Generic write error */ #define srcFileCreateError (138<<16) /* Generic create error */ #define srcFileShareError (139<<16) /* Generic share error */ #define srcModuleNotTerminated (140<<16) /* Module not able to be terminated completely.*/ #define srcUserDictFull (141<<16) /* Could not update Udr without exceeding limit.*/ #define srcInvalidUdrEntry (142<<16) /* invalid chars in string(s) */ #define srcMdrCountExceeded (144<<16) /* Too many Mdr references */ #define srcUdrCountExceeded (145<<16) /* Too many udr references */ #define srcFileOpenError (146<<16) /* Generic Open error */ #define srcFileTooLargeError (147<<16) /* Generic file too large error */ #define srcUdrReadOnly (148<<16) /* Attempt to add to or write RO udr */ #define WINSRC SRC //------------------------- FUNCTION LIST ----------------------------------- extern WINSRC StemmerInit (HSTM *); extern WINSRC StemmerSetOption (HSTM, UINT); extern WINSRC StemmerGetOption (HSTM, UINT *); extern WINSRC StemmerDecompose (HSTM, LPCSTR, LPDOB); extern WINSRC StemmerDecomposeW (HSTM, LPCWSTR, LPWDOB); extern WINSRC StemmerDecomposeMore (HSTM, LPCSTR, LPDOB); extern WINSRC StemmerDecomposeMoreW (HSTM, LPCWSTR, LPWDOB); extern WINSRC StemmerEnumDecompose (HSTM, LPCSTR, LPDOB, LPFNDECOMPOSE); extern WINSRC StemmerEnumDecomposeW (HSTM, LPCWSTR, LPWDOB, LPFNDECOMPOSE); extern WINSRC StemmerCompose (HSTM, CIB, LPSTR); extern WINSRC StemmerComposeW (HSTM, WCIB, LPWSTR); extern WINSRC StemmerCompare (HSTM, LPCSTR, LPCSTR, LPSTR, LPSTR, LPSTR, WORD *); extern WINSRC StemmerCompareW (HSTM, LPCWSTR, LPCWSTR, LPWSTR, LPWSTR, LPWSTR, WORD *); extern WINSRC StemmerOpenMdr (HSTM, char *); extern WINSRC StemmerCloseMdr (HSTM); extern WINSRC StemmerTerminate (HSTM); extern WINSRC StemmerOpenUdr (HSTM, LPCSTR); extern WINSRC StemmerCloseUdr (HSTM); extern WINSRC StemmerIsEnding (HSTM, LPCSTR, UINT, BOOL *); extern WINSRC StemmerIsEndingW (HSTM, LPCWSTR, UINT, BOOL *); #define STEMMERKEY "SYSTEM\\currentcontrolset\\control\\ContentIndex\\Language\\Korean_Default" #define STEM_DICTIONARY "StemmerDictionary" BOOL StemInit(); #endif /* STEMKOR_H */