//+--------------------------------------------------------------------------- // // Microsoft Windows // Copyright (C) Microsoft Corporation, 1991 - 1995. // // File: stemmer.cxx // // Contents: NLG's FarEast Stemmer // // History: 01-July-1996 PatHal Created. // //---------------------------------------------------------------------------- #include "pch.cxx" #pragma hdrstop #include "stemmer.hxx" extern long gulcInstances; //+--------------------------------------------------------------------------- // // Member: CStemmer::CStemmer // // Synopsis: Constructor for the CStemmer class. // // Arguments: [lcid] -- locale id // //---------------------------------------------------------------------------- CStemmer::CStemmer( LCID lcid ) : _cRefs(1) { InterlockedIncrement( &gulcInstances ); } //+--------------------------------------------------------------------------- // // Member: CStemmer::~CStemmer // // Synopsis: Destructor for the CStemmer class. // // Notes: All termination/deallocation is done by embedded smart pointers // //---------------------------------------------------------------------------- CStemmer::~CStemmer() { InterlockedDecrement( &gulcInstances ); } //+------------------------------------------------------------------------- // // Method: CStemmer::QueryInterface // // Synopsis: Rebind to other interface // // Arguments: [riid] -- IID of new interface // [ppvObject] -- New interface * returned here // // Returns: S_OK if bind succeeded, E_NOINTERFACE if bind failed // //-------------------------------------------------------------------------- SCODE STDMETHODCALLTYPE CStemmer::QueryInterface( REFIID riid, void ** ppvObject) { IUnknown *pUnkTemp; SCODE sc = S_OK; switch( riid.Data1 & 0x000000FF ) { case 0x00: if ( riid == IID_IUnknown ) pUnkTemp = (IUnknown *)this; else sc = E_NOINTERFACE; break; case 0x40: if ( riid == IID_IStemmer ) pUnkTemp = (IUnknown *)(IStemmer *)this; else sc = E_NOINTERFACE; break; default: pUnkTemp = 0; sc = E_NOINTERFACE; break; } if( 0 != pUnkTemp ) { *ppvObject = (void * )pUnkTemp; pUnkTemp->AddRef(); } else *ppvObject = 0; return(sc); } //+------------------------------------------------------------------------- // // Method: CStemmer::AddRef // // Synopsis: Increments refcount // //-------------------------------------------------------------------------- ULONG STDMETHODCALLTYPE CStemmer::AddRef() { return InterlockedIncrement( &_cRefs ); } //+------------------------------------------------------------------------- // // Method: CStemmer::Release // // Synopsis: Decrement refcount. Delete if necessary. // //-------------------------------------------------------------------------- ULONG STDMETHODCALLTYPE CStemmer::Release() { unsigned long uTmp = InterlockedDecrement( &_cRefs ); if ( 0 == uTmp ) delete this; return(uTmp); } //+------------------------------------------------------------------------- // // Method: CStemmer::Init // // Synopsis: Initialize stemmer // // Arguments: [ulMaxTokenSize] -- Maximum size token stored by caller // [pfLicense] -- Set to true if use restricted // // Returns: Status code // //-------------------------------------------------------------------------- SCODE STDMETHODCALLTYPE CStemmer::Init( ULONG ulMaxTokenSize, BOOL *pfLicense ) { if (IsBadWritePtr(pfLicense, sizeof(DWORD))) { return E_FAIL; } *pfLicense = TRUE; _ulMaxTokenSize = ulMaxTokenSize; return S_OK; } //+--------------------------------------------------------------------------- // // Member: CStemmer::GetLicenseToUse // // Synopsis: Returns a pointer to vendors license information // // Arguments: [ppwcsLicense] -- ptr to ptr to which license info is returned // //---------------------------------------------------------------------------- SCODE STDMETHODCALLTYPE CStemmer::GetLicenseToUse( const WCHAR **ppwcsLicense ) { static WCHAR const * wcsCopyright = L"Copyright Microsoft, 1991-1995"; if (IsBadWritePtr(ppwcsLicense, sizeof(DWORD))) { return ( E_FAIL ); } *ppwcsLicense = wcsCopyright; return( S_OK ); } //+--------------------------------------------------------------------------- // // Member: CStemmer::StemWord // // Synopsis: Stem a word into its inflected forms, eg swim to swims and swimming // // Arguments: [pwcInBuf] -- input Unicode word // [cwc] -- count of characters in word // [pStemSink] -- sink to collect inflected forms // //---------------------------------------------------------------------------- SCODE STDMETHODCALLTYPE CStemmer::StemWord( WCHAR const *pwc, ULONG cwc, IStemSink *pStemSink ) { SCODE sc = S_OK; if ( 0 == pStemSink || 0 == pwc ) { return E_FAIL; } if ( 0 == cwc) { return S_OK; } CONST WCHAR *pwcStem; DWORD i; BYTE ct; BOOL fRomanWord = FALSE; __try { for ( i=1; i< ( cwc - 1 ); i++, pwc++) { ct = GetCharType(*pwc); if (ct == CH) { if (!fRomanWord) { pwcStem = pwc; fRomanWord = TRUE; } } else { if (fRomanWord) { (pStemSink->PutWord)( pwcStem, pwc - pwcStem ); fRomanWord = FALSE; } else { switch (ct) { case PS: case WS: break; default: (pStemSink->PutWord)( pwc, 2 ); break; } } } } // put the last English word if (fRomanWord) { (pStemSink->PutWord)( pwcStem, pwc - pwcStem ); fRomanWord = FALSE; } // output inflected words to stemmer sink in EnumInflections callback } __except (1) { sc = E_UNEXPECTED; } return sc; }