windows-nt/Source/XPSP1/NT/enduser/speech/tts/ms_entropic/stdsentenum.cpp

/*******************************************************************************
* StdSentEnum.cpp *
*-----------------*
*   Description:
*       This module is the main implementation file for the CStdSentEnum class.
*-------------------------------------------------------------------------------
*  Created By: EDC                                        Date: 03/19/99
*  Copyright (C) 1999 Microsoft Corporation
*  All Rights Reserved
*
*******************************************************************************/

//--- Additional includes
#include "stdafx.h"
#ifndef StdSentEnum_h
#include "stdsentenum.h"
#endif
#include "spttsengdebug.h"
#include "SpAutoObjectLock.h"

//--- Locals
CComAutoCriticalSection CStdSentEnum::m_AbbrevTableCritSec;

//=== CStdSentEnum ============================================================
//

/*****************************************************************************
* CStdSentEnum::InitPron *
*------------------------*
*   Description:
*       Inits pron tables
********************************************************************* AH ***/
HRESULT CStdSentEnum::InitPron( WCHAR** OriginalPron )
{
    HRESULT hr = S_OK;
    WCHAR *NewPron = NULL;

    NewPron = new WCHAR[ wcslen( *OriginalPron ) ];
    hr = m_cpPhonemeConverter->PhoneToId( *OriginalPron, NewPron );
    if ( SUCCEEDED( hr ) )
    {
        *OriginalPron = NewPron;
    }

    return hr;
} /* InitPron */

/*****************************************************************************
* CStdSentEnum::FinalConstruct *
*------------------------------*
*   Description:
*       Constructor
********************************************************************* EDC ***/
HRESULT CStdSentEnum::FinalConstruct()
{
    SPDBG_FUNC( "CStdSentEnum::FinalConstruct" );
    HRESULT hr = S_OK;
    m_dwSpeakFlags  = 0;
    m_pTextFragList = NULL;
    m_pMorphLexicon = NULL;
    m_fHaveNamesLTS = false;
    m_eSeparatorAndDecimal = COMMA_PERIOD;
    m_eShortDateOrder      = MONTH_DAY_YEAR;
    /*** Create phone converter ***/
    if ( SUCCEEDED( hr ) )
    {
        hr = SpCreatePhoneConverter( 1033, NULL, NULL, &m_cpPhonemeConverter );
        m_AbbrevTableCritSec.Lock();
        if ( !g_fAbbrevTablesInitialized )
        {
            for ( ULONG i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AbbreviationTable ); i++ )
            {
                if ( g_AbbreviationTable[i].pPron1 )
                {
                    hr = InitPron( &g_AbbreviationTable[i].pPron1 );
                }
                if ( SUCCEEDED( hr ) &&
                     g_AbbreviationTable[i].pPron2 )
                {
                    hr = InitPron( &g_AbbreviationTable[i].pPron2 );
                }
                if ( SUCCEEDED( hr ) &&
                     g_AbbreviationTable[i].pPron3 )
                {
                    hr = InitPron( &g_AbbreviationTable[i].pPron3 );
                }
            }
            for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AmbiguousWordTable ); i++ )
            {
                if ( g_AmbiguousWordTable[i].pPron1 )
                {
                    hr = InitPron( &g_AmbiguousWordTable[i].pPron1 );
                }
                if ( SUCCEEDED( hr ) &&
                     g_AmbiguousWordTable[i].pPron2 )
                {
                    hr = InitPron( &g_AmbiguousWordTable[i].pPron2 );
                }
                if ( SUCCEEDED( hr ) &&
                     g_AmbiguousWordTable[i].pPron3 )
                {
                    hr = InitPron( &g_AmbiguousWordTable[i].pPron3 );
                }
            }
            for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_PostLexLookupWordTable ); i++ )
            {
                if ( g_PostLexLookupWordTable[i].pPron1 )
                {
                    hr = InitPron( &g_PostLexLookupWordTable[i].pPron1 );
                }
                if ( SUCCEEDED( hr ) &&
                     g_PostLexLookupWordTable[i].pPron2 )
                {
                    hr = InitPron( &g_PostLexLookupWordTable[i].pPron2 );
                }
                if ( SUCCEEDED( hr ) &&
                     g_PostLexLookupWordTable[i].pPron3 )
                {
                    hr = InitPron( &g_PostLexLookupWordTable[i].pPron3 );
                }
            }
            if ( SUCCEEDED( hr ) )
            {
                hr = InitPron( &g_pOfA );
                if ( SUCCEEDED( hr ) )
                {
                    hr = InitPron( &g_pOfAn );
                }
            }
        }
        if ( SUCCEEDED( hr ) )
        {
            g_fAbbrevTablesInitialized = true;
        }
        m_AbbrevTableCritSec.Unlock();
    }

    return hr;
} /* CStdSentEnum::FinalConstruct */

/*****************************************************************************
* CStdSentEnum::FinalRelease *
*----------------------------*
*   Description:
*       Destructor
********************************************************************* EDC ***/
void CStdSentEnum::FinalRelease()
{
    SPDBG_FUNC( "CStdSentEnum::FinalRelease" );

    if ( m_pMorphLexicon )
    {
        delete m_pMorphLexicon;
    }

} /* CStdSentEnum::FinalRelease */

/*****************************************************************************
* CStdSentEnum::SetFragList *
*---------------------------*
*   The text fragment list passed in is guaranteed to be valid for the lifetime
*   of this object. Each time this method is called, the sentence enumerator
*   should reset its state.
********************************************************************* EDC ***/
STDMETHODIMP CStdSentEnum::
    SetFragList( const SPVTEXTFRAG* pTextFragList, DWORD dwSpeakFlags )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::SetFragList" );
    HRESULT hr = S_OK;

    //--- Check args
    if( SP_IS_BAD_READ_PTR( pTextFragList ) ||
        ( dwSpeakFlags & SPF_UNUSED_FLAGS ) )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        m_dwSpeakFlags   = dwSpeakFlags;
        m_pTextFragList  = pTextFragList;

        //--- grab normalization preferences from the registry
        if ( SUCCEEDED( hr ) )
        {
            CComPtr<ISpObjectToken> cpToken;
            CSpDynamicString dstrTokenKeyName;
            hr = StringFromCLSID( CLSID_MSE_TTSEngine, &dstrTokenKeyName );
            if ( SUCCEEDED( hr ) )
            {
                hr = SpCreateNewToken( L"HKEY_CURRENT_USER\\Software\\Microsoft\\Speech\\Voices", dstrTokenKeyName,
                                       &cpToken );
            }
            if ( SUCCEEDED( hr ) )
            {
                DWORD dwTemp;
                if ( SUCCEEDED( cpToken->GetDWORD( L"SeparatorAndDecimal", &dwTemp ) ) )
                {
                    m_eSeparatorAndDecimal = (SEPARATOR_AND_DECIMAL) dwTemp;
                }
                if ( SUCCEEDED( cpToken->GetDWORD( L"ShortDateOrder", &dwTemp ) ) )
                {
                    m_eShortDateOrder = (SHORT_DATE_ORDER) dwTemp;
                }
            }
        }

        //--- Reset state
        Reset();
    }

    return hr;
} /* CStdSentEnum::SetFragList */

/*****************************************************************************
* CStdSentEnum::Next *
*--------------------*
*
********************************************************************* EDC ***/
STDMETHODIMP CStdSentEnum::Next( IEnumSENTITEM **ppSentItemEnum )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::Next" );
    HRESULT hr = S_OK;

    //--- Check args
    if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        //--- If this is NULL then the enum needs to be reset
        if( m_pCurrFrag )
        {
            SentencePointer NewSentencePointer;
            NewSentencePointer.pSentenceFrag = m_pCurrFrag;
            NewSentencePointer.pSentenceStart = m_pNextChar;

            hr = GetNextSentence( ppSentItemEnum );
            if( hr == S_OK )
            {
                //--- Update Sentence Pointer List
                hr = m_SentenceStack.Push( NewSentencePointer );
            }
        }
        else
        {
            hr = S_FALSE;
        }
    }

    return hr;
} /* CStdSentEnum::Next */

/*****************************************************************************
* CStdSentEnum::Previous *
*--------------------*
*
********************************************************************* AH ****/
STDMETHODIMP CStdSentEnum::Previous( IEnumSENTITEM **ppSentItemEnum )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::Previous" );
    HRESULT hr = S_OK;

    //--- Check args
    if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        //--- Don't care if m_pCurrFrag is NULL, as long as we have enough on the SentenceStack
        //---   to skip backwards...
        if( m_SentenceStack.GetCount() >= 2 )
        {
            //--- Get the previous Sentence from the Sentence List, and then remove the Current Sentence
            SentencePointer &PreviousSentence = m_SentenceStack.Pop();
            PreviousSentence = m_SentenceStack.Pop();

            //--- Reset the current frag and the current text pointer position
            m_pCurrFrag = PreviousSentence.pSentenceFrag;
            m_pNextChar = PreviousSentence.pSentenceStart;
            m_pEndChar  = m_pCurrFrag->pTextStart + m_pCurrFrag->ulTextLen;

            hr = GetNextSentence( ppSentItemEnum );
            if( hr == S_OK )
            {
                //--- Update Sentence Pointer List
                hr = m_SentenceStack.Push( PreviousSentence );
            }
        }
        else
        {
            hr = S_FALSE;
        }
    }

    return hr;
} /* CStdSentEnum::Previous */

/*****************************************************************************
* SkipWhiteSpaceAndTags *
*-----------------------*
*   Skips m_pNextChar ahead to the next non-whitespace character (skipping
*   ahead in the frag list, if necessary) or sets it to NULL if it hits the
*   end of the frag list text...
********************************************************************* AH ****/
HRESULT CStdSentEnum::SkipWhiteSpaceAndTags( const WCHAR*& pStartChar, const WCHAR*& pEndChar,
                                             const SPVTEXTFRAG*& pCurrFrag, CSentItemMemory& MemoryManager,
                                             BOOL fAddToItemList, CItemList* pItemList )
{
    SPDBG_ASSERT( pStartChar <= pEndChar );
    HRESULT hr = S_OK;

    while ( pStartChar &&
            ( IsSpace( *pStartChar ) ||
              pStartChar == pEndChar ) )
    {
        //--- Skip whitespace
        while ( pStartChar < pEndChar &&
                IsSpace( *pStartChar ) )
        {
            ++pStartChar;
        }
        //--- Skip to next spoken frag, if necessary
        if ( pStartChar == pEndChar )
        {
            pCurrFrag = pCurrFrag->pNext;
            while ( pCurrFrag &&
                    pCurrFrag->State.eAction != SPVA_Speak &&
                    pCurrFrag->State.eAction != SPVA_SpellOut )
            {
                pStartChar = (WCHAR*) pCurrFrag->pTextStart;
                pEndChar   = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;
                //--- Add non-spoken fragments, if fAddToItemList is true.
                if ( fAddToItemList )
                {
                    //-- Check for names lexicon XML tag...
                    if( !m_fNameItem                &&
                        m_pCurrFrag->ulTextLen == 6 &&
                        !_wcsnicmp( L"<NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
                    {
                        m_fNameItem = true;
                    }
                    else if( m_fNameItem                 &&
                             m_pCurrFrag->ulTextLen == 7 &&
                             !_wcsnicmp( L"</NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
                    {
                        m_fNameItem = false;
                    }

                    CSentItem Item;
                    Item.pItemSrcText    = pCurrFrag->pTextStart;
                    Item.ulItemSrcLen    = pCurrFrag->ulTextLen;
                    Item.ulItemSrcOffset = pCurrFrag->ulTextSrcOffset;
                    Item.ulNumWords      = 1;
                    Item.Words           = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        ZeroMemory( Item.Words, sizeof(TTSWord) );
                        Item.Words[0].pXmlState         = &pCurrFrag->State;
                        Item.Words[0].eWordPartOfSpeech = MS_Unknown;
                        Item.eItemPartOfSpeech          = MS_Unknown;
                        Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                        if ( SUCCEEDED( hr ) )
                        {
                            Item.pItemInfo->Type = eWORDLIST_IS_VALID;
                            pItemList->AddTail( Item );
                        }
                    }
                }
                pCurrFrag = pCurrFrag->pNext;
            }
            if ( !pCurrFrag )
            {
                pStartChar = NULL;
                pEndChar   = NULL;
            }
            else
            {
                pStartChar  = (WCHAR*) pCurrFrag->pTextStart;
                pEndChar    = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;
            }
        }
    }
    return hr;
} /* SkipWhiteSpaceAndTags */

/*****************************************************************************
* FindTokenEnd *
*--------------*
*   Returns the position of the first whitespace character after pStartChar,
*   or pEndChar, or the character after SP_MAX_WORD_LENGTH, whichever comes first.
********************************************************************* AH ****/
const WCHAR* CStdSentEnum::FindTokenEnd( const WCHAR* pStartChar, const WCHAR* pEndChar )
{
    SPDBG_ASSERT( pStartChar < pEndChar );
    ULONG ulNumChars = 1;
    const WCHAR *pPos = pStartChar;

    while ( pPos              &&
            pPos < pEndChar   &&
            !IsSpace( *pPos ) &&
            ulNumChars < SP_MAX_WORD_LENGTH )
    {
        pPos++;
        ulNumChars++;
    }

    return pPos;
} /* FindTokenEnd */

/*****************************************************************************
* CStdSentEnum::AddNextSentItem *
*-------------------------------*
*   Locates the next sentence item in the stream and adds it to the list.
*   Returns true if the last item added is the end of the sentence.
********************************************************************* AH ****/
HRESULT CStdSentEnum::AddNextSentItem( CItemList& ItemList, CSentItemMemory& MemoryManager, BOOL* pfIsEOS )
{
    SPDBG_ASSERT( m_pNextChar && pfIsEOS );
    HRESULT hr = S_OK;
    BOOL fHitPauseItem = false;
    CSentItem Item;
    ULONG ulTrailItems = 0;
    TTSItemType ItemType = eUNMATCHED;
    *pfIsEOS = false;

    //--- Skip initial whitespace characters and XML markup (by skipping ahead in the frag list).
    hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &ItemList );

    //--- This will happen when we hit the end of the frag list
    if ( !m_pNextChar )
    {
        return S_OK;
    }

    //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar).
    m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );

    //--- Get Primary Insert Position
    SPLISTPOS ItemPos = ItemList.AddTail( Item );

    //--- Try looking up this token in the User Lexicon...
    WCHAR Temp = *( (WCHAR*) m_pEndOfCurrToken );
    *( (WCHAR*) m_pEndOfCurrToken ) = 0;
    SPWORDPRONUNCIATIONLIST SPList;
    ZeroMemory( &SPList, sizeof( SPWORDPRONUNCIATIONLIST ) );

    hr = m_cpAggregateLexicon->GetPronunciations( m_pNextChar, 1033, eLEXTYPE_USER, &SPList );
    if( SPList.pvBuffer )
    {
        ::CoTaskMemFree( SPList.pvBuffer );
    }

    *( (WCHAR*) m_pEndOfCurrToken ) = Temp;

    if ( SUCCEEDED( hr ) )
    {
        Item.eItemPartOfSpeech = MS_Unknown;
        Item.pItemSrcText      = m_pNextChar;
        Item.ulItemSrcLen      = (ULONG) ( m_pEndOfCurrToken - m_pNextChar );
        Item.ulItemSrcOffset   = m_pCurrFrag->ulTextSrcOffset +
                                 (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
        Item.ulNumWords        = 1;
        Item.Words              = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( Item.Words, sizeof(TTSWord) );
            Item.Words[0].pXmlState         = &m_pCurrFrag->State;
            Item.Words[0].pWordText         = m_pNextChar;
            Item.Words[0].ulWordLen         = Item.ulItemSrcLen;
            Item.Words[0].pLemma            = Item.Words[0].pWordText;
            Item.Words[0].ulLemmaLen        = Item.Words[0].ulWordLen;
            Item.Words[0].eWordPartOfSpeech = MS_Unknown;
            Item.eItemPartOfSpeech          = MS_Unknown;
            Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo*), &hr );
            if ( SUCCEEDED( hr ) )
            {
                Item.pItemInfo->Type = eALPHA_WORD;
                ItemList.SetAt( ItemPos, Item );
            }
        }
        m_pNextChar = m_pEndOfCurrToken;
    }
    //--- Not in the user lex - itemize, normalize, etc.
    else if ( hr == SPERR_NOT_IN_LEX )
    {
        hr = S_OK;

        //--- convert text from Unicode to Ascii
        hr = DoUnicodeToAsciiMap( m_pNextChar, (ULONG)( m_pEndOfCurrToken - m_pNextChar ), (WCHAR*)m_pNextChar );

        if ( SUCCEEDED( hr ) )
        {
            //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar)
            //---   AGAIN, since the mapping may have introduced new whitespace characters...
            m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );

            //--- Insert lead items (group beginnings, quotation marks)
            while ( m_pNextChar < m_pEndOfCurrToken &&
                    ( ( ItemType = IsGroupBeginning( *m_pNextChar ) )    != eUNMATCHED ||
                      ( ItemType = IsQuotationMark( *m_pNextChar ) )     != eUNMATCHED ) )
            {
                CSentItem LeadItem;
                LeadItem.pItemSrcText       = m_pNextChar;
                LeadItem.ulItemSrcLen       = 1;
                LeadItem.ulItemSrcOffset    = m_pCurrFrag->ulTextSrcOffset +
                                              (ULONG)(( m_pNextChar - m_pCurrFrag->pTextStart ));
                LeadItem.ulNumWords         = 1;
                LeadItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    ZeroMemory( LeadItem.Words, sizeof(TTSWord) );
                    LeadItem.Words[0].pXmlState         = &m_pCurrFrag->State;
                    LeadItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
                    LeadItem.eItemPartOfSpeech          = ConvertItemTypeToPartOfSp( ItemType );
                    LeadItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        LeadItem.pItemInfo->Type = ItemType;
                        if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
                             m_pCurrFrag->State.eAction == SPVA_SpellOut )
                        {
                            CWordList TempWordList;
                            ExpandPunctuation( TempWordList, *m_pNextChar );
                            hr = SetWordList( LeadItem, TempWordList, MemoryManager );
                            LeadItem.pItemInfo->Type = eUNMATCHED;
                        }
                        ItemList.InsertBefore( ItemPos, LeadItem );
                        m_pNextChar++;
                    }
                }
                ItemType = eUNMATCHED;
            }

            //--- Insert trail items (group endings, quotation marks, misc. punctuation, EOS Items)
            m_pEndOfCurrItem = m_pEndOfCurrToken;
            BOOL fAddTrailItem = true;
            BOOL fAbbreviation = false;
            while ( (m_pEndOfCurrItem - 1) >= m_pNextChar &&
                    fAddTrailItem )
            {
                fAddTrailItem = false;
                fAbbreviation = false;

                //--- Check group endings, quotation marks, misc. punctuation.
                if ( ( ItemType = IsGroupEnding( *(m_pEndOfCurrItem - 1) ) )       != eUNMATCHED ||
                     ( ItemType = IsQuotationMark( *(m_pEndOfCurrItem - 1) ) )     != eUNMATCHED ||
                     ( ItemType = IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) )   != eUNMATCHED )
                {
                    fAddTrailItem = true;
                    if ( ItemType == eCOMMA ||
                         ItemType == eCOLON ||
                         ItemType == eSEMICOLON )
                    {
                        fHitPauseItem = true;
                    }
                }
                //--- Check EOS Items, except periods preceded by alpha characters
                else if ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED &&
                          ! ( ItemType == ePERIOD                     &&
                              ( m_pEndOfCurrItem - 2 >= m_pNextChar ) &&
                              ( iswalpha( *(m_pEndOfCurrItem - 2) ) ) ) )
                {
                    //--- Check for ellipses
                    if ( ItemType == ePERIOD )
                    {
                        if ( m_pEndOfCurrItem == m_pEndOfCurrToken                              &&
                             ( m_pEndOfCurrItem - 2 >= m_pNextChar )                            &&
                             ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 2) ) ) == ePERIOD ) &&
                             ( m_pEndOfCurrItem - 3 == m_pNextChar )                            &&
                             ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 3) ) ) == ePERIOD ) )
                        {
                            fAddTrailItem = true;
                            ItemType      = eELLIPSIS;
                        }
                        else
                        {
                            ItemType      = ePERIOD;
                            fAddTrailItem = true;
                            *pfIsEOS      = true;
                        }
                    }
                    else
                    {
                        fAddTrailItem   = true;
                        *pfIsEOS        = true;
                    }
                }
                //--- Period preceded by alpha character - determine whether it is EOS.
                else if ( ItemType == ePERIOD )
                {
                    //--- Is it an Initialism ( e.g. "e.g." )?  If so, only EOS if the next
                    //---   word is in the common first words list...
                    hr = IsInitialism( ItemList, ItemPos, MemoryManager, pfIsEOS );
                    if ( SUCCEEDED( hr ) )
                    {
                        if ( *pfIsEOS )
                        {
                            //--- Did we see a pause item earlier?  In that case, we should NOT listen to this
                            //--- IsEOS decision from IsInitialism...
                            if ( fHitPauseItem )
                            {
                                *pfIsEOS = false;
                            }
                            else
                            {
                                fAddTrailItem = true;
                                fAbbreviation = true;
                            }
                        }
                    }
                    else if ( hr == E_INVALIDARG )
                    {
                        const WCHAR temp = (WCHAR) *( m_pEndOfCurrItem - 1 );
                        *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = 0;

                        const AbbrevRecord* pAbbrevRecord =
                            (AbbrevRecord*) bsearch( (void*) m_pNextChar, (void*) g_AbbreviationTable,
                                                     sp_countof( g_AbbreviationTable ), sizeof( AbbrevRecord ),
                                                     CompareStringAndAbbrevRecord );

                        *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = temp;

                        if ( pAbbrevRecord )
                        {
                            //--- Matched an abbreviation
                            if ( pAbbrevRecord->iSentBreakDisambig < 0 )
                            {
                                //--- Abbreviation will never end a sentence - just insert into ItemList
                                *pfIsEOS        = false;
                                hr              = S_OK;

                                Item.pItemSrcText       = m_pNextChar;
                                Item.ulItemSrcLen       = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
                                Item.ulItemSrcOffset    = m_pCurrFrag->ulTextSrcOffset +
                                                          (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
                                Item.ulNumWords         = 1;
                                Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof( TTSWord ), &hr );
                                if ( SUCCEEDED( hr ) )
                                {
                                    ZeroMemory( Item.Words, sizeof( TTSWord ) );
                                    Item.Words[0].pXmlState  = &m_pCurrFrag->State;
                                    Item.Words[0].pWordText  = Item.pItemSrcText;
                                    Item.Words[0].ulWordLen  = Item.ulItemSrcLen;
                                    Item.Words[0].pLemma     = Item.pItemSrcText;
                                    Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
                                    Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
                                    if ( SUCCEEDED( hr ) )
                                    {
                                        if ( NeedsToBeNormalized( pAbbrevRecord ) )
                                        {
                                            Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
                                        }
                                        else
                                        {
                                            Item.pItemInfo->Type = eABBREVIATION;
                                        }
                                        ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbrevRecord;
                                        ItemList.SetAt( ItemPos, Item );
                                    }
                                }
                            }
                            else
                            {
                                //--- Need to do some disambiguation to determine whether,
                                //---   a) this is indeed an abbreviation (e.g. "Ed.")
                                //---   b) the period doubles as EOS
                                hr = ( this->*g_SentBreakDisambigTable[pAbbrevRecord->iSentBreakDisambig] )
                                                ( pAbbrevRecord, ItemList, ItemPos, MemoryManager, pfIsEOS );
                                if ( SUCCEEDED( hr ) )
                                {
                                    if ( *pfIsEOS )
                                    {
                                        if ( fHitPauseItem )
                                        {
                                            *pfIsEOS = false;
                                        }
                                        else
                                        {
                                            fAddTrailItem = true;
                                            fAbbreviation = true;
                                        }
                                    }
                                }
                            }
                        }

                        if ( hr == E_INVALIDARG )
                        {
                            //--- Just check for periods internal to the item - this catches stuff like
                            //---   10:30p.m.
                            for ( const WCHAR* pIterator = m_pNextChar; pIterator < m_pEndOfCurrItem - 1; pIterator++ )
                            {
                                if ( *pIterator == L'.' )
                                {
                                    *pfIsEOS = false;
                                    break;
                                }
                            }
                            //--- If all previous checks have failed, it is EOS.
                            if ( pIterator == ( m_pEndOfCurrItem - 1 ) &&
                                 !fHitPauseItem )
                            {
                                hr              = S_OK;
                                fAddTrailItem   = true;
                                *pfIsEOS        = true;
                            }
                            else if ( hr == E_INVALIDARG )
                            {
                                hr = S_OK;
                            }
                        }
                    }
                }

                //--- Add trail item.
                if ( fAddTrailItem )
                {
                    ulTrailItems++;
                    CSentItem TrailItem;
                    if ( ItemType == eELLIPSIS )
                    {
                        TrailItem.pItemSrcText      = m_pEndOfCurrItem - 3;
                        TrailItem.ulItemSrcLen      = 3;
                        TrailItem.ulItemSrcOffset   = m_pCurrFrag->ulTextSrcOffset +
                                                      (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 3 );
                    }
                    else
                    {
                        TrailItem.pItemSrcText       = m_pEndOfCurrItem - 1;
                        TrailItem.ulItemSrcLen       = 1;
                        TrailItem.ulItemSrcOffset    = m_pCurrFrag->ulTextSrcOffset +
                                                       (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 1 );
                    }
                    TrailItem.ulNumWords         = 1;
                    TrailItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        ZeroMemory( TrailItem.Words, sizeof(TTSWord) );
                        TrailItem.Words[0].pXmlState         = &m_pCurrFrag->State;
                        TrailItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
                        TrailItem.eItemPartOfSpeech          = ConvertItemTypeToPartOfSp( ItemType );
                        TrailItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                        if ( SUCCEEDED( hr ) )
                        {
                            TrailItem.pItemInfo->Type = ItemType;
                            if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
                                 ( m_pCurrFrag->State.eAction == SPVA_SpellOut &&
                                   !fAbbreviation ) )
                            {
                                CWordList TempWordList;
                                ExpandPunctuation( TempWordList, *(m_pEndOfCurrItem - 1) );
                                hr = SetWordList( TrailItem, TempWordList, MemoryManager );
                                TrailItem.pItemInfo->Type = eUNMATCHED;
                            }
                            ItemList.InsertAfter( ItemPos, TrailItem );
                            if ( !fAbbreviation )
                            {
                                if ( ItemType == eELLIPSIS )
                                {
                                    m_pEndOfCurrItem -= 3;
                                    ulTrailItems = 3;
                                }
                                else
                                {
                                    m_pEndOfCurrItem--;
                                }
                            }
                        }
                    }
                    ItemType = eUNMATCHED;
                    if ( fAbbreviation )
                    {
                        break;
                    }
                }
            }

            //--- Do Main Item Insertion
            if ( SUCCEEDED( hr ) &&
                 m_pNextChar == m_pEndOfCurrItem )
            {
                ItemList.RemoveAt( ItemPos );
            }
            else if ( SUCCEEDED( hr ) )
            {
                hr = Normalize( ItemList, ItemPos, MemoryManager );
            }

            if( m_fNameItem )
            {
                wcscpy( ItemList.GetAt( ItemPos ).CustomLtsToken, L"Names" );
            }

            //--- Advance m_pNextChar to m_pEndOfCurrItem + once for each trail item matched.
            if ( SUCCEEDED( hr ) )
            {
                if ( !fAbbreviation &&
                     m_pEndOfCurrItem + ulTrailItems != m_pEndOfCurrToken )
                {
                    //--- Multi-token item matched in Normalize()... Remove all previously matched trail items,
                    //--- as they were matched as part of the larger item...
                    m_pNextChar = m_pEndOfCurrItem;
                    Item = ItemList.GetNext( ItemPos );
                    while ( ItemPos )
                    {
                        SPLISTPOS RemovePos = ItemPos;
                        Item = ItemList.GetNext( ItemPos );
                        ItemList.RemoveAt( RemovePos );
                    }
                }
                else
                {
                    m_pNextChar = m_pEndOfCurrToken;
                }
            }
        }
    }

    return hr;
} /* CStdSentEnum::AddNextSentItem */

/*****************************************************************************
* CStdSentEnum::GetNextSentence *
*-------------------------------*
*   This method is used to create a sentence item enumerator and populate it
*   with items. If the SPF_NLP_PASSTHROUGH flag is set, each item is the block
*   of text between XML states. If the SPF_NLP_PASSTHROUGH flag is not set, each
*   item is an individual word that is looked up in the current lexicon(s).
********************************************************************* EDC ***/
HRESULT CStdSentEnum::GetNextSentence( IEnumSENTITEM** ppItemEnum )
{
    HRESULT hr = S_OK;
    ULONG ulNumItems = 0;
    const SPVTEXTFRAG* pPrevFrag = m_pCurrFrag;

    //--- Is there any work to do
    if( m_pCurrFrag == NULL ) return S_FALSE;

    //--- Create sentence enum
    CComObject<CSentItemEnum> *pItemEnum;
    hr = CComObject<CSentItemEnum>::CreateInstance( &pItemEnum );

    if( SUCCEEDED( hr ) )
    {
        pItemEnum->AddRef();
        pItemEnum->_SetOwner( GetControllingUnknown() );
        *ppItemEnum = pItemEnum;
    }

    if( SUCCEEDED( hr ) )
    {
        BOOL fSentDone = false;
        BOOL fGoToNextFrag = false;
        CItemList& ItemList = pItemEnum->_GetList();
        CSentItemMemory& MemoryManager = pItemEnum->_GetMemoryManager();

        while( SUCCEEDED(hr) && m_pCurrFrag && !fSentDone && ulNumItems < 50 )
        {
            ulNumItems++;
            if( m_pCurrFrag->State.eAction == SPVA_Speak ||
                m_pCurrFrag->State.eAction == SPVA_SpellOut )
            {
                hr = AddNextSentItem( ItemList, MemoryManager, &fSentDone );

                //--- Advance fragment?
                if( SUCCEEDED( hr ) &&
                    m_pNextChar     &&
                    m_pEndChar      &&
                    m_pNextChar >= m_pEndChar )
                {
                    fGoToNextFrag = true;
                }
            }
            else
            {
                //-- Check for lexicon
                if( !m_fNameItem                &&
                    m_pCurrFrag->ulTextLen == 6 &&
                    !_wcsnicmp( L"<NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
                {
                    m_fNameItem = true;
                }
                else if( m_fNameItem                 &&
                         m_pCurrFrag->ulTextLen == 7 &&
                         !_wcsnicmp( L"</NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
                {
                    m_fNameItem = false;
                }

                //--- Add non spoken fragments
                CSentItem Item;
                Item.pItemSrcText    = m_pCurrFrag->pTextStart;
                Item.ulItemSrcLen    = m_pCurrFrag->ulTextLen;
                Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset;
                Item.ulNumWords      = 1;
                Item.Words           = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    ZeroMemory( Item.Words, sizeof(TTSWord) );
                    Item.Words[0].pXmlState         = &m_pCurrFrag->State;
                    Item.Words[0].eWordPartOfSpeech = MS_Unknown;
                    Item.eItemPartOfSpeech          = MS_Unknown;
                    Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        Item.pItemInfo->Type = eWORDLIST_IS_VALID;
                        ItemList.AddTail( Item );
                    }
                }
                fGoToNextFrag = true;
            }

            if( SUCCEEDED( hr ) &&
                fGoToNextFrag )
            {
                fGoToNextFrag = false;
                pPrevFrag = m_pCurrFrag;
                m_pCurrFrag = m_pCurrFrag->pNext;
                if( m_pCurrFrag )
                {
                    m_pNextChar = m_pCurrFrag->pTextStart;
                    m_pEndChar  = m_pNextChar + m_pCurrFrag->ulTextLen;
                }
                else
                {
                    m_pNextChar = NULL;
                    m_pEndChar  = NULL;
                }
            }
        } // end while

        //--- If no period has been added, add one now - this will happen if the text
        //--- is ONLY XML markup...
        if ( SUCCEEDED(hr) && !fSentDone )
        {
            CSentItem EOSItem;
            EOSItem.pItemSrcText    = g_period.pStr;
            EOSItem.ulItemSrcLen    = g_period.Len;
            EOSItem.ulItemSrcOffset = pPrevFrag->ulTextSrcOffset + pPrevFrag->ulTextLen;
            EOSItem.ulNumWords      = 1;
            EOSItem.Words           = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
            if ( SUCCEEDED( hr ) )
            {
                ZeroMemory( EOSItem.Words, sizeof(TTSWord) );
                EOSItem.Words[0].pXmlState          = &g_DefaultXMLState;
                EOSItem.Words[0].eWordPartOfSpeech  = MS_EOSItem;
                EOSItem.eItemPartOfSpeech           = MS_EOSItem;
                EOSItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    EOSItem.pItemInfo->Type = ePERIOD;
                    ItemList.AddTail( EOSItem );
                }
            }
        }

        //--- Output debugging information, if sentence breaks are desired
        TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_SENTENCEBREAKS );

        if( SUCCEEDED( hr ) )
        {
            hr = DetermineProns( pItemEnum->_GetList(), pItemEnum->_GetMemoryManager() );
        }

        pItemEnum->Reset();

        //--- Output debugging information, if POS or Pronunciations are desired
        TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_LEXLOOKUP );

    }
    return hr;
} /* CStdSentEnum::GetNextSentence */

/*****************************************************************************
* CStdSentEnum::Reset *
*---------------------*
*
********************************************************************* EDC ***/
STDMETHODIMP CStdSentEnum::Reset( void )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::Reset" );
    HRESULT hr = S_OK;
    m_pCurrFrag = m_pTextFragList;
    m_pNextChar = m_pCurrFrag->pTextStart;
    m_pEndChar  = m_pNextChar + m_pCurrFrag->ulTextLen;
    m_SentenceStack.Reset();
    m_fNameItem = false;
    return hr;
} /* CStdSentEnum::Reset */

/*****************************************************************************
* CStdSentEnum::InitAggregateLexicon *
*------------------------------------*
*
********************************************************************* AH ****/
HRESULT CStdSentEnum::InitAggregateLexicon( void )
{
    return m_cpAggregateLexicon.CoCreateInstance(CLSID_SpLexicon);
}

/*****************************************************************************
* CStdSentEnum::AddLexiconToAggregate *
*-------------------------------------*
*
********************************************************************* AH ****/
HRESULT CStdSentEnum::AddLexiconToAggregate( ISpLexicon *pAddLexicon, DWORD dwFlags )
{
    return m_cpAggregateLexicon->AddLexicon( pAddLexicon, dwFlags );
}

/*****************************************************************************
* CStdSentEnum::InitMorphLexicon *
*--------------------------------*
*
********************************************************************* AH ****/
HRESULT CStdSentEnum::InitMorphLexicon( void )
{
    HRESULT hr = S_OK;

    m_pMorphLexicon = new CSMorph( m_cpAggregateLexicon, &hr );

    return hr;
}

void CStdSentEnum::fNamesLTS( bool fHaveNamesLTS )
{
    m_fHaveNamesLTS = fHaveNamesLTS;
}

//
//=== CSentItemEnum =========================================================
//

/*****************************************************************************
* CSentItemEnum::Next *
*---------------------*
*
********************************************************************* EDC ***/
STDMETHODIMP CSentItemEnum::
    Next( TTSSentItem *pItemEnum )
{
    SPDBG_FUNC( "CSentItemEnum::Next" );
    HRESULT hr = S_OK;

    //--- Check args
    if( SPIsBadWritePtr( pItemEnum, sizeof( TTSSentItem ) ) )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        if ( m_ListPos )
        {
            *pItemEnum = m_ItemList.GetNext( m_ListPos );
        }
        else
        {
            hr = S_FALSE;
        }
    }
    return hr;
} /* CSentItemEnum::Next */

/*****************************************************************************
* CSentItemEnum::Reset *
*----------------------*
*
********************************************************************* EDC ***/
STDMETHODIMP CSentItemEnum::Reset( void )
{
    SPDBG_FUNC( "CSentItemEnum::Reset" );
    HRESULT hr = S_OK;
    m_ListPos = m_ItemList.GetHeadPosition();
    return hr;
} /* CSentItemEnum::Reset */