windows-nt/Source/XPSP1/NT/enduser/speech/tts/ms_entropic/stdsentenum.cpp

1082 lines
43 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*******************************************************************************
* StdSentEnum.cpp *
*-----------------*
* Description:
* This module is the main implementation file for the CStdSentEnum class.
*-------------------------------------------------------------------------------
* Created By: EDC Date: 03/19/99
* Copyright (C) 1999 Microsoft Corporation
* All Rights Reserved
*
*******************************************************************************/
//--- Additional includes
#include "stdafx.h"
#ifndef StdSentEnum_h
#include "stdsentenum.h"
#endif
#include "spttsengdebug.h"
#include "SpAutoObjectLock.h"
//--- Locals
//--- Class-wide critical section. FinalConstruct() holds it while it checks
//--- g_fAbbrevTablesInitialized and converts the global pronunciation tables.
CComAutoCriticalSection CStdSentEnum::m_AbbrevTableCritSec;
//=== CStdSentEnum ============================================================
//
/*****************************************************************************
* CStdSentEnum::InitPron *
*------------------------*
*   Description:
*       Converts the phone string at *OriginalPron with the phoneme converter
*   and, on success, repoints *OriginalPron at a newly allocated buffer that
*   holds the converted ids.  On failure *OriginalPron is left untouched and
*   the temporary buffer is released.
*
*   Returns E_INVALIDARG for a NULL argument, E_OUTOFMEMORY if the buffer
*   cannot be allocated, otherwise the result of PhoneToId().
*
*   NOTE(review): the previous string is not freed here - presumably the
*   table entries start out pointing at static data; confirm before adding
*   a delete.
********************************************************************* AH ***/
HRESULT CStdSentEnum::InitPron( WCHAR** OriginalPron )
{
    if ( !OriginalPron || !*OriginalPron )
    {
        return E_INVALIDARG;
    }

    //--- One element per source character plus one for the terminator that
    //--- PhoneToId() appends to its output.
    const size_t cchNewPron = wcslen( *OriginalPron ) + 1;
    WCHAR *NewPron = new WCHAR[ cchNewPron ];
    if ( !NewPron )
    {
        //--- Only reachable when operator new reports failure by returning NULL.
        return E_OUTOFMEMORY;
    }

    HRESULT hr = m_cpPhonemeConverter->PhoneToId( *OriginalPron, NewPron );
    if ( SUCCEEDED( hr ) )
    {
        *OriginalPron = NewPron;
    }
    else
    {
        //--- Nobody else has seen the buffer yet, so release it here.
        delete [] NewPron;
    }
    return hr;
} /* InitPron */
/*****************************************************************************
* CStdSentEnum::FinalConstruct *
*------------------------------*
*   Description:
*       Initializes the member state, creates the phone converter and, the
*   first time through (guarded by m_AbbrevTableCritSec and
*   g_fAbbrevTablesInitialized), runs InitPron() over every pronunciation in
*   g_AbbreviationTable, g_AmbiguousWordTable, g_PostLexLookupWordTable and
*   over g_pOfA / g_pOfAn.
*
*   NOTE(review): if InitPron() fails part way through, the entries that were
*   already converted stay converted while g_fAbbrevTablesInitialized remains
*   false, so a later FinalConstruct() would run InitPron() on them again.
********************************************************************* EDC ***/
HRESULT CStdSentEnum::FinalConstruct()
{
    SPDBG_FUNC( "CStdSentEnum::FinalConstruct" );
    HRESULT hr = S_OK;

    m_dwSpeakFlags         = 0;
    m_pTextFragList        = NULL;
    m_pMorphLexicon        = NULL;
    m_fHaveNamesLTS        = false;
    m_eSeparatorAndDecimal = COMMA_PERIOD;
    m_eShortDateOrder      = MONTH_DAY_YEAR;

    /*** Create phone converter ***/
    hr = SpCreatePhoneConverter( 1033, NULL, NULL, &m_cpPhonemeConverter );

    m_AbbrevTableCritSec.Lock();
    if ( !g_fAbbrevTablesInitialized )
    {
        //--- Declared once here: the loops below share the index, and relying
        //--- on a for-init variable outliving its loop is not standard C++.
        ULONG i = 0;

        for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AbbreviationTable ); i++ )
        {
            if ( g_AbbreviationTable[i].pPron1 )
            {
                hr = InitPron( &g_AbbreviationTable[i].pPron1 );
            }
            if ( SUCCEEDED( hr ) &&
                 g_AbbreviationTable[i].pPron2 )
            {
                hr = InitPron( &g_AbbreviationTable[i].pPron2 );
            }
            if ( SUCCEEDED( hr ) &&
                 g_AbbreviationTable[i].pPron3 )
            {
                hr = InitPron( &g_AbbreviationTable[i].pPron3 );
            }
        }

        for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_AmbiguousWordTable ); i++ )
        {
            if ( g_AmbiguousWordTable[i].pPron1 )
            {
                hr = InitPron( &g_AmbiguousWordTable[i].pPron1 );
            }
            if ( SUCCEEDED( hr ) &&
                 g_AmbiguousWordTable[i].pPron2 )
            {
                hr = InitPron( &g_AmbiguousWordTable[i].pPron2 );
            }
            if ( SUCCEEDED( hr ) &&
                 g_AmbiguousWordTable[i].pPron3 )
            {
                hr = InitPron( &g_AmbiguousWordTable[i].pPron3 );
            }
        }

        for ( i = 0; SUCCEEDED( hr ) && i < sp_countof( g_PostLexLookupWordTable ); i++ )
        {
            if ( g_PostLexLookupWordTable[i].pPron1 )
            {
                hr = InitPron( &g_PostLexLookupWordTable[i].pPron1 );
            }
            if ( SUCCEEDED( hr ) &&
                 g_PostLexLookupWordTable[i].pPron2 )
            {
                hr = InitPron( &g_PostLexLookupWordTable[i].pPron2 );
            }
            if ( SUCCEEDED( hr ) &&
                 g_PostLexLookupWordTable[i].pPron3 )
            {
                hr = InitPron( &g_PostLexLookupWordTable[i].pPron3 );
            }
        }

        if ( SUCCEEDED( hr ) )
        {
            hr = InitPron( &g_pOfA );
            if ( SUCCEEDED( hr ) )
            {
                hr = InitPron( &g_pOfAn );
            }
        }
    }
    //--- Only mark the tables as done when everything above succeeded.
    if ( SUCCEEDED( hr ) )
    {
        g_fAbbrevTablesInitialized = true;
    }
    m_AbbrevTableCritSec.Unlock();

    return hr;
} /* CStdSentEnum::FinalConstruct */
/*****************************************************************************
* CStdSentEnum::FinalRelease *
*----------------------------*
*   Description:
*       Releases the morphology lexicon object owned by this instance.
********************************************************************* EDC ***/
void CStdSentEnum::FinalRelease()
{
    SPDBG_FUNC( "CStdSentEnum::FinalRelease" );

    //--- delete on a NULL pointer is a no-op, so no explicit test is needed.
    delete m_pMorphLexicon;
} /* CStdSentEnum::FinalRelease */
/*****************************************************************************
* CStdSentEnum::SetFragList *
*---------------------------*
*   The text fragment list passed in is guaranteed to be valid for the lifetime
*   of this object. Each time this method is called, the sentence enumerator
*   should reset its state.
*
*   Returns E_INVALIDARG for an unreadable list or unknown speak flags.
*   Otherwise the list and flags are stored, the "SeparatorAndDecimal" and
*   "ShortDateOrder" values are read from the voice token when available, the
*   enumerator is reset, and the result of the token lookup is returned.
********************************************************************* EDC ***/
STDMETHODIMP CStdSentEnum::
SetFragList( const SPVTEXTFRAG* pTextFragList, DWORD dwSpeakFlags )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::SetFragList" );

    //--- Check args
    if( SP_IS_BAD_READ_PTR( pTextFragList ) ||
        ( dwSpeakFlags & SPF_UNUSED_FLAGS ) )
    {
        return E_INVALIDARG;
    }

    HRESULT hr = S_OK;
    m_dwSpeakFlags  = dwSpeakFlags;
    m_pTextFragList = pTextFragList;

    //--- grab normalization preferences from the registry
    {
        CComPtr<ISpObjectToken> cpVoiceToken;
        CSpDynamicString dstrClsid;

        hr = StringFromCLSID( CLSID_MSE_TTSEngine, &dstrClsid );
        if ( SUCCEEDED( hr ) )
        {
            hr = SpCreateNewToken( L"HKEY_CURRENT_USER\\Software\\Microsoft\\Speech\\Voices", dstrClsid,
                                   &cpVoiceToken );
        }
        if ( SUCCEEDED( hr ) )
        {
            //--- Each value is optional; the current setting is kept when it is absent.
            DWORD dwValue;
            if ( SUCCEEDED( cpVoiceToken->GetDWORD( L"SeparatorAndDecimal", &dwValue ) ) )
            {
                m_eSeparatorAndDecimal = (SEPARATOR_AND_DECIMAL) dwValue;
            }
            if ( SUCCEEDED( cpVoiceToken->GetDWORD( L"ShortDateOrder", &dwValue ) ) )
            {
                m_eShortDateOrder = (SHORT_DATE_ORDER) dwValue;
            }
        }
    }

    //--- Reset state (done even when the token lookup failed)
    Reset();

    return hr;
} /* CStdSentEnum::SetFragList */
/*****************************************************************************
* CStdSentEnum::Next *
*--------------------*
*   Builds the enumerator for the next sentence via GetNextSentence() and,
*   when that returns S_OK, pushes the position the sentence started at onto
*   the sentence stack.  Returns E_INVALIDARG for a bad out pointer and
*   S_FALSE when there is no current fragment.
********************************************************************* EDC ***/
STDMETHODIMP CStdSentEnum::Next( IEnumSENTITEM **ppSentItemEnum )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::Next" );

    //--- Check args
    if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
    {
        return E_INVALIDARG;
    }

    //--- If this is NULL then the enum needs to be reset
    if( !m_pCurrFrag )
    {
        return S_FALSE;
    }

    //--- Remember where this sentence begins before the cursor moves on
    SentencePointer SentenceStart;
    SentenceStart.pSentenceFrag  = m_pCurrFrag;
    SentenceStart.pSentenceStart = m_pNextChar;

    HRESULT hr = GetNextSentence( ppSentItemEnum );
    if( hr == S_OK )
    {
        //--- Update Sentence Pointer List
        hr = m_SentenceStack.Push( SentenceStart );
    }
    return hr;
} /* CStdSentEnum::Next */
/*****************************************************************************
* CStdSentEnum::Previous *
*------------------------*
*   Steps back one sentence: pops the current and the previous sentence start
*   from the sentence stack, rewinds the fragment/character cursor to the
*   previous start, rebuilds that sentence with GetNextSentence() and, when
*   that returns S_OK, pushes the start back onto the stack.
*   Returns E_INVALIDARG for a bad out pointer and S_FALSE when fewer than two
*   entries are on the stack.
********************************************************************* AH ****/
STDMETHODIMP CStdSentEnum::Previous( IEnumSENTITEM **ppSentItemEnum )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::Previous" );
    HRESULT hr = S_OK;

    //--- Check args
    if( SPIsBadWritePtr( ppSentItemEnum, sizeof( IEnumSENTITEM* ) ) )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        //--- Don't care if m_pCurrFrag is NULL, as long as we have enough on the SentenceStack
        //--- to skip backwards...
        if( m_SentenceStack.GetCount() >= 2 )
        {
            //--- Discard the current sentence, then take the previous one BY VALUE.
            //--- Holding a reference to what Pop() returns would either alias storage
            //--- inside the stack (which the Push() below writes to) or bind to a temporary.
            m_SentenceStack.Pop();
            SentencePointer PreviousSentence = m_SentenceStack.Pop();

            //--- Reset the current frag and the current text pointer position
            m_pCurrFrag = PreviousSentence.pSentenceFrag;
            m_pNextChar = PreviousSentence.pSentenceStart;
            m_pEndChar  = m_pCurrFrag->pTextStart + m_pCurrFrag->ulTextLen;

            hr = GetNextSentence( ppSentItemEnum );
            if( hr == S_OK )
            {
                //--- Update Sentence Pointer List
                hr = m_SentenceStack.Push( PreviousSentence );
            }
        }
        else
        {
            hr = S_FALSE;
        }
    }
    return hr;
} /* CStdSentEnum::Previous */
/*****************************************************************************
* SkipWhiteSpaceAndTags *
*-----------------------*
*   Skips pStartChar ahead to the next non-whitespace character (skipping
*   ahead in the frag list, if necessary) or sets it to NULL if it hits the
*   end of the frag list text...
*
*   pStartChar, pEndChar and pCurrFrag are in/out cursors.  Fragments whose
*   action is neither SPVA_Speak nor SPVA_SpellOut are stepped over; when
*   fAddToItemList is true each of them is also appended to *pItemList as a
*   one-word item of type eWORDLIST_IS_VALID, and <NAME> / </NAME> fragments
*   toggle m_fNameItem.  Returns the HRESULT of the last memory request.
********************************************************************* AH ****/
HRESULT CStdSentEnum::SkipWhiteSpaceAndTags( const WCHAR*& pStartChar, const WCHAR*& pEndChar,
                                             const SPVTEXTFRAG*& pCurrFrag, CSentItemMemory& MemoryManager,
                                             BOOL fAddToItemList, CItemList* pItemList )
{
    SPDBG_ASSERT( pStartChar <= pEndChar );
    HRESULT hr = S_OK;

    //--- Test for end-of-fragment BEFORE dereferencing, so the character one
    //--- past the fragment is never read.
    while ( pStartChar &&
            ( pStartChar == pEndChar ||
              IsSpace( *pStartChar ) ) )
    {
        //--- Skip whitespace
        while ( pStartChar < pEndChar &&
                IsSpace( *pStartChar ) )
        {
            ++pStartChar;
        }

        //--- Skip to next spoken frag, if necessary
        if ( pStartChar == pEndChar )
        {
            pCurrFrag = pCurrFrag->pNext;
            while ( pCurrFrag &&
                    pCurrFrag->State.eAction != SPVA_Speak &&
                    pCurrFrag->State.eAction != SPVA_SpellOut )
            {
                pStartChar = (WCHAR*) pCurrFrag->pTextStart;
                pEndChar   = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;

                //--- Add non-spoken fragments, if fAddToItemList is true.
                if ( fAddToItemList )
                {
                    //-- Check for names lexicon XML tag...
                    //-- Uses the fragment being walked (pCurrFrag), not m_pCurrFrag: the two
                    //-- only coincide when the caller passed m_pCurrFrag as the cursor.
                    if( !m_fNameItem &&
                        pCurrFrag->ulTextLen == 6 &&
                        !_wcsnicmp( L"<NAME>", pCurrFrag->pTextStart, pCurrFrag->ulTextLen ) )
                    {
                        m_fNameItem = true;
                    }
                    else if( m_fNameItem &&
                             pCurrFrag->ulTextLen == 7 &&
                             !_wcsnicmp( L"</NAME>", pCurrFrag->pTextStart, pCurrFrag->ulTextLen ) )
                    {
                        m_fNameItem = false;
                    }

                    CSentItem Item;
                    Item.pItemSrcText    = pCurrFrag->pTextStart;
                    Item.ulItemSrcLen    = pCurrFrag->ulTextLen;
                    Item.ulItemSrcOffset = pCurrFrag->ulTextSrcOffset;
                    Item.ulNumWords      = 1;
                    Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        ZeroMemory( Item.Words, sizeof(TTSWord) );
                        Item.Words[0].pXmlState         = &pCurrFrag->State;
                        Item.Words[0].eWordPartOfSpeech = MS_Unknown;
                        Item.eItemPartOfSpeech          = MS_Unknown;
                        Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                        if ( SUCCEEDED( hr ) )
                        {
                            Item.pItemInfo->Type = eWORDLIST_IS_VALID;
                            pItemList->AddTail( Item );
                        }
                    }
                }
                pCurrFrag = pCurrFrag->pNext;
            }

            if ( !pCurrFrag )
            {
                //--- Ran off the end of the fragment list
                pStartChar = NULL;
                pEndChar   = NULL;
            }
            else
            {
                pStartChar = (WCHAR*) pCurrFrag->pTextStart;
                pEndChar   = (WCHAR*) pStartChar + pCurrFrag->ulTextLen;
            }
        }
    }
    return hr;
} /* SkipWhiteSpaceAndTags */
/*****************************************************************************
* FindTokenEnd *
*--------------*
*   Scans forward from pStartChar and returns the position at which the scan
*   stops: the first whitespace character, pEndChar, or the point where the
*   running character count reaches SP_MAX_WORD_LENGTH, whichever comes first.
********************************************************************* AH ****/
const WCHAR* CStdSentEnum::FindTokenEnd( const WCHAR* pStartChar, const WCHAR* pEndChar )
{
    SPDBG_ASSERT( pStartChar < pEndChar );

    const WCHAR *pCursor = pStartChar;
    ULONG cCharsSeen = 1;

    for ( ;; )
    {
        //--- Stop at a NULL cursor or at the end of the buffer
        if ( !pCursor || pCursor >= pEndChar )
        {
            break;
        }
        //--- Stop at whitespace
        if ( IsSpace( *pCursor ) )
        {
            break;
        }
        //--- Stop once the word-length limit is reached
        if ( cCharsSeen >= SP_MAX_WORD_LENGTH )
        {
            break;
        }
        ++pCursor;
        ++cCharsSeen;
    }
    return pCursor;
} /* FindTokenEnd */
/*****************************************************************************
* CStdSentEnum::AddNextSentItem *
*-------------------------------*
*   Locates the next sentence item in the stream and adds it to the list.
*   *pfIsEOS is set to true if the last item added is the end of the sentence.
*
*   Outline:
*     1. Skip whitespace / non-spoken fragments (these are appended to ItemList).
*     2. Find the end of the next token and reserve a list slot for it.
*     3. If the user lexicon has the token, emit it as a single eALPHA_WORD item.
*     4. Otherwise (SPERR_NOT_IN_LEX): map the text with DoUnicodeToAsciiMap(),
*        peel leading group/quote characters into items inserted before the
*        slot, peel trailing punctuation / EOS characters into items inserted
*        after the slot (with abbreviation and initialism disambiguation for
*        periods), then Normalize() what is left and advance m_pNextChar.
*
*   Any failure HRESULT stops processing and is returned to the caller.
********************************************************************* AH ****/
HRESULT CStdSentEnum::AddNextSentItem( CItemList& ItemList, CSentItemMemory& MemoryManager, BOOL* pfIsEOS )
{
    SPDBG_ASSERT( m_pNextChar && pfIsEOS );

    HRESULT hr = S_OK;
    BOOL fHitPauseItem = false;
    CSentItem Item;
    ULONG ulTrailItems = 0;
    TTSItemType ItemType = eUNMATCHED;

    *pfIsEOS = false;

    //--- Skip initial whitespace characters and XML markup (by skipping ahead in the frag list).
    hr = SkipWhiteSpaceAndTags( m_pNextChar, m_pEndChar, m_pCurrFrag, MemoryManager, true, &ItemList );
    if ( FAILED( hr ) )
    {
        //--- Don't let the lexicon lookup below overwrite (and hide) this failure.
        return hr;
    }

    //--- This will happen when we hit the end of the frag list
    if ( !m_pNextChar )
    {
        return S_OK;
    }

    //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar).
    m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );

    //--- Get Primary Insert Position
    SPLISTPOS ItemPos = ItemList.AddTail( Item );

    //--- Try looking up this token in the User Lexicon...
    //--- The token is temporarily NUL-terminated in place for the lookup and restored afterwards.
    WCHAR Temp = *( (WCHAR*) m_pEndOfCurrToken );
    *( (WCHAR*) m_pEndOfCurrToken ) = 0;
    SPWORDPRONUNCIATIONLIST SPList;
    ZeroMemory( &SPList, sizeof( SPWORDPRONUNCIATIONLIST ) );
    hr = m_cpAggregateLexicon->GetPronunciations( m_pNextChar, 1033, eLEXTYPE_USER, &SPList );
    if( SPList.pvBuffer )
    {
        //--- Only the HRESULT is of interest here; the pronunciations are discarded.
        ::CoTaskMemFree( SPList.pvBuffer );
    }
    *( (WCHAR*) m_pEndOfCurrToken ) = Temp;

    if ( SUCCEEDED( hr ) )
    {
        Item.eItemPartOfSpeech = MS_Unknown;
        Item.pItemSrcText      = m_pNextChar;
        Item.ulItemSrcLen      = (ULONG) ( m_pEndOfCurrToken - m_pNextChar );
        Item.ulItemSrcOffset   = m_pCurrFrag->ulTextSrcOffset +
                                 (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
        Item.ulNumWords        = 1;
        Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
        if ( SUCCEEDED( hr ) )
        {
            ZeroMemory( Item.Words, sizeof(TTSWord) );
            Item.Words[0].pXmlState         = &m_pCurrFrag->State;
            Item.Words[0].pWordText         = m_pNextChar;
            Item.Words[0].ulWordLen         = Item.ulItemSrcLen;
            Item.Words[0].pLemma            = Item.Words[0].pWordText;
            Item.Words[0].ulLemmaLen        = Item.Words[0].ulWordLen;
            Item.Words[0].eWordPartOfSpeech = MS_Unknown;
            Item.eItemPartOfSpeech          = MS_Unknown;
            //--- Allocate the structure itself, not the size of a pointer to it.
            Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
            if ( SUCCEEDED( hr ) )
            {
                Item.pItemInfo->Type = eALPHA_WORD;
                ItemList.SetAt( ItemPos, Item );
            }
        }
        m_pNextChar = m_pEndOfCurrToken;
    }
    //--- Not in the user lex - itemize, normalize, etc.
    else if ( hr == SPERR_NOT_IN_LEX )
    {
        hr = S_OK;

        //--- convert text from Unicode to Ascii
        hr = DoUnicodeToAsciiMap( m_pNextChar, (ULONG)( m_pEndOfCurrToken - m_pNextChar ), (WCHAR*)m_pNextChar );

        if ( SUCCEEDED( hr ) )
        {
            //--- Find end of the next token (next whitespace character, hyphen, or m_pEndChar)
            //--- AGAIN, since the mapping may have introduced new whitespace characters...
            m_pEndOfCurrToken = FindTokenEnd( m_pNextChar, m_pEndChar );

            //--- Insert lead items (group beginnings, quotation marks)
            //--- The SUCCEEDED test ends the loop on an allocation failure; without it
            //--- m_pNextChar would never advance and the loop would not terminate.
            while ( SUCCEEDED( hr ) &&
                    m_pNextChar < m_pEndOfCurrToken &&
                    ( ( ItemType = IsGroupBeginning( *m_pNextChar ) ) != eUNMATCHED ||
                      ( ItemType = IsQuotationMark( *m_pNextChar ) ) != eUNMATCHED ) )
            {
                CSentItem LeadItem;
                LeadItem.pItemSrcText    = m_pNextChar;
                LeadItem.ulItemSrcLen    = 1;
                LeadItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                           (ULONG)(( m_pNextChar - m_pCurrFrag->pTextStart ));
                LeadItem.ulNumWords      = 1;
                LeadItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    ZeroMemory( LeadItem.Words, sizeof(TTSWord) );
                    LeadItem.Words[0].pXmlState         = &m_pCurrFrag->State;
                    LeadItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
                    LeadItem.eItemPartOfSpeech          = ConvertItemTypeToPartOfSp( ItemType );
                    LeadItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        LeadItem.pItemInfo->Type = ItemType;
                        if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
                             m_pCurrFrag->State.eAction == SPVA_SpellOut )
                        {
                            //--- Punctuation is to be spoken: expand it into words
                            CWordList TempWordList;
                            ExpandPunctuation( TempWordList, *m_pNextChar );
                            hr = SetWordList( LeadItem, TempWordList, MemoryManager );
                            LeadItem.pItemInfo->Type = eUNMATCHED;
                        }
                        ItemList.InsertBefore( ItemPos, LeadItem );
                        m_pNextChar++;
                    }
                }
                ItemType = eUNMATCHED;
            }

            //--- Insert trail items (group endings, quotation marks, misc. punctuation, EOS Items)
            m_pEndOfCurrItem = m_pEndOfCurrToken;
            BOOL fAddTrailItem = true;
            BOOL fAbbreviation = false;
            //--- As above, SUCCEEDED( hr ) keeps a failed allocation from spinning forever
            //--- and from being overwritten by a later iteration.
            while ( SUCCEEDED( hr ) &&
                    (m_pEndOfCurrItem - 1) >= m_pNextChar &&
                    fAddTrailItem )
            {
                fAddTrailItem = false;
                fAbbreviation = false;

                //--- Check group endings, quotation marks, misc. punctuation.
                if ( ( ItemType = IsGroupEnding( *(m_pEndOfCurrItem - 1) ) )     != eUNMATCHED ||
                     ( ItemType = IsQuotationMark( *(m_pEndOfCurrItem - 1) ) )   != eUNMATCHED ||
                     ( ItemType = IsMiscPunctuation( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED )
                {
                    fAddTrailItem = true;
                    if ( ItemType == eCOMMA ||
                         ItemType == eCOLON ||
                         ItemType == eSEMICOLON )
                    {
                        fHitPauseItem = true;
                    }
                }
                //--- Check EOS Items, except periods preceded by alpha characters
                else if ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 1) ) ) != eUNMATCHED &&
                          ! ( ItemType == ePERIOD                     &&
                              ( m_pEndOfCurrItem - 2 >= m_pNextChar ) &&
                              ( iswalpha( *(m_pEndOfCurrItem - 2) ) ) ) )
                {
                    //--- Check for ellipses
                    if ( ItemType == ePERIOD )
                    {
                        if ( m_pEndOfCurrItem == m_pEndOfCurrToken                                  &&
                             ( m_pEndOfCurrItem - 2 >= m_pNextChar )                                &&
                             ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 2) ) ) == ePERIOD )     &&
                             ( m_pEndOfCurrItem - 3 == m_pNextChar )                                &&
                             ( ( ItemType = IsEOSItem( *(m_pEndOfCurrItem - 3) ) ) == ePERIOD ) )
                        {
                            fAddTrailItem = true;
                            ItemType      = eELLIPSIS;
                        }
                        else
                        {
                            ItemType      = ePERIOD;
                            fAddTrailItem = true;
                            *pfIsEOS      = true;
                        }
                    }
                    else
                    {
                        fAddTrailItem = true;
                        *pfIsEOS      = true;
                    }
                }
                //--- Period preceded by alpha character - determine whether it is EOS.
                else if ( ItemType == ePERIOD )
                {
                    //--- Is it an Initialism ( e.g. "e.g." )? If so, only EOS if the next
                    //--- word is in the common first words list...
                    hr = IsInitialism( ItemList, ItemPos, MemoryManager, pfIsEOS );
                    if ( SUCCEEDED( hr ) )
                    {
                        if ( *pfIsEOS )
                        {
                            //--- Did we see a pause item earlier? In that case, we should NOT listen to this
                            //--- IsEOS decision from IsInitialism...
                            if ( fHitPauseItem )
                            {
                                *pfIsEOS = false;
                            }
                            else
                            {
                                fAddTrailItem = true;
                                fAbbreviation = true;
                            }
                        }
                    }
                    else if ( hr == E_INVALIDARG )
                    {
                        //--- Not an initialism: look the text before the period up in the
                        //--- abbreviation table (period temporarily replaced by a terminator).
                        const WCHAR temp = (WCHAR) *( m_pEndOfCurrItem - 1 );
                        *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = 0;
                        const AbbrevRecord* pAbbrevRecord =
                            (AbbrevRecord*) bsearch( (void*) m_pNextChar, (void*) g_AbbreviationTable,
                                                     sp_countof( g_AbbreviationTable ), sizeof( AbbrevRecord ),
                                                     CompareStringAndAbbrevRecord );
                        *( (WCHAR*) ( m_pEndOfCurrItem - 1 ) ) = temp;

                        if ( pAbbrevRecord )
                        {
                            //--- Matched an abbreviation
                            if ( pAbbrevRecord->iSentBreakDisambig < 0 )
                            {
                                //--- Abbreviation will never end a sentence - just insert into ItemList
                                *pfIsEOS = false;
                                hr       = S_OK;

                                Item.pItemSrcText    = m_pNextChar;
                                Item.ulItemSrcLen    = (ULONG)(m_pEndOfCurrItem - m_pNextChar);
                                Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                                       (ULONG)( m_pNextChar - m_pCurrFrag->pTextStart );
                                Item.ulNumWords      = 1;
                                Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof( TTSWord ), &hr );
                                if ( SUCCEEDED( hr ) )
                                {
                                    ZeroMemory( Item.Words, sizeof( TTSWord ) );
                                    Item.Words[0].pXmlState  = &m_pCurrFrag->State;
                                    Item.Words[0].pWordText  = Item.pItemSrcText;
                                    Item.Words[0].ulWordLen  = Item.ulItemSrcLen;
                                    Item.Words[0].pLemma     = Item.pItemSrcText;
                                    Item.Words[0].ulLemmaLen = Item.ulItemSrcLen;
                                    Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSAbbreviationInfo), &hr );
                                    if ( SUCCEEDED( hr ) )
                                    {
                                        if ( NeedsToBeNormalized( pAbbrevRecord ) )
                                        {
                                            Item.pItemInfo->Type = eABBREVIATION_NORMALIZE;
                                        }
                                        else
                                        {
                                            Item.pItemInfo->Type = eABBREVIATION;
                                        }
                                        ( (TTSAbbreviationInfo*) Item.pItemInfo )->pAbbreviation = pAbbrevRecord;
                                        ItemList.SetAt( ItemPos, Item );
                                    }
                                }
                            }
                            else
                            {
                                //--- Need to do some disambiguation to determine whether,
                                //---   a) this is indeed an abbreviation (e.g. "Ed.")
                                //---   b) the period doubles as EOS
                                hr = ( this->*g_SentBreakDisambigTable[pAbbrevRecord->iSentBreakDisambig] )
                                            ( pAbbrevRecord, ItemList, ItemPos, MemoryManager, pfIsEOS );
                                if ( SUCCEEDED( hr ) )
                                {
                                    if ( *pfIsEOS )
                                    {
                                        if ( fHitPauseItem )
                                        {
                                            *pfIsEOS = false;
                                        }
                                        else
                                        {
                                            fAddTrailItem = true;
                                            fAbbreviation = true;
                                        }
                                    }
                                }
                            }
                        }

                        if ( hr == E_INVALIDARG )
                        {
                            //--- Just check for periods internal to the item - this catches stuff like
                            //--- 10:30p.m.
                            //--- pIterator is declared outside the loop because it is read afterwards.
                            const WCHAR* pIterator = m_pNextChar;
                            for ( ; pIterator < m_pEndOfCurrItem - 1; pIterator++ )
                            {
                                if ( *pIterator == L'.' )
                                {
                                    *pfIsEOS = false;
                                    break;
                                }
                            }
                            //--- If all previous checks have failed, it is EOS.
                            if ( pIterator == ( m_pEndOfCurrItem - 1 ) &&
                                 !fHitPauseItem )
                            {
                                hr            = S_OK;
                                fAddTrailItem = true;
                                *pfIsEOS      = true;
                            }
                            else if ( hr == E_INVALIDARG )
                            {
                                hr = S_OK;
                            }
                        }
                    }
                }

                //--- Add trail item.
                if ( fAddTrailItem )
                {
                    ulTrailItems++;
                    CSentItem TrailItem;
                    if ( ItemType == eELLIPSIS )
                    {
                        TrailItem.pItemSrcText    = m_pEndOfCurrItem - 3;
                        TrailItem.ulItemSrcLen    = 3;
                        TrailItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                                    (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 3 );
                    }
                    else
                    {
                        TrailItem.pItemSrcText    = m_pEndOfCurrItem - 1;
                        TrailItem.ulItemSrcLen    = 1;
                        TrailItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                                    (ULONG)( m_pEndOfCurrItem - m_pCurrFrag->pTextStart - 1 );
                    }
                    TrailItem.ulNumWords = 1;
                    TrailItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        ZeroMemory( TrailItem.Words, sizeof(TTSWord) );
                        TrailItem.Words[0].pXmlState         = &m_pCurrFrag->State;
                        TrailItem.Words[0].eWordPartOfSpeech = ConvertItemTypeToPartOfSp( ItemType );
                        TrailItem.eItemPartOfSpeech          = ConvertItemTypeToPartOfSp( ItemType );
                        TrailItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                        if ( SUCCEEDED( hr ) )
                        {
                            TrailItem.pItemInfo->Type = ItemType;
                            if ( m_dwSpeakFlags & SPF_NLP_SPEAK_PUNC ||
                                 ( m_pCurrFrag->State.eAction == SPVA_SpellOut &&
                                   !fAbbreviation ) )
                            {
                                CWordList TempWordList;
                                ExpandPunctuation( TempWordList, *(m_pEndOfCurrItem - 1) );
                                hr = SetWordList( TrailItem, TempWordList, MemoryManager );
                                TrailItem.pItemInfo->Type = eUNMATCHED;
                            }
                            ItemList.InsertAfter( ItemPos, TrailItem );
                            //--- For an abbreviation the period stays part of the main item.
                            if ( !fAbbreviation )
                            {
                                if ( ItemType == eELLIPSIS )
                                {
                                    m_pEndOfCurrItem -= 3;
                                    ulTrailItems = 3;
                                }
                                else
                                {
                                    m_pEndOfCurrItem--;
                                }
                            }
                        }
                    }
                    ItemType = eUNMATCHED;
                    if ( fAbbreviation )
                    {
                        break;
                    }
                }
            }

            //--- Do Main Item Insertion
            BOOL fMainItemRemoved = false;
            if ( SUCCEEDED( hr ) &&
                 m_pNextChar == m_pEndOfCurrItem )
            {
                //--- Nothing left between the lead and trail items - drop the placeholder
                ItemList.RemoveAt( ItemPos );
                fMainItemRemoved = true;
            }
            else if ( SUCCEEDED( hr ) )
            {
                hr = Normalize( ItemList, ItemPos, MemoryManager );
            }

            //--- Tag the main item for the names LTS.  ItemPos must not be touched once
            //--- the placeholder has been removed from the list.
            if( m_fNameItem && !fMainItemRemoved )
            {
                wcscpy( ItemList.GetAt( ItemPos ).CustomLtsToken, L"Names" );
            }

            //--- Advance m_pNextChar to m_pEndOfCurrItem + once for each trail item matched.
            if ( SUCCEEDED( hr ) )
            {
                if ( !fAbbreviation &&
                     m_pEndOfCurrItem + ulTrailItems != m_pEndOfCurrToken )
                {
                    //--- Multi-token item matched in Normalize()... Remove all previously matched trail items,
                    //--- as they were matched as part of the larger item...
                    m_pNextChar = m_pEndOfCurrItem;
                    Item = ItemList.GetNext( ItemPos );
                    while ( ItemPos )
                    {
                        SPLISTPOS RemovePos = ItemPos;
                        Item = ItemList.GetNext( ItemPos );
                        ItemList.RemoveAt( RemovePos );
                    }
                }
                else
                {
                    m_pNextChar = m_pEndOfCurrToken;
                }
            }
        }
    }
    return hr;
} /* CStdSentEnum::AddNextSentItem */
/*****************************************************************************
* CStdSentEnum::GetNextSentence *
*-------------------------------*
*   This method is used to create a sentence item enumerator and populate it
*   with items.  Spoken / spell-out fragments are itemized one token at a time
*   through AddNextSentItem(); every other fragment becomes a single item
*   covering the whole fragment.  Item gathering stops at end of sentence, at
*   the end of the fragment list, or after a fixed number of passes; if no
*   end-of-sentence item was produced a period item is appended.  Finally
*   DetermineProns() is run over the list and the enumerator is reset.
*
*   Returns S_FALSE (leaving *ppItemEnum untouched) when there is no current
*   fragment.  On failure after the enumerator was created, the enumerator is
*   released and *ppItemEnum is set to NULL.
*
*   NOTE(review): the original header described SPF_NLP_PASSTHROUGH handling,
*   but this function does not test that flag.
********************************************************************* EDC ***/
HRESULT CStdSentEnum::GetNextSentence( IEnumSENTITEM** ppItemEnum )
{
    HRESULT hr = S_OK;
    ULONG ulNumItems = 0;
    //--- Upper bound on item-gathering passes for one sentence
    const ULONG ulMaxItemPasses = 50;
    const SPVTEXTFRAG* pPrevFrag = m_pCurrFrag;

    //--- Is there any work to do
    if( m_pCurrFrag == NULL ) return S_FALSE;

    //--- Create sentence enum
    CComObject<CSentItemEnum> *pItemEnum;
    hr = CComObject<CSentItemEnum>::CreateInstance( &pItemEnum );

    if( SUCCEEDED( hr ) )
    {
        pItemEnum->AddRef();
        pItemEnum->_SetOwner( GetControllingUnknown() );
        *ppItemEnum = pItemEnum;
    }

    if( SUCCEEDED( hr ) )
    {
        BOOL fSentDone = false;
        BOOL fGoToNextFrag = false;
        CItemList& ItemList = pItemEnum->_GetList();
        CSentItemMemory& MemoryManager = pItemEnum->_GetMemoryManager();

        while( SUCCEEDED(hr) && m_pCurrFrag && !fSentDone && ulNumItems < ulMaxItemPasses )
        {
            ulNumItems++;
            if( m_pCurrFrag->State.eAction == SPVA_Speak ||
                m_pCurrFrag->State.eAction == SPVA_SpellOut )
            {
                hr = AddNextSentItem( ItemList, MemoryManager, &fSentDone );

                //--- Advance fragment?
                if( SUCCEEDED( hr ) &&
                    m_pNextChar     &&
                    m_pEndChar      &&
                    m_pNextChar >= m_pEndChar )
                {
                    fGoToNextFrag = true;
                }
            }
            else
            {
                //-- Check for lexicon
                if( !m_fNameItem &&
                    m_pCurrFrag->ulTextLen == 6 &&
                    !_wcsnicmp( L"<NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
                {
                    m_fNameItem = true;
                }
                else if( m_fNameItem &&
                         m_pCurrFrag->ulTextLen == 7 &&
                         !_wcsnicmp( L"</NAME>", m_pCurrFrag->pTextStart, m_pCurrFrag->ulTextLen ) )
                {
                    m_fNameItem = false;
                }

                //--- Add non spoken fragments
                CSentItem Item;
                Item.pItemSrcText    = m_pCurrFrag->pTextStart;
                Item.ulItemSrcLen    = m_pCurrFrag->ulTextLen;
                Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset;
                Item.ulNumWords      = 1;
                Item.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    ZeroMemory( Item.Words, sizeof(TTSWord) );
                    Item.Words[0].pXmlState         = &m_pCurrFrag->State;
                    Item.Words[0].eWordPartOfSpeech = MS_Unknown;
                    Item.eItemPartOfSpeech          = MS_Unknown;
                    Item.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                    if ( SUCCEEDED( hr ) )
                    {
                        Item.pItemInfo->Type = eWORDLIST_IS_VALID;
                        ItemList.AddTail( Item );
                    }
                }
                fGoToNextFrag = true;
            }

            if( SUCCEEDED( hr ) &&
                fGoToNextFrag )
            {
                fGoToNextFrag = false;
                pPrevFrag     = m_pCurrFrag;
                m_pCurrFrag   = m_pCurrFrag->pNext;
                if( m_pCurrFrag )
                {
                    m_pNextChar = m_pCurrFrag->pTextStart;
                    m_pEndChar  = m_pNextChar + m_pCurrFrag->ulTextLen;
                }
                else
                {
                    m_pNextChar = NULL;
                    m_pEndChar  = NULL;
                }
            }
        } // end while

        //--- If no period has been added, add one now - this will happen if the text
        //--- is ONLY XML markup...
        if ( SUCCEEDED(hr) && !fSentDone )
        {
            CSentItem EOSItem;
            EOSItem.pItemSrcText    = g_period.pStr;
            EOSItem.ulItemSrcLen    = g_period.Len;
            EOSItem.ulItemSrcOffset = pPrevFrag->ulTextSrcOffset + pPrevFrag->ulTextLen;
            EOSItem.ulNumWords      = 1;
            EOSItem.Words = (TTSWord*) MemoryManager.GetMemory( sizeof(TTSWord), &hr );
            if ( SUCCEEDED( hr ) )
            {
                ZeroMemory( EOSItem.Words, sizeof(TTSWord) );
                EOSItem.Words[0].pXmlState         = &g_DefaultXMLState;
                EOSItem.Words[0].eWordPartOfSpeech = MS_EOSItem;
                EOSItem.eItemPartOfSpeech          = MS_EOSItem;
                EOSItem.pItemInfo = (TTSItemInfo*) MemoryManager.GetMemory( sizeof(TTSItemInfo), &hr );
                if ( SUCCEEDED( hr ) )
                {
                    EOSItem.pItemInfo->Type = ePERIOD;
                    ItemList.AddTail( EOSItem );
                }
            }
        }

        //--- Output debugging information, if sentence breaks are desired
        TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_SENTENCEBREAKS );

        if( SUCCEEDED( hr ) )
        {
            hr = DetermineProns( pItemEnum->_GetList(), pItemEnum->_GetMemoryManager() );
        }

        pItemEnum->Reset();

        //--- Output debugging information, if POS or Pronunciations are desired
        TTSDBG_LOGITEMLIST( pItemEnum->_GetList(), STREAM_LEXLOOKUP );

        if( FAILED( hr ) )
        {
            //--- Don't hand a partially built enumerator back with a failure code:
            //--- drop the reference taken above and clear the out parameter.
            pItemEnum->Release();
            *ppItemEnum = NULL;
        }
    }
    return hr;
} /* CStdSentEnum::GetNextSentence */
/*****************************************************************************
* CStdSentEnum::Reset *
*---------------------*
*   Rewinds the enumerator to the first fragment of the current fragment
*   list, clears the sentence stack and the names-tag flag.  If no fragment
*   list has been set yet (m_pTextFragList is NULL after FinalConstruct) the
*   character cursors are set to NULL instead of being read from the list.
*   Always returns S_OK.
********************************************************************* EDC ***/
STDMETHODIMP CStdSentEnum::Reset( void )
{
    SPAUTO_OBJ_LOCK;
    SPDBG_FUNC( "CStdSentEnum::Reset" );
    HRESULT hr = S_OK;

    m_pCurrFrag = m_pTextFragList;
    if ( m_pCurrFrag )
    {
        m_pNextChar = m_pCurrFrag->pTextStart;
        m_pEndChar  = m_pNextChar + m_pCurrFrag->ulTextLen;
    }
    else
    {
        //--- Nothing to enumerate; Next() reports S_FALSE for a NULL m_pCurrFrag
        m_pNextChar = NULL;
        m_pEndChar  = NULL;
    }
    m_SentenceStack.Reset();
    m_fNameItem = false;
    return hr;
} /* CStdSentEnum::Reset */
/*****************************************************************************
* CStdSentEnum::InitAggregateLexicon *
*------------------------------------*
*   Creates the CLSID_SpLexicon object held in m_cpAggregateLexicon and
*   returns the result of the creation call.
********************************************************************* AH ****/
HRESULT CStdSentEnum::InitAggregateLexicon( void )
{
    const HRESULT hrCreate = m_cpAggregateLexicon.CoCreateInstance( CLSID_SpLexicon );
    return hrCreate;
}
/*****************************************************************************
* CStdSentEnum::AddLexiconToAggregate *
*-------------------------------------*
*   Forwards pAddLexicon and dwFlags to AddLexicon() on m_cpAggregateLexicon
*   and returns its result.
********************************************************************* AH ****/
HRESULT CStdSentEnum::AddLexiconToAggregate( ISpLexicon *pAddLexicon, DWORD dwFlags )
{
    const HRESULT hrAdd = m_cpAggregateLexicon->AddLexicon( pAddLexicon, dwFlags );
    return hrAdd;
}
/*****************************************************************************
* CStdSentEnum::InitMorphLexicon *
*--------------------------------*
*   Allocates the CSMorph object on top of m_cpAggregateLexicon and stores it
*   in m_pMorphLexicon (released in FinalRelease).  Returns the HRESULT the
*   CSMorph constructor reports, or E_OUTOFMEMORY if the allocation itself
*   yields NULL.
********************************************************************* AH ****/
HRESULT CStdSentEnum::InitMorphLexicon( void )
{
    HRESULT hr = S_OK;

    m_pMorphLexicon = new CSMorph( m_cpAggregateLexicon, &hr );
    if ( !m_pMorphLexicon )
    {
        //--- The constructor never ran, so hr would otherwise still read S_OK.
        //--- Only reachable when operator new reports failure by returning NULL.
        hr = E_OUTOFMEMORY;
    }

    return hr;
}
void CStdSentEnum::fNamesLTS( bool fHaveNamesLTS )
{
m_fHaveNamesLTS = fHaveNamesLTS;
}
//
//=== CSentItemEnum =========================================================
//
/*****************************************************************************
* CSentItemEnum::Next *
*---------------------*
*   Copies the item at the current list position into *pItemEnum and advances
*   the position.  Returns E_INVALIDARG for a bad out pointer and S_FALSE when
*   the end of the list has been reached.
********************************************************************* EDC ***/
STDMETHODIMP CSentItemEnum::
Next( TTSSentItem *pItemEnum )
{
    SPDBG_FUNC( "CSentItemEnum::Next" );

    //--- Check args
    if( SPIsBadWritePtr( pItemEnum, sizeof( TTSSentItem ) ) )
    {
        return E_INVALIDARG;
    }

    //--- No position left means the list is exhausted
    if ( !m_ListPos )
    {
        return S_FALSE;
    }

    *pItemEnum = m_ItemList.GetNext( m_ListPos );
    return S_OK;
} /* CSentItemEnum::Next */
/*****************************************************************************
* CSentItemEnum::Reset *
*----------------------*
*   Moves the enumeration position back to the head of the item list.
*   Always returns S_OK.
********************************************************************* EDC ***/
STDMETHODIMP CSentItemEnum::Reset( void )
{
    SPDBG_FUNC( "CSentItemEnum::Reset" );

    m_ListPos = m_ItemList.GetHeadPosition();
    return S_OK;
} /* CSentItemEnum::Reset */