windows-nt/Source/XPSP1/NT/enduser/speech/tts/ms_entropic/ttsengine.cpp

482 lines
15 KiB
C++
Raw Normal View History

2020-09-26 03:20:57 -05:00
/*******************************************************************************
* MSE_TTSEngine.cpp *
*---------------*
* Description:
* This module is the main implementation file for the MSE_TTSEngine class.
*-------------------------------------------------------------------------------
* Created By: EDC Date: 03/12/99
* Copyright (C) 1999 Microsoft Corporation
* All Rights Reserved
*
*******************************************************************************/
//--- Additional includes
#include "stdafx.h"
#include <stdio.h>
#include "TTSEngine.h"
#include "stdsentenum.h"
#ifdef USE_VOICEDATAOBJ
#include "VoiceDataObj.h"
#endif
#include "commonlx.h"
#include "perf\\ttsperf.h"
#if USE_PERF_COUNTERS
CPerfCounterManager g_pcm;
#endif
/*****************************************************************************
* MSE_TTSEngine::FinalConstruct *
*----------------------------*
*   Description:
*       Second-phase constructor. Clears the backend pointer (the backend is
*       created later, in SetObjectToken) and, when the build enables
*       USE_PERF_COUNTERS, initializes the shared perf counter manager and
*       attaches this object's counter object to it.
*
*   Returns:
*       Always S_OK; a perf counter initialization failure is not reported.
********************************************************************* EDC ***/
HRESULT MSE_TTSEngine::FinalConstruct()
{
    SPDBG_FUNC( "MSE_TTSEngine::FinalConstruct" );

    //--- No backend exists yet
    m_pBEnd = NULL;

#if USE_PERF_COUNTERS
    //--- Only hook up our counters if the global manager came up cleanly
    if ( ERROR_SUCCESS == g_pcm.Init( "TTSPerf", perfcMax / 2 - 1, 100 ) )
    {
        m_pco.Init( &g_pcm );
    }
#endif

    return S_OK;
} /* MSE_TTSEngine::FinalConstruct */
/*****************************************************************************
* MSE_TTSEngine::FinalRelease *
*--------------------------*
*   Description:
*       Final cleanup: destroys the backend object held in m_pBEnd, if any.
********************************************************************* EDC ***/
void MSE_TTSEngine::FinalRelease()
{
    SPDBG_FUNC( "MSE_TTSEngine::FinalRelease" );

    //--- delete on a null pointer has no effect, so no explicit guard is needed
    delete m_pBEnd;
} /* MSE_TTSEngine::FinalRelease */
/*****************************************************************************
* MSE_TTSEngine::SetObjectToken *
*-------------------------------*
*   Description:
*       This method is called during construction to give the TTS driver object
*       access to the voice's object token for initialization purposes.
*
*       It performs, in order:
*         1. Stores the token (and forwards it to the voice data object when
*            USE_VOICEDATAOBJ is defined).
*         2. Creates the sentence enumerator and populates its aggregate
*            lexicon from the "Lex", "Lts" and (optional) "Names" subtokens,
*            then creates the morphology lexicon.
*         3. Creates the backend and loads the table named by the token's
*            "Sfont" string value.
*         4. Initializes the front end with pitch information derived from
*            the backend's speaker info.
*
*   Parameters:
*       pToken - the voice's object token.
*
*   Returns:
*       S_OK on success, E_OUTOFMEMORY if the backend or the font path copy
*       could not be allocated, E_FAIL if the backend could not load its
*       table, or the failing HRESULT of any step above.
******************************************************************* AARONHAL ***/
HRESULT MSE_TTSEngine::SetObjectToken( ISpObjectToken *pToken )
{
    SPDBG_FUNC( "MSE_TTSEngine::SetObjectToken" );
    HRESULT hr = S_OK;

    //--- Call old SetObjectToken, in VoiceData
    m_cpToken = pToken;
#ifdef USE_VOICEDATAOBJ
    hr = m_VoiceDataObj.SetObjectToken( pToken );
#endif

    //--- Do old VoiceInit( ) stuff...
    if ( SUCCEEDED( hr ) )
    {
        //--- Create sentence enumerator and initialize
        CComObject<CStdSentEnum> *pSentEnum = NULL;
        hr = CComObject<CStdSentEnum>::CreateInstance( &pSentEnum );

        if ( SUCCEEDED( hr ) )
        {
            //--- CreateInstance hands back an object whose reference count is
            //    zero. Hold a reference for the duration of initialization so
            //    that the object is destroyed (rather than leaked) if any of
            //    the steps below fails.
            pSentEnum->AddRef();

            //--- Create aggregate lexicon
            hr = pSentEnum->InitAggregateLexicon();

            //--- Create vendor lexicon and add to aggregate
            if ( SUCCEEDED( hr ) )
            {
                CComPtr<ISpObjectToken> cpToken;
                hr = SpGetSubTokenFromToken( pToken, L"Lex", &cpToken );

                CComPtr<ISpLexicon> cpCompressedLexicon;
                if ( SUCCEEDED( hr ) )
                {
                    hr = SpCreateObjectFromToken( cpToken, &cpCompressedLexicon );
                }
                if ( SUCCEEDED( hr ) )
                {
                    hr = pSentEnum->AddLexiconToAggregate( cpCompressedLexicon, eLEXTYPE_PRIVATE1 );
                }
            }

            //--- Create LTS lexicon and add to aggregate
            if ( SUCCEEDED( hr ) )
            {
                CComPtr<ISpObjectToken> cpToken;
                hr = SpGetSubTokenFromToken( pToken, L"Lts", &cpToken );

                CComPtr<ISpLexicon> cpLTSLexicon;
                if ( SUCCEEDED( hr ) )
                {
                    hr = SpCreateObjectFromToken( cpToken, &cpLTSLexicon );
                }
                if ( SUCCEEDED( hr ) )
                {
                    hr = pSentEnum->AddLexiconToAggregate( cpLTSLexicon, eLEXTYPE_PRIVATE2 );
                }
            }

            //--- Create Names LTS lexicon and add to aggregate
            if ( SUCCEEDED( hr ) )
            {
                CComPtr<ISpObjectToken> cpToken;
                hr = SpGetSubTokenFromToken( pToken, L"Names", &cpToken );

                CComPtr<ISpLexicon> cpLTSLexicon;
                if ( SUCCEEDED( hr ) )
                {
                    hr = SpCreateObjectFromToken( cpToken, &cpLTSLexicon );
                    if ( SUCCEEDED( hr ) )
                    {
                        hr = pSentEnum->AddLexiconToAggregate( cpLTSLexicon, eLEXTYPE_PRIVATE3 );
                        if ( SUCCEEDED( hr ) )
                        {
                            pSentEnum->fNamesLTS( true );
                        }
                    }
                }
                else
                {
                    //--- No "Names" subtoken in the registry - just behave as we did
                    //    before the Names LTS code was added...
                    pSentEnum->fNamesLTS( false );
                    hr = S_OK;
                }
            }

            //--- Create morphology lexicon
            if ( SUCCEEDED( hr ) )
            {
                hr = pSentEnum->InitMorphLexicon();
            }

            //--- Set member sentence enumerator
            if ( SUCCEEDED( hr ) )
            {
                // NOTE(review): relies on m_cpSentEnum being a ref-counting smart
                // pointer (as the "cp" prefix suggests) that takes its own reference.
                m_cpSentEnum = pSentEnum;
            }

            //--- Drop the initialization reference. On success the member still
            //    holds the object; on failure this destroys it.
            pSentEnum->Release();
        }
    }

    //--- Do old InitDriver stuff
    if ( SUCCEEDED( hr ) )
    {
        //--------------------------
        // Get voice information
        //--------------------------
#ifdef USE_VOICEDATAOBJ
        hr = m_VoiceDataObj.GetVoiceInfo( &m_VoiceInfo );
        if( SUCCEEDED(hr) )
        {
            m_SampleRate = m_VoiceInfo.SampleRate;

            //-----------------------------
            // Reverb is always stereo
            //-----------------------------
            if (m_VoiceInfo.eReverbType != REVERB_TYPE_OFF )
            {
                //------------------
                // Stereo
                //------------------
                m_IsStereo = true;
                m_BytesPerSample = 4;
            }
            else
            {
                //------------------
                // MONO
                //------------------
                m_IsStereo = false;
                m_BytesPerSample = 2;
            }
#else
        {
#endif
            //--------------------------
            // Initialize BACKEND
            //--------------------------
            m_pBEnd = CBackEnd::ClassFactory();
            if ( m_pBEnd )
            {
                CSpDynamicString dstrSFontPath;
                hr = pToken->GetStringValue( L"Sfont", &dstrSFontPath );
                if ( SUCCEEDED( hr ) )
                {
                    char *pszSFontPath = dstrSFontPath.CopyToChar();
                    if ( !pszSFontPath )
                    {
                        hr = E_OUTOFMEMORY;
                    }
                    else
                    {
                        if ( !m_pBEnd->LoadTable( pszSFontPath ) )
                        {
                            hr = E_FAIL;
                        }
                        else
                        {
                            m_pBEnd->SetFrontEndFlag ();
                            m_pBEnd->SetGain( 2.0 );
                        }
                        //--- The path copy is ours to free whether or not the
                        //    table loaded (previously leaked on load failure).
                        ::CoTaskMemFree( pszSFontPath );
                    }
                }
            }
            else
            {
                hr = E_OUTOFMEMORY;
            }

            //--------------------------
            // Initialize FRONTEND obj
            //--------------------------
            if( SUCCEEDED( hr ))
            {
                EntropicPitchInfo PitchInfo;
                int BaseLine, RefLine, TopLine;
                m_pBEnd->GetSpeakerInfo( &BaseLine, &RefLine, &TopLine );

                //--- Base pitch is the midpoint of the speaker's top and base
                //    lines; range is the distance between them.
                PitchInfo.BasePitch = ( TopLine + BaseLine ) / 2;
                PitchInfo.Range = TopLine - BaseLine;
#ifdef USE_VOICEDATAOBJ
                hr = m_FEObj.Init( &m_VoiceDataObj, NULL, &m_VoiceInfo, PitchInfo );
#else
                hr = m_FEObj.Init( NULL /*&m_VoiceDataObj*/, NULL, NULL /*&m_VoiceInfo*/, PitchInfo, m_pBEnd->GetPhoneSetFlag() );
#endif
            }
        }
    }
    return hr;
} /* MSE_TTSEngine::SetObjectToken */
/*****************************************************************************
* MSE_TTSEngine::Speak *
*-------------------*
*   Description:
*       Renders the given text fragment list to the output site.
*
*       When rguidFormatId is SPDFID_Text, the normalized words (or, for items
*       without a valid word list, the original source text) are written as
*       Unicode text: a 0xFEFF signature, the items separated by spaces, a
*       CR/LF after each sentence and a terminating NUL character.
*
*       Otherwise (SPDFID_WaveFormatEx), each sentence produced by the front
*       end is passed to the backend and the generated 16-bit samples are
*       written to the output site.
*
*   Parameters:
*       dwSpeakFlags   - passed through to the sentence enumerator.
*       rguidFormatId  - SPDFID_WaveFormatEx or SPDFID_Text.
*       (WAVEFORMATEX) - ignored.
*       pTextFragList  - the text fragments to render.
*       pOutputSite    - the site that receives the output via Write().
*
*   Returns:
*       E_INVALIDARG for an unsupported format id or bad output site,
*       SPERR_UNINITIALIZED if SetObjectToken() has not successfully set up the
*       engine, E_FAIL if the backend rejects a phone string or fails to
*       generate output, or the first failing HRESULT from the enumerators,
*       the front end or the output site. Processing stops at the first
*       failure instead of carrying on and masking it.
********************************************************************* EDC ***/
STDMETHODIMP MSE_TTSEngine::
Speak( DWORD dwSpeakFlags, REFGUID rguidFormatId,
       const WAVEFORMATEX * /* pWaveFormatEx ignored */,
       const SPVTEXTFRAG* pTextFragList,
       ISpTTSEngineSite* pOutputSite )
{
    SPDBG_FUNC( "MSE_TTSEngine::Speak" );
    HRESULT hr = S_OK;

#if USE_PERF_COUNTERS
    m_pco.IncrementCounter (perfcSpeakCalls);
#endif

    //--- Early exit?
    if( ( rguidFormatId != SPDFID_WaveFormatEx && rguidFormatId != SPDFID_Text ) || SP_IS_BAD_INTERFACE_PTR( pOutputSite ) )
    {
        hr = E_INVALIDARG;
    }
    else if ( !m_cpSentEnum || ( !m_pBEnd && rguidFormatId != SPDFID_Text ) )
    {
        //--- SetObjectToken() never completed: there is no sentence enumerator
        //    (needed by both paths) or no backend (needed for audio output).
        hr = SPERR_UNINITIALIZED;
    }
    else
    {
        //--- Debug Macro - open file for debugging output
        TTSDBG_OPENFILE;

        //--- Initialize sentence enumerator
        hr = m_cpSentEnum->SetFragList( pTextFragList, dwSpeakFlags );
        if( SUCCEEDED( hr ) )
        {
            // The following code is here just for testing.
            // It should be removed once all the tools accept the
            // new way of outputing debug info.
            if( rguidFormatId == SPDFID_Text )
            {
                //--- Enumerate and write out all sentence items.
                IEnumSENTITEM *pItemEnum = NULL;
                TTSSentItem Item;

                //--- Write unicode signature
                static const WCHAR Signature = 0xFEFF;
                hr = pOutputSite->Write( &Signature, sizeof(Signature), NULL );

                //--- hr carries the enumerator status; it is S_FALSE (a success
                //    code) once an enumerator is exhausted.
                while( SUCCEEDED( hr ) &&
                       (hr = m_cpSentEnum->Next( &pItemEnum) ) == S_OK )
                {
                    //--- Output-site status is tracked separately so that the
                    //    next call to Next() cannot overwrite a write failure.
                    HRESULT hrWrite = S_OK;

                    while( SUCCEEDED( hrWrite ) &&
                           (hr = pItemEnum->Next( &Item )) == S_OK )
                    {
                        // Is there a valid normalized-word-list?
                        if ( Item.pItemInfo->Type & eWORDLIST_IS_VALID )
                        {
                            for ( ULONG i = 0; SUCCEEDED( hrWrite ) && i < Item.ulNumWords; i++ )
                            {
                                if ( Item.Words[i].pXmlState->eAction == SPVA_Speak ||
                                     Item.Words[i].pXmlState->eAction == SPVA_SpellOut )
                                {
                                    ULONG cb = Item.Words[i].ulWordLen * sizeof( WCHAR );
                                    hrWrite = pOutputSite->Write( Item.Words[i].pWordText, cb, NULL );
                                    if( hrWrite == S_OK )
                                    {
                                        //--- Insert space between items
                                        hrWrite = pOutputSite->Write( L" ", sizeof( WCHAR ), NULL );
                                    }
                                }
                            }
                        }
                        else // no word list - just write the original text.
                        {
                            ULONG cb = Item.ulItemSrcLen * sizeof( WCHAR );
                            hrWrite = pOutputSite->Write( Item.pItemSrcText, cb, NULL );
                            if ( SUCCEEDED(hrWrite) )
                            {
                                //--- Insert space between items
                                hrWrite = pOutputSite->Write( L" ", sizeof( WCHAR ), NULL );
                            }
                        }
                    }
                    pItemEnum->Release();

                    //--- A failed write takes precedence over the enumerator status
                    if ( FAILED( hrWrite ) )
                    {
                        hr = hrWrite;
                    }

                    //--- Insert mark between sentences
                    if( SUCCEEDED( hr ) )
                    {
                        static const WCHAR CRLF[2] = { 0x000D, 0x000A };
                        hr = pOutputSite->Write( CRLF, 2*sizeof(WCHAR), NULL );
                    }
                }

                //--- Terminate the text, but do not let this write hide an
                //    earlier failure.
                if ( SUCCEEDED( hr ) )
                {
                    static const WCHAR ENDL = 0x0000;
                    hr = pOutputSite->Write( &ENDL, sizeof(WCHAR), NULL );
                }
            }
            else
            {
                //--- Render the text
                m_FEObj.PrepareSpeech( m_cpSentEnum, pOutputSite );

                SPEECH_STATE SpeechState = SPEECH_CONTINUE;
                SentenceData *pSentence = NULL;
                short *pSamples = NULL;
                int nSamples = 0;

                //--- Stop on the first failure. Looping on SpeechState alone
                //    would overwrite the error on the next pass and could spin
                //    forever if the front end keeps failing without leaving
                //    SPEECH_CONTINUE.
                while ( SUCCEEDED( hr ) && SpeechState == SPEECH_CONTINUE )
                {
                    hr = m_FEObj.NextData( (void**)&pSentence, &SpeechState );
                    if ( SUCCEEDED( hr ) &&
                         SpeechState == SPEECH_CONTINUE )
                    {
                        if ( !m_pBEnd->NewPhoneString( pSentence->pPhones, pSentence->ulNumPhones,
                                                       pSentence->pf0, pSentence->ulNumf0 ) )
                        {
                            hr = E_FAIL;
                        }
                        else
                        {
                            while ( SUCCEEDED( hr ) &&
                                    m_pBEnd->OutputPending() )
                            {
                                if ( !m_pBEnd->GenerateOutput( &pSamples, &nSamples ) )
                                {
                                    hr = E_FAIL;
                                }
                                else if ( nSamples )
                                {
                                    hr = pOutputSite->Write( (void*)pSamples, nSamples * sizeof( short ), NULL );
                                }
                            }
                        }
                    }

                    //--- The sentence data handed out by the front end is ours
                    //    to free, on success and on failure alike.
                    // NOTE(review): pPhones and pf0 look like arrays (they come
                    // with element counts); if the front end allocates them with
                    // new[], these should be delete[] - confirm at the
                    // allocation site.
                    if ( pSentence )
                    {
                        if ( pSentence->pPhones )
                        {
                            delete pSentence->pPhones;
                            pSentence->pPhones = NULL;
                        }
                        if ( pSentence->pf0 )
                        {
                            delete pSentence->pf0;
                            pSentence->pf0 = NULL;
                        }
                        delete pSentence;
                        pSentence = NULL;
                    }
                }
            }
        }

        //--- Debug Macro - close debugging file
        TTSDBG_CLOSEFILE;
    }
    return hr;
} /* MSE_TTSEngine::Speak */
//--- This is the only format the Entropic backend supports...
//    GetOutputFormat() copies this structure into the buffer it returns when
//    wave output is requested. Members are listed positionally, in
//    WAVEFORMATEX declaration order.
static const WAVEFORMATEX EntropicFormat =
{
1,      // wFormatTag      - 1 is the PCM format tag
1,      // nChannels       - mono
8000,   // nSamplesPerSec  - 8 kHz
16000,  // nAvgBytesPerSec - nSamplesPerSec * nBlockAlign
2,      // nBlockAlign     - nChannels * wBitsPerSample / 8
16,     // wBitsPerSample
0       // cbSize          - no extra format bytes follow
};
/****************************************************************************
* MSE_TTSEngine::GetOutputFormat *
*-----------------------------*
*   Description:
*       Reports the output format this engine will produce. If the caller
*       asks for SPDFID_Text, text output is offered with no wave format;
*       for any other target (or none at all), a CoTaskMemAlloc'ed copy of
*       EntropicFormat is returned as SPDFID_WaveFormatEx.
*
*   Returns:
*       S_OK on success, E_INVALIDARG if either output pointer is not
*       writable, E_OUTOFMEMORY if the wave format could not be allocated
*       (in which case *ppCoMemDesiredWaveFormatEx is NULL).
******************************************************************* PACOG ***/
STDMETHODIMP MSE_TTSEngine::GetOutputFormat(const GUID * pTargetFormatId, const WAVEFORMATEX * /* pTargetWaveFormatEx */,
                                            GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWaveFormatEx)
{
    SPDBG_FUNC("MSE_TTSEngine::GetOutputFormat");
    HRESULT hr = S_OK;

    if ( SP_IS_BAD_WRITE_PTR(pDesiredFormatId) ||
         SP_IS_BAD_WRITE_PTR(ppCoMemDesiredWaveFormatEx) )
    {
        hr = E_INVALIDARG;
    }
    else
    {
        //--- Anything other than an explicit request for text gets wave output
        const bool fWaveOutput = ( pTargetFormatId == NULL ) || ( *pTargetFormatId != SPDFID_Text );

        if ( !fWaveOutput )
        {
            *pDesiredFormatId = SPDFID_Text;
            *ppCoMemDesiredWaveFormatEx = NULL;
        }
        else
        {
            *pDesiredFormatId = SPDFID_WaveFormatEx;

            //--- The caller owns (and must CoTaskMemFree) the returned structure
            WAVEFORMATEX *pFormatCopy = (WAVEFORMATEX *)::CoTaskMemAlloc(sizeof(WAVEFORMATEX));
            *ppCoMemDesiredWaveFormatEx = pFormatCopy;

            if ( pFormatCopy == NULL )
            {
                hr = E_OUTOFMEMORY;
            }
            else
            {
                *pFormatCopy = EntropicFormat;
            }
        }
    }

    SPDBG_REPORT_ON_FAIL( hr );
    return hr;
} /* MSE_TTSEngine::GetOutputFormat */