/******************************************************************************
* TrueTalk.cpp *
*--------------*
*  This module is the main implementation for class CTrueTalk
*------------------------------------------------------------------------------
*  Copyright (C) 2000 Microsoft Corporation         Date: 02/29/00
*  All Rights Reserved
*
********************************************************************* PACOG ***/

#include "stdafx.h"
#include "TrueTalk.h"
#include "frontend.h"
#include "backend.h"
#include "queue.h"

// Capacity handed to the CPhStrQueue that sits between front end and back end.
const int CTrueTalk::m_iQueueSize = 512;

// Default character given to WideCharToMultiByte for unmappable characters.
// g_AnsiToAscii[0x00] is a space, so unmappable input ends up as a space.
static const char g_pFlagCharacter = 0x00;

// Maps every code page 1252 byte value (used as the index) to the byte that is
// passed on to the front end.
static const unsigned char g_AnsiToAscii[] =
{
    /*** Control characters - map to whitespace ***/
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    /*** ASCII displayables ***/
    0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
    0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
    0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
    0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
    0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
    0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,
    /*** Control character ***/
    0x20,
    /*** Euro symbol ***/
    0x80,
    /*** Control character ***/
    0x20,
    /*** Extended ASCII values ***/
    0x27,     // low single quote - map to single quote
    0x20,     // f-like character - map to space
    0x22,     // low double quote - map to double quote
    0x2C,     // ellipsis - map to comma
    0x20,     // cross - map to space
    0x20,     // double cross - map to space
    0x5E,     // caret like accent - map to caret
    0x89,     // strange percent like sign
    0x53,     // S-hat - map to S
    0x27,     // left angle bracket like thing - map to single quote
    0x20,     // weird OE character - map to space
    0x20,     // control characters - map to space
    0x20,
    0x20,
    0x20,
    0x27,     // left single quote - map to single quote
    0x27,     // right single quote - map to single quote
    0x22,     // left double quote - map to double quote
    0x22,     // right double quote - map to double quote
    0x20,     // bullet - map to space
    0x2D,     // long hyphen - map to hyphen
    0x2D,     // even longer hyphen - map to hyphen
    0x7E,     // tilde-like thing - map to tilde
    0x98,     // TM
              // NOTE(review): this entry is index 0x99 but holds 0x98, while the
              // other preserved symbols map to themselves. Left unchanged in
              // case the front end keys on 0x98 - confirm.
    0x73,     // s-hat - map to s
    0x27,     // right angle bracket like thing - map to single quote
    0x20,     // weird oe like character - map to space
    0x20,     // control character - map to space
    0x20,     // control character - map to space
    0x59,     // Y with umlaut like accent - map to Y
    0x20,     // space? - map to space
    0x20,     // upside-down exclamation point - map to space
    0xA2,     // cents symbol
    0xA3,     // pounds symbol
    0x20,     // generic currency symbol - map to space
    0xA5,     // yen symbol
    0x7C,     // broken bar - map to bar
    0x20,     // strange symbol - map to space
    0x20,     // umlaut - map to space
    0xA9,     // copyright symbol
    0x20,     // strange a character - map to space
    0x22,     // strange <<-like character - map to double quote
    0x20,     // strange line-like character - map to space
    0x2D,     // hyphen-like character - map to hyphen
    0xAE,     // registered symbol
    0x20,     // high line - map to space
    0xB0,     // degree sign
    0xB1,     // plus-minus sign
    0xB2,     // superscript 2
    0xB3,     // superscript 3
    0xB4,     // single prime
    0x20,     // greek character - map to space
    0x20,     // paragraph symbol - map to space
    0x20,     // mid-height dot - map to space
    0x20,     // cedilla - map to space
    0xB9,     // superscript one
    0x20,     // circle with line - map to space
    0x22,     // strange >>-like character - map to double quote
    0xBC,     // vulgar 1/4
    0xBD,     // vulgar 1/2
    0xBE,     // vulgar 3/4
    0x20,     // upside-down question mark - map to space
    0x41,     // Accented uppercase As - map to A
    0x41,
    0x41,
    0x41,
    0x41,
    0x41,
    0x41,
    0x43,     // C with cedilla - map to C
    0x45,     // Accented uppercase Es - map to E
    0x45,
    0x45,
    0x45,
    0x49,     // Accented uppercase Is - map to I
    0x49,
    0x49,
    0x49,
    0x20,     // strange character - map to space
    0x4E,     // Accented uppercase N - map to N
    0x4F,     // Accented uppercase Os - map to O
    0x4F,
    0x4F,
    0x4F,
    0x4F,
    0x20,     // strange character - map to space
    0x4F,     // another O? - map to O
    0x55,     // Accented uppercase Us - map to U
    0x55,
    0x55,
    0x55,
    0x59,     // Accented uppercase Y - map to Y
    0x20,     // strange character - map to space
    0xDF,     // Beta
    0x61,     // Accented lowercase as - map to a
    0x61,
    0x61,
    0x61,
    0x61,
    0x61,
    0x61,
    0x63,     // c with cedilla - map to c
    0x65,     // Accented lowercase es - map to e
    0x65,
    0x65,
    0x65,
    0x69,     // Accented lowercase is - map to i
    0x69,
    0x69,
    0x69,
    0x74,     // eth - map to t  (was 0x75 'u', contradicting this comment)
    0x6E,     // Accented lowercase n - map to n
    0x6F,     // Accented lowercase os - map to o
    0x6F,
    0x6F,
    0x6F,
    0x6F,
    0xF7,     // division symbol
    0x6F,     // another o? - map to o
    0x75,     // Accented lowercase us - map to u  (was 0x76, which is 'v')
    0x75,
    0x75,
    0x75,
    0x79,     // accented lowercase y - map to y
    0x20,     // strange character - map to space
    0x79,     // accented lowercase y - map to y
};

// Compile-time check (pre-C++11 idiom): the table is indexed by an unsigned
// char, so it must have exactly 256 entries.
typedef char AnsiToAsciiTableMustHave256Entries[(sizeof(g_AnsiToAscii) == 256) ? 1 : -1];

/*****************************************************************************
* IsSpokenFragment *
*------------------*
*   Description:
*       Returns true when the fragment's action is SPVA_Speak, SPVA_Pronounce
*   or SPVA_SpellOut, the three actions whose text RunFrontEnd consumes.
******************************************************************* PACOG ***/
static bool IsSpokenFragment (const SPVTEXTFRAG* pFrag)
{
    return pFrag->State.eAction == SPVA_Speak     ||
           pFrag->State.eAction == SPVA_Pronounce ||
           pFrag->State.eAction == SPVA_SpellOut;
}

/*****************************************************************************
* ReadTokenFilePath *
*-------------------*
*   Description:
*       Reads the string value pszValueName from pToken and converts it to
*   the ANSI code page into pszFilePath (capacity cchFilePath chars).
*
*   Returns:
*       The GetStringValue result, or E_FAIL when the conversion fails. In the
*   E_FAIL case pszFilePath is set to the empty string.
******************************************************************* PACOG ***/
static HRESULT ReadTokenFilePath (ISpObjectToken* pToken, const WCHAR* pszValueName,
                                  char* pszFilePath, int cchFilePath)
{
    CSpDynamicString dstrFilePath;
    HRESULT hr = pToken->GetStringValue (pszValueName, &dstrFilePath);

    if ( SUCCEEDED (hr) )
    {
        // A length of -1 converts the terminator as well. A zero return means
        // the conversion failed and the output buffer cannot be trusted.
        if ( !WideCharToMultiByte (CP_ACP, 0, dstrFilePath.m_psz, -1,
                                   pszFilePath, cchFilePath, 0, 0) )
        {
            pszFilePath[0] = '\0';
            hr = E_FAIL;
        }
    }

    return hr;
}

/*****************************************************************************
* WriteFragmentsAsText *
*----------------------*
*   Description:
*       Text-output path of RunFrontEnd. Writes to the output site a Unicode
*   byte-order mark, the text of every spoken fragment followed by a space,
*   a CR/LF pair and a terminating zero WCHAR. Stops at the first failed Write.
******************************************************************* PACOG ***/
static HRESULT WriteFragmentsAsText (const SPVTEXTFRAG* pTextFragList, ISpTTSEngineSite* pOutputSite)
{
    //--- Write unicode signature
    static const WCHAR Signature = 0xFEFF;
    HRESULT hr = pOutputSite->Write (&Signature, sizeof(Signature), NULL);

    for (const SPVTEXTFRAG* pFrag = pTextFragList; SUCCEEDED(hr) && pFrag; pFrag = pFrag->pNext)
    {
        if ( IsSpokenFragment (pFrag) )
        {
            hr = pOutputSite->Write (pFrag->pTextStart, pFrag->ulTextLen * sizeof(WCHAR), NULL);
            if ( SUCCEEDED (hr) )
            {
                hr = pOutputSite->Write (L" ", sizeof(WCHAR), NULL);
            }
        }
    }

    //--- Insert mark between blocks
    if ( SUCCEEDED (hr) )
    {
        static const WCHAR CRLF[2] = { 0x000D, 0x000A };
        hr = pOutputSite->Write (CRLF, 2*sizeof(WCHAR), NULL);
    }
    if ( SUCCEEDED (hr) )
    {
        static const WCHAR ENDL = 0x0000;
        hr = pOutputSite->Write (&ENDL, sizeof(WCHAR), NULL);
    }

    return hr;
}

/*****************************************************************************
* CTrueTalk::InitThreading *
*--------------------------*
*   Description:
*       Forwards to CFrontEnd::InitThreading.
******************************************************************* PACOG ***/
void CTrueTalk::InitThreading()
{
    CFrontEnd::InitThreading();
}

/*****************************************************************************
* CTrueTalk::ReleaseThreading *
*-----------------------------*
*   Description:
*       Forwards to CFrontEnd::ReleaseThreading.
******************************************************************* PACOG ***/
void CTrueTalk::ReleaseThreading()
{
    CFrontEnd::ReleaseThreading();
}

/*****************************************************************************
* CTrueTalk::FinalConstruct *
*---------------------------*
*   Description:
*       Constructor. Clears the owned pointers, sets the default gain (1.0)
*   and debug level (0), and fills in a mono 16-bit PCM wave format. The
*   sample rate and byte rate stay 0 until SetObjectToken loads the back end.
*
*   Returns:
*       Always S_OK.
******************************************************************* PACOG ***/
HRESULT CTrueTalk::FinalConstruct()
{
    HRESULT hr = S_OK;

    m_cpToken      = 0;
    m_pTtp         = 0;
    m_pBend        = 0;
    m_pPhoneQueue  = 0;
    m_dGain        = 1.0;
    m_dwDebugLevel = 0;
    m_fTextOutput  = false;

    m_WaveFormatEx.wFormatTag      = WAVE_FORMAT_PCM;
    m_WaveFormatEx.nChannels       = 1;
    m_WaveFormatEx.nSamplesPerSec  = 0;
    m_WaveFormatEx.nAvgBytesPerSec = 0;
    m_WaveFormatEx.nBlockAlign     = 2;
    m_WaveFormatEx.wBitsPerSample  = 16;
    m_WaveFormatEx.cbSize          = 0;

    return hr;
}

/*****************************************************************************
* CTrueTalk::FinalRelease *
*-------------------------*
*   Description:
*       Destructor. Deletes the front end, back end and phone queue and
*   clears the pointers so that a stale pointer is never reused.
******************************************************************* PACOG ***/
void CTrueTalk::FinalRelease()
{
    delete m_pTtp;          // deleting a null pointer is a no-op
    m_pTtp = 0;

    delete m_pBend;
    m_pBend = 0;

    delete m_pPhoneQueue;
    m_pPhoneQueue = 0;
}

/*****************************************************************************
* CTrueTalk::SetObjectToken *
*---------------------------*
*   Description:
*       This function performs the majority of the initialization of the voice.
*   Once the object token has been provided, the filenames are read from the
*   token key and handed to the front end ("Dictionary") and the back end
*   ("Sfont"). "DebugInterest" and "Gain" are optional; when absent the
*   debug level is 0 and the gain keeps its current value.
*
*   Returns:
*       S_OK on success; the failing HRESULT from the token calls; E_FAIL if
*   a file path cannot be converted to ANSI; E_OUTOFMEMORY if the front end,
*   back end or phone queue cannot be created or initialized.
******************************************************************* PACOG ***/
STDMETHODIMP CTrueTalk::SetObjectToken (ISpObjectToken * pToken)
{
    HRESULT hr = SpGenericSetObjectToken(pToken, m_cpToken);
    char pszFilePath[_MAX_PATH+1] = "";
    bool fIsBrEng = false;

    //-- Read debug info first (optional value)
    if ( SUCCEEDED (hr) )
    {
        if ( FAILED (m_cpToken->GetDWORD (L"DebugInterest", &m_dwDebugLevel)) )
        {
            m_dwDebugLevel = 0;
        }
    }

    // Determine engine language
    if ( SUCCEEDED (hr) )
    {
        // NOTE(review): the template argument was missing from the reviewed
        // text; ISpObjectToken is what SpGetSubTokenFromToken fills in.
        CComPtr<ISpObjectToken> cpAttributes;
        CSpDynamicString dstrLanguage;

        hr = SpGetSubTokenFromToken(m_cpToken, L"Attributes", &cpAttributes);
        if ( SUCCEEDED (hr) )
        {
            hr = cpAttributes->GetStringValue (L"Language", &dstrLanguage);
        }
        if ( SUCCEEDED (hr) && dstrLanguage.m_psz )
        {
            // Only the first entry of a ';'-separated list is examined.
            WCHAR* pchSeparator = wcschr (dstrLanguage.m_psz, ';');
            if ( pchSeparator )
            {
                *pchSeparator = 0;
            }
            if ( wcscmp (dstrLanguage.m_psz, L"809") == 0 )
            {
                fIsBrEng = true;
            }
        }
    }

    //-- Initialize front-end
    if ( SUCCEEDED (hr) )
    {
        hr = ReadTokenFilePath (m_cpToken, L"Dictionary", pszFilePath, _MAX_PATH);
    }

    if ( SUCCEEDED (hr) )
    {
        if ((m_pTtp = CFrontEnd::ClassFactory()) == 0)
        {
            return E_OUTOFMEMORY;
        }

        if (!m_pTtp->Init (pszFilePath, 0))
        {
            if (m_dwDebugLevel)
            {
                fprintf (stderr, "Error initializing ttp with dictionary path %s\n", pszFilePath);
            }
            return E_OUTOFMEMORY;
        }

        if ((m_pPhoneQueue = new CPhStrQueue (m_iQueueSize)) == 0)
        {
            return E_OUTOFMEMORY;
        }
    }

    //-- And now, the back end
    if ( SUCCEEDED (hr) )
    {
        hr = ReadTokenFilePath (m_cpToken, L"Sfont", pszFilePath, _MAX_PATH);
    }

    if ( SUCCEEDED (hr) )
    {
        int iBaseLine;
        int iRefLine;
        int iTopLine;

        if ((m_pBend = CBackEnd::ClassFactory()) == 0)
        {
            return E_OUTOFMEMORY;
        }

        if ( !m_pBend->LoadTable (pszFilePath, m_dwDebugLevel) )
        {
            if (m_dwDebugLevel)
            {
                fprintf (stderr, "Error loading table %s\n", pszFilePath);
            }
            return E_OUTOFMEMORY;
        }

        // "Gain" is optional. Its absence must not turn a fully initialized
        // voice into a failure, so the result is not stored in hr.
        CSpDynamicString dstrGain;
        if ( SUCCEEDED (m_cpToken->GetStringValue (L"Gain", &dstrGain)) && dstrGain.m_psz )
        {
            m_dGain = wcstod (dstrGain.m_psz, NULL);
            m_pBend->SetGain (m_dGain);
        }

        m_pBend->GetSpeakerInfo(&iBaseLine, &iRefLine, &iTopLine);
        m_pTtp->SetSpeakerParams(iBaseLine, iRefLine, iTopLine, fIsBrEng);

        m_WaveFormatEx.nSamplesPerSec  = m_pBend->GetSampFreq();
        m_WaveFormatEx.nAvgBytesPerSec = m_WaveFormatEx.nSamplesPerSec * m_WaveFormatEx.nBlockAlign;
    }

    return hr;
}

/****************************************************************************
* CTrueTalk::GetOutputFormat *
*----------------------------*
*   Description:
*       Reports the format this engine will produce. When the caller asks for
*   SPDFID_Text the engine switches to text output; otherwise it returns a
*   CoTaskMemAlloc'ed copy of the engine wave format and switches back to
*   audio output.
*
*   Returns:
*       S_OK, E_POINTER if an output pointer is bad, or E_OUTOFMEMORY if the
*   wave format copy cannot be allocated.
******************************************************************* PACOG ***/
HRESULT CTrueTalk::GetOutputFormat( const GUID * pTargetFormatId, const WAVEFORMATEX * pTargetWaveFormatEx,
                                    GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWaveFormatEx )
{
    // Bail out before touching the output pointers if either is bad.
    if ( SP_IS_BAD_WRITE_PTR(pDesiredFormatId) ||
         SP_IS_BAD_WRITE_PTR(ppCoMemDesiredWaveFormatEx) )
    {
        return E_POINTER;
    }

    HRESULT hr = S_OK;

    if ( pTargetFormatId && *pTargetFormatId == SPDFID_Text )
    {
        *pDesiredFormatId           = SPDFID_Text;
        *ppCoMemDesiredWaveFormatEx = NULL;     // no wave format accompanies text
        m_fTextOutput               = true;
    }
    else
    {
        // A later non-text request must undo an earlier text request.
        m_fTextOutput     = false;
        *pDesiredFormatId = SPDFID_WaveFormatEx;
        *ppCoMemDesiredWaveFormatEx = (WAVEFORMATEX *)::CoTaskMemAlloc(sizeof(WAVEFORMATEX));
        if ( *ppCoMemDesiredWaveFormatEx )
        {
            **ppCoMemDesiredWaveFormatEx = m_WaveFormatEx;
        }
        else
        {
            hr = E_OUTOFMEMORY;
        }
    }

    return hr;
}

/*****************************************************************************
* CTrueTalk::Speak *
*------------------*
*   Description:
*       This method is supposed to speak the text observing the associated
*   XML state. It runs the front end over the fragment list and then takes
*   each queued phone string, gives it to the back end and writes every
*   generated buffer of 16-bit samples to the output site. Pending site
*   actions are processed before starting and before each buffer.
*
*   Returns:
*       S_OK; E_INVALIDARG for a null output site; E_UNEXPECTED when the
*   engine has not been initialized by SetObjectToken; E_OUTOFMEMORY when the
*   back end fails to generate output; or the failing HRESULT from
*   RunFrontEnd or ISpTTSEngineSite::Write.
******************************************************************* PACOG ***/
HRESULT CTrueTalk::Speak (DWORD dwSpeakFlags, REFGUID rguidFormatId, const WAVEFORMATEX * pWaveFormatEx,
                          const SPVTEXTFRAG * pTextFragList, ISpTTSEngineSite * pOutputSite)
{
    if ( !pOutputSite )
    {
        return E_INVALIDARG;
    }
    if ( !m_pTtp || !m_pBend || !m_pPhoneQueue )
    {
        return E_UNEXPECTED;
    }

    // An abort requested before any work has started is not an error.
    if ( SyncActions (pOutputSite) != 0 )
    {
        return S_OK;
    }

    HRESULT hr = RunFrontEnd (pTextFragList, pOutputSite);

    while ( SUCCEEDED (hr) && m_pPhoneQueue->Size() > 0 )   //-- While something to synthesize
    {
        Phone* pPhones    = 0;
        int    iNumPhones = 0;
        float* pfF0       = 0;
        int    iNumF0     = 0;

        if ( !m_pPhoneQueue->FirstElement (&pPhones, &iNumPhones, &pfF0, &iNumF0) )
        {
            // Without this the loop would spin forever, because the queue is
            // only advanced after a successful FirstElement.
            break;
        }

        //-- Got something from front end, synthesize
        m_pPhoneQueue->Forward();
        m_pBend->NewPhoneString (pPhones, iNumPhones, pfF0, iNumF0);

        while ( SUCCEEDED (hr) && m_pBend->OutputPending() )
        {
            // On abort only this phone string is abandoned; the outer loop
            // keeps taking the remaining queue entries so their buffers are
            // still released below.
            if ( SyncActions (pOutputSite) != 0 )
            {
                break;
            }

            short* psSamples   = 0;
            int    iNumSamples = 0;

            if ( !m_pBend->GenerateOutput (&psSamples, &iNumSamples) )
            {
                hr = E_OUTOFMEMORY;
            }
            else if ( psSamples )
            {
                hr = pOutputSite->Write (psSamples, iNumSamples*sizeof(short), 0);
            }
        }

        // Buffers obtained from the queue are released with free().
        free (pPhones);
        free (pfF0);
    }

    return hr;
}

/*****************************************************************************
* CTrueTalk::RunFrontEnd *
*------------------------*
*   Description:
*       Resets the phone queue and processes the text of every fragment whose
*   action is Speak, Pronounce or SpellOut. In text-output mode the text is
*   written straight to the output site. Otherwise the fragments are mapped
*   to single-byte text, joined with spaces into one string and run through
*   the front end; each non-empty result is pushed onto the phone queue.
*
*   Returns:
*       S_OK; E_OUTOFMEMORY if the text buffer cannot be allocated or a queue
*   push fails; or the failing HRESULT from DoUnicodeToAsciiMap or from the
*   output site.
******************************************************************* PACOG ***/
HRESULT CTrueTalk::RunFrontEnd (const SPVTEXTFRAG *pTextFragList, ISpTTSEngineSite* pOutputSite)
{
    HRESULT hr = S_OK;
    const SPVTEXTFRAG* pFrag;
    int iStrLen = 0;

    m_pPhoneQueue->Reset();

    //Estimate size of array: each fragment plus one separating space
    for ( pFrag = pTextFragList; pFrag; pFrag = pFrag->pNext )
    {
        if ( IsSpokenFragment (pFrag) )
        {
            iStrLen += pFrag->ulTextLen + 1;
        }
    }

    if ( !iStrLen )
    {
        return hr;      // nothing to say
    }

    if ( m_fTextOutput )
    {
        return WriteFragmentsAsText (pTextFragList, pOutputSite);
    }

    //Allocate array
    char* pszString = new char[iStrLen + 2];
    if ( !pszString )
    {
        // Only reachable with a non-throwing operator new. Return at once;
        // the buffer is written unconditionally further down.
        return E_OUTOFMEMORY;
    }

    //Copy data into array
    iStrLen = 0;
    for ( pFrag = pTextFragList; SUCCEEDED(hr) && pFrag; pFrag = pFrag->pNext )
    {
        if ( IsSpokenFragment (pFrag) )
        {
            hr = DoUnicodeToAsciiMap (pFrag->pTextStart, pFrag->ulTextLen, pszString + iStrLen);
            if ( SUCCEEDED (hr) )
            {
                // DoUnicodeToAsciiMap writes nothing for a null text pointer,
                // so only advance over bytes that were actually produced.
                if ( pFrag->pTextStart )
                {
                    iStrLen += pFrag->ulTextLen;
                }
                pszString[iStrLen++] = ' ';
            }
        }
    }
    pszString[iStrLen] = '\0';

    //Process string
    if ( SUCCEEDED (hr) )
    {
        //-- These calls are serialized (critical section), to avoid
        //   conflicts with other channels.
        m_pTtp->Lock();

        char* pszTxtPtr = pszString;
        while ( SUCCEEDED (hr) && pszTxtPtr )
        {
            Phone* pPhones    = 0;
            int    iNumPhones = 0;
            float* pfF0       = 0;
            int    iNumF0     = 0;

            pszTxtPtr = m_pTtp->Process (pszTxtPtr, &pPhones, &iNumPhones, &pfF0, &iNumF0);

            if ( iNumPhones )
            {
                // NOTE(review): if Push fails the two buffers are presumably
                // still owned here and would leak; confirm CPhStrQueue::Push
                // ownership semantics before adding a free().
                if ( !m_pPhoneQueue->Push (pPhones, iNumPhones, pfF0, iNumF0) )
                {
                    hr = E_OUTOFMEMORY;
                }
            }
        }

        m_pTtp->Unlock();
    }

    delete[] pszString;

    return hr;
}

/*****************************************************************************
* CTrueTalk::DoUnicodeToAsciiMap *
*--------------------------------*
*   Description:
*       Converts ulUnicodeStringLength WCHARs to code page 1252 and then maps
*   every resulting byte through g_AnsiToAscii. Exactly ulUnicodeStringLength
*   bytes of pszAsciiString are written, padded with spaces if the conversion
*   yields fewer bytes; no terminator is added. Does nothing when a pointer
*   is null or the length is zero.
*
*   Returns:
*       S_OK, or E_UNEXPECTED if the code page conversion fails.
******************************************************************* PACOG ***/
HRESULT CTrueTalk::DoUnicodeToAsciiMap ( const WCHAR *pUnicodeString, ULONG ulUnicodeStringLength,
                                         char* pszAsciiString )
{
    HRESULT hr = S_OK;

    if ( pUnicodeString && ulUnicodeStringLength > 0 && pszAsciiString )
    {
        //--- Map WCHARs to ANSI chars
        int cbConverted = WideCharToMultiByte( 1252, 0, pUnicodeString, ulUnicodeStringLength,
                                               pszAsciiString, ulUnicodeStringLength,
                                               &g_pFlagCharacter, NULL );
        if ( !cbConverted )
        {
            hr = E_UNEXPECTED;
        }

        if ( SUCCEEDED (hr) )
        {
            // NOTE(review): the body of this loop was missing from the
            // reviewed text and has been reconstructed - confirm.
            //--- Use internal table to map ANSI to ASCII
            ULONG i;
            for ( i = 0; i < (ULONG)cbConverted; i++ )
            {
                // Index through unsigned char: plain char may be negative.
                pszAsciiString[i] = (char)g_AnsiToAscii[(unsigned char)pszAsciiString[i]];
            }
            // The caller advances by the input length, so pad any shortfall.
            for ( ; i < ulUnicodeStringLength; i++ )
            {
                pszAsciiString[i] = ' ';
            }
        }
    }

    return hr;
}

/*****************************************************************************
* CTrueTalk::SyncActions *
*------------------------*
*   Description:
*       Polls the output site for pending actions and applies them: a skip
*   request is completed with zero items skipped, a rate change is passed to
*   the front end and a volume change is applied to the back end gain as a
*   percentage of the voice gain.
*
*   Returns:
*       Non-zero when the site has requested an abort.
*
*   NOTE(review): this header and signature were missing from the reviewed
*   text and have been reconstructed; confirm the return type against the
*   declaration in TrueTalk.h.
******************************************************************* PACOG ***/
int CTrueTalk::SyncActions (ISpTTSEngineSite* pOutputSite)
{
    int iActions = pOutputSite->GetActions();

    if ( iActions != SPVES_CONTINUE )
    {
        if (iActions & SPVES_SKIP)
        {
            //This might not be the best default
            // maybe completely ignoring the flag...
            pOutputSite->CompleteSkip (0);
        }

        if (iActions & SPVES_RATE)
        {
            long lRate = 0;
            // Apply the value only if the site actually supplied one.
            if ( SUCCEEDED (pOutputSite->GetRate (&lRate)) && m_pTtp )
            {
                m_pTtp->SetRate (lRate);
            }
        }

        if (iActions & SPVES_VOLUME)
        {
            unsigned short usVolume = 0;
            if ( SUCCEEDED (pOutputSite->GetVolume (&usVolume)) && m_pBend )
            {
                m_pBend->SetGain ( (m_dGain * usVolume) / 100.0);
            }
        }
    }

    return (iActions & SPVES_ABORT);
}