/******************************************************************************* * Frontend.cpp * *--------------* * Description: * This module is the main implementation file for the CFrontend class. *------------------------------------------------------------------------------- * Created By: mc Date: 03/12/99 * Copyright (C) 1999 Microsoft Corporation * All Rights Reserved * *******************************************************************************/ //--- Additional includes #include "stdafx.h" #include "ms_entropicengine.h" #include "Frontend.h" #include "spdebug.h" #include "FeedChain.h" #include "AlloOps.h" #include "sapi.h" #include "StdSentEnum.h" static bool IsVowel ( char* ph ); //----------------------------- // Data.cpp //----------------------------- extern const short g_IPAToAllo[]; extern const float g_RateScale[]; inline short GetPhoneF0( float *pF0Contour, float CurrentTime, float Length ) { float Total = 0; int startIndex = (int) ( CurrentTime / PITCH_BUF_RES + 0.5 ); int endIndex = (int) ( ( CurrentTime + Length ) / PITCH_BUF_RES + 0.5 ); for ( int i = startIndex; i < endIndex; i++ ) { Total += pF0Contour[i]; } Total /= endIndex - startIndex; return (short) Total; } const char* OldMapPhoneSet (ALLO_CODE code) { static struct tagPhoneMap { const char* name; ALLO_CODE code; } phoneMap [] = { {"iy", _IY_}, {"ih", _IH_}, {"eh", _EH_}, {"ae", _AE_}, {"aa", _AA_}, {"ah", _AH_}, {"ao", _AO_}, {"uh", _UH_}, {"ax", _AX_}, {"axr", _ER_}, // or "er" {"ey", _EY_}, {"ay", _AY_}, {"oy", _OY_}, {"aw", _AW_}, {"ow", _OW_}, {"uw", _UW_}, {"ix", _IX_}, {"sil", _SIL_}, {"w", _w_}, {"y", _y_}, {"r", _r_}, {"l", _l_}, {"hh", _h_}, {"m", _m_}, {"n", _n_}, {"ng", _NG_}, {"f", _f_}, {"v", _v_}, {"th", _TH_}, {"dh", _DH_}, {"s", _s_}, {"z", _z_}, {"sh", _SH_}, {"zh", _ZH_}, {"p", _p_}, {"b", _b_}, {"t", _t_}, {"d", _d_}, {"k", _k_}, {"g", _g_}, {"ch", _CH_}, {"jh", _JH_}, {"dx", _DX_}, {"", _STRESS1_}, {"", _STRESS2_}, {"", _EMPHSTRESS_}, {"", _SYLLABLE_} }; static int nPhonesMap = sizeof (phoneMap) / sizeof(phoneMap[0]); int i; for ( i = 0; i < nPhonesMap; i++ ) { if (code == phoneMap[i].code) { return phoneMap[i].name; } } return ""; } const char* NewMapPhoneSet (ALLO_CODE code) { static struct tagPhoneMap { const char* name; ALLO_CODE code; } phoneMap [] = { {"iy", _IY_}, {"ih", _IH_}, {"eh", _EH_}, {"ae", _AE_}, {"aa", _AA_}, {"ah", _AH_}, {"ao", _AO_}, {"uh", _UH_}, {"ax", _AX_}, {"er", _ER_}, // or "er" {"ey", _EY_}, {"ay", _AY_}, {"oy", _OY_}, {"aw", _AW_}, {"ow", _OW_}, {"uw", _UW_}, {"ix", _IX_}, {"sil", _SIL_}, {"w", _w_}, {"y", _y_}, {"r", _r_}, {"l", _l_}, {"h", _h_}, {"m", _m_}, {"n", _n_}, {"ng", _NG_}, {"f", _f_}, {"v", _v_}, {"th", _TH_}, {"dh", _DH_}, {"s", _s_}, {"z", _z_}, {"sh", _SH_}, {"zh", _ZH_}, {"p", _p_}, {"b", _b_}, {"t", _t_}, {"d", _d_}, {"k", _k_}, {"g", _g_}, {"ch", _CH_}, {"jh", _JH_}, {"dx", _DX_}, {"", _STRESS1_}, {"", _STRESS2_}, {"", _EMPHSTRESS_}, {"", _SYLLABLE_} }; static int nPhonesMap = sizeof (phoneMap) / sizeof(phoneMap[0]); int i; for ( i = 0; i < nPhonesMap; i++ ) { if (code == phoneMap[i].code) { return phoneMap[i].name; } } return ""; } /***************************************************************************** * CFrontend::CFrontend * *----------------------* * Description: * ********************************************************************** MC ***/ CFrontend::CFrontend() { SPDBG_FUNC( "CFrontend::CFrontend" ); #ifdef USE_VOICEDATAOBJ m_pUnits = NULL; #endif m_unitCount = 0; m_CurUnitIndex = 0; m_pAllos = NULL; m_pSrcObj = NULL; m_fNewPhoneSet = FALSE; } /* CFrontend::CFrontend */ /***************************************************************************** * CFrontend::~CFrontend * *-----------------------* * Description: * ********************************************************************** MC ***/ CFrontend::~CFrontend() { SPDBG_FUNC( "CFrontend::~CFrontend" ); #ifdef USE_VOICEDATAOBJ DisposeUnits(); #endif if( m_pAllos ) { delete m_pAllos; m_pAllos = NULL; } DeleteTokenList(); } /* CFrontend::~CFrontend */ /***************************************************************************** * CFrontend::CntrlToRatio * *-------------------------* * Description: * Return rate ratio from control * ********************************************************************** MC ***/ float CFrontend::CntrlToRatio( long rateControl ) { SPDBG_FUNC( "CFrontend::CntrlToRatio" ); float rateRatio; if( rateControl < 0 ) { //-------------------------------- // DECREASE the rate //-------------------------------- if( rateControl < MIN_USER_RATE ) { rateControl = MIN_USER_RATE; // clip to min } rateRatio = 1.0f / ::g_RateScale[0 - rateControl]; } else { //-------------------------------- // INCREASE the rate //-------------------------------- if( rateControl > MAX_USER_RATE ) { rateControl = MAX_USER_RATE; // clip to max } rateRatio = ::g_RateScale[rateControl]; } return rateRatio; } /* CFrontend::CntrlToRatio */ /***************************************************************************** * CFrontend::Init * *-----------------* * Description: * Init voice dependent variables, call once when object is created+++ * ********************************************************************** MC ***/ #ifdef USE_VOICEDATAOBJ HRESULT CFontend::Init( CVoiceData* pVoiceDataObj, CFeedChain *pSrcObj, MSVOICEINFO* pVoiceInfo, EntropicPitchInfo PitchInfo, bool fNewPhoneSet ) #else HRESULT CFrontend::Init( void* pVoiceDataObj, CFeedChain *pSrcObj, void* pVoiceInfo, EntropicPitchInfo PitchInfo, bool fNewPhoneSet ) #endif { SPDBG_FUNC( "CFrontend::Init" ); HRESULT hr = S_OK; m_pSrcObj = pSrcObj; m_BasePitch = PitchInfo.BasePitch; #ifdef USE_VOICEDATAOBJ m_pVoiceDataObj = pVoiceDataObj; m_ProsodyGain = ((float)pVoiceInfo->ProsodyGain) / 100.0f; m_SampleRate = (float)pVoiceInfo->SampleRate; #endif // NOTE: move these to voice data? // m_VoiceWPM = pVoiceInfo->Rate; // m_PitchRange = pVoiceInfo->PitchRange; m_VoiceWPM = 180; m_PitchRange = PitchInfo.Range; m_RateRatio_API = m_RateRatio_PROSODY = 1.0f; m_fNewPhoneSet = fNewPhoneSet; return hr; } /* CFrontend::Init */ static ULONG IPA_to_Allo( WCHAR* pSrc, ALLO_CODE* pDest ) { ULONG iIpa, iAllo, i; ULONG gotMatch; // for debugging iIpa = iAllo = 0; while( pSrc[iIpa] > 0 ) { gotMatch = false; //----------------------------------------- // ...then search for single word IPA's //----------------------------------------- for( i = 0; i < NUMBER_OF_ALLO; i++ ) { if( pSrc[iIpa] == g_IPAToAllo[i] ) { pDest[iAllo] = (ALLO_CODE)i; gotMatch = true; break; } } if( gotMatch ) { iAllo++; } /*else { // Should NEVER get here. Unsupported IPA unicode! // Ignore it and go on. }*/ //---------------------------------- // Clip at max length //---------------------------------- if( iAllo >= (SP_MAX_PRON_LENGTH-1) ) { iAllo = SP_MAX_PRON_LENGTH-1; break; } iIpa++; } return iAllo; } /***************************************************************************** * CFrontend::AlloToUnit * *-----------------------* * Description: * Transform ALLO stream into backend UNIT stream+++ * ********************************************************************** MC ***/ #ifdef USE_VOICEDATAOBJ HRESULT CFrontend::AlloToUnit( CAlloList *pAllos, UNITINFO *pu ) { SPDBG_FUNC( "CFrontend::AlloToUnit" ); bool bFirstPass; long msPhon, attr; ULONG numOfCells; CAlloCell *pCurCell, *pNextCell; HRESULT hr = S_OK; bFirstPass = true; numOfCells = pAllos->GetCount(); pCurCell = pAllos->GetHeadCell(); pNextCell = pAllos->GetNextCell(); while( pCurCell ) { //-------------------------------------- // Get next allo ID //-------------------------------------- if( pNextCell ) { pu->NextAlloID = (USHORT)pNextCell->m_allo; } else { pu->NextAlloID = _SIL_; } //-------------------------------------- // Convert to Whistler phon code //-------------------------------------- attr = 0; if( pCurCell->m_ctrlFlags & PRIMARY_STRESS ) { attr |= ALLO_IS_STRESSED; } hr = m_pVoiceDataObj->AlloToUnit( (short)pCurCell->m_allo, attr, &msPhon ); if( FAILED(hr) ) { //------------------------ // allo ID is invalid //------------------------ break; } else { pu->PhonID = msPhon; pu->AlloID = (USHORT)pCurCell->m_allo; pu->flags = 0; pu->AlloFeatures = 0; pu->ctrlFlags = pCurCell->m_ctrlFlags; //-------------------------------------- // Flag WORD boundary //-------------------------------------- if( pCurCell->m_ctrlFlags & WORD_START ) { pu->flags |= WORD_START_FLAG; //---------------------------------------------- // Remember source word position and length //---------------------------------------------- pu->srcPosition = pCurCell->m_SrcPosition; pu->srcLen = pCurCell->m_SrcLen; } //---------------------------------------------------- // Flag SENTENCE boundary on 1st displayable word //---------------------------------------------------- if( bFirstPass && (pCurCell->m_SentenceLen > 0) ) { bFirstPass = false; pu->flags |= SENT_START_FLAG; //---------------------------------------------- // Remember source word position and length //---------------------------------------------- pu->sentencePosition = pCurCell->m_SentencePosition; pu->sentenceLen = pCurCell->m_SentenceLen; } pu->nKnots = KNOTS_PER_PHON; /*for( k = 0; k < pu->nKnots; k++ ) { pu->pTime[k] = pCurCell->m_ftTime[k] * m_SampleRate; pu->pF0[k] = pCurCell->m_ftPitch[k]; pu->pAmp[k] = pu->ampRatio; }*/ //---------------------------- // Controls and events //---------------------------- pu->user_Volume = pCurCell->m_user_Volume; pu->pBMObj = (void*)pCurCell->m_pBMObj; pCurCell->m_pBMObj = NULL; //---------------------------------------- // Pass features for viseme event //---------------------------------------- if( pCurCell->m_ctrlFlags & PRIMARY_STRESS ) { pu->AlloFeatures |= SPVFEATURE_STRESSED; } if( pCurCell->m_ctrlFlags & EMPHATIC_STRESS ) { pu->AlloFeatures |= SPVFEATURE_EMPHASIS; } pu->duration = PITCH_BUF_RES; pu->silenceSource = pCurCell->m_SilenceSource; pu++; } pCurCell = pNextCell; pNextCell = pAllos->GetNextCell(); } return hr; } /* CFrontend::AlloToUnit */ #endif /***************************************************************************** * CFrontend::PrepareSpeech * *--------------------------* * Description: * Prepare frontend for new speech * ********************************************************************** MC ***/ void CFrontend::PrepareSpeech( IEnumSpSentence* pEnumSent, ISpTTSEngineSite *pOutputSite ) { SPDBG_FUNC( "CFrontend::PrepareSpeech" ); m_pEnumSent = pEnumSent; m_SpeechState = SPEECH_CONTINUE; m_CurUnitIndex = m_unitCount = 0; m_HasSpeech = false; m_pOutputSite = pOutputSite; m_fInQuoteProsody = m_fInParenProsody = false; m_CurPitchOffs = 0; m_CurPitchRange = 1.0; } /* CFrontend::PrepareSpeech */ /***************************************************************************** * IsTokenPunct * *--------------* * Description: * Return TRUE if char is , . ! or ? * ********************************************************************** MC ***/ bool fIsPunctuation( TTSSentItem Item ) { SPDBG_FUNC( "IsTokenPunct" ); return ( Item.pItemInfo->Type == eCOMMA || Item.pItemInfo->Type == eSEMICOLON || Item.pItemInfo->Type == eCOLON || Item.pItemInfo->Type == ePERIOD || Item.pItemInfo->Type == eQUESTION || Item.pItemInfo->Type == eEXCLAMATION || Item.pItemInfo->Type == eHYPHEN ); } /* fIsPunctuation */ /***************************************************************************** * CFrontend::ToBISymbols * *------------------------* * Description: * Label each word with ToBI prosody notation+++ * ********************************************************************** MC ***/ HRESULT CFrontend::ToBISymbols() { SPDBG_FUNC( "CFrontend::ToBISymbols" ); TOBI_PHRASE *pTPhrase; long i, cPhrases; PROSODY_POS prevPOS, curPOS; bool possible_YNQ = false; long cTok; CFEToken *pTok, *pPrevTok, *pAuxTok; bool hasEmph = false; SPLISTPOS listPos; //---------------------------------- // Get memory for phrase array //---------------------------------- pAuxTok = NULL; // To quiet the compiler cTok = m_TokList.GetCount(); if( cTok ) { pTPhrase = new TOBI_PHRASE[cTok]; // worse case: each token is a phrase if( pTPhrase ) { //--------------------------------------------- // Find sub-phrases from POS // For now, detect function/content boundaries //--------------------------------------------- hasEmph = false; cPhrases = 0; i = 0; listPos = m_TokList.GetHeadPosition(); pTok = m_TokList.GetNext( listPos ); prevPOS = pTok->m_posClass; while( pTok->phon_Str[0] == _SIL_ ) { if( i >= (cTok-1) ) { break; } i++; if( listPos != NULL ) { pTok = m_TokList.GetNext( listPos ); } } if( pTok->m_posClass == POS_AUX ) { //--------------------------------- // Could be a yes/no question //--------------------------------- possible_YNQ = true; pAuxTok = pTok; } pTPhrase[cPhrases].start = i; for( ; i < cTok; i++ ) { curPOS = pTok->m_posClass; if( (curPOS != prevPOS) && (pTok->phon_Str[0] != _SIL_) ) { pTPhrase[cPhrases].posClass = prevPOS; pTPhrase[cPhrases].end = i-1; cPhrases++; pTPhrase[cPhrases].start = i; prevPOS = curPOS; } if( pTok->user_Emph > 0 ) { hasEmph = true; } if( listPos != NULL ) { pTok = m_TokList.GetNext( listPos ); } } //------------------------------- // Complete last phrase //------------------------------- pTPhrase[cPhrases].posClass = prevPOS; pTPhrase[cPhrases].end = i-1; cPhrases++; for( i = 0; i < cPhrases; i++ ) { //------------------------------------------------------- // Sequence of function words, place a low tone // on the LAST word in a func sequence, // if there are more than 1 words in the sequence. //------------------------------------------------------- if( ((pTPhrase[i].posClass == POS_FUNC) || (pTPhrase[i].posClass == POS_AUX)) && (pTPhrase[i].end - pTPhrase[i].start) ) { pTok = (CFEToken*)m_TokList.GetAt( m_TokList.FindIndex( pTPhrase[i].end )); if( pTok->m_Accent == K_NOACC ) { pTok->m_Accent = K_LSTAR; pTok->m_Accent_Prom = 2; pTok->m_AccentSource = ACC_FunctionSeq; } } //------------------------------------------------------- // Sequence of content words, place a high or // rising tone, of random prominence, // on the FIRST word in the content sequence //------------------------------------------------------- else if ( ((pTPhrase[i].posClass == POS_CONTENT) || (pTPhrase[i].posClass == POS_UNK)) ) { pTok = (CFEToken*)m_TokList.GetAt( m_TokList.FindIndex( pTPhrase[i].start )); if( pTok->m_Accent == K_NOACC ) { pTok->m_Accent = K_HSTAR; pTok->m_Accent_Prom = rand() % 5; pTok->m_AccentSource = ACC_ContentSeq; } } } delete pTPhrase; //----------------------------------------- // Now, insert the BOUNDARY tags //----------------------------------------- listPos = m_TokList.GetHeadPosition(); pPrevTok = m_TokList.GetNext( listPos ); for( i = 1; i < cTok; i++ ) { pTok = m_TokList.GetNext( listPos ); //-------------------------------- // Place a terminal boundary //-------------------------------- if( pTok->m_TuneBoundaryType != NULL_BOUNDARY ) { switch( pTok->m_TuneBoundaryType ) { case YN_QUEST_BOUNDARY: { pPrevTok->m_Accent = K_LSTAR; pPrevTok->m_Accent_Prom = 10; pPrevTok->m_Boundary = K_HMINUSHPERC; pPrevTok->m_Boundary_Prom = 10; //-- Diagnostic if( pPrevTok->m_AccentSource == ACC_NoSource ) { pPrevTok->m_AccentSource = ACC_YNQuest; } //-- Diagnostic if( pPrevTok->m_BoundarySource == BND_NoSource ) { pPrevTok->m_BoundarySource = BND_YNQuest; } //------------------------------------------------------- // Accent an aux verb in initial position (possible ynq) //------------------------------------------------------- if( possible_YNQ ) { pAuxTok->m_Accent = K_HSTAR; pAuxTok->m_Accent_Prom = 5; pAuxTok->m_AccentSource = ACC_InitialVAux; } } break; case WH_QUEST_BOUNDARY: case DECLAR_BOUNDARY: case EXCLAM_BOUNDARY: { if (pPrevTok->m_posClass == POS_CONTENT) { pPrevTok->m_Accent = K_HSTAR; pPrevTok->m_Accent_Prom = 4; //-- Diagnostic if( pPrevTok->m_AccentSource == ACC_NoSource ) { pPrevTok->m_AccentSource = ACC_Period; } } pPrevTok->m_Boundary = K_LMINUSLPERC; pPrevTok->m_Boundary_Prom = 10; //--- Diagnostic if( pPrevTok->m_BoundarySource == BND_NoSource ) { pPrevTok->m_BoundarySource = BND_Period; } } break; case PHRASE_BOUNDARY: { if (pPrevTok->m_posClass == POS_CONTENT) { pPrevTok->m_Accent = K_LHSTAR; pPrevTok->m_Accent_Prom = 10; //-- Diagnostic if( pPrevTok->m_AccentSource == ACC_NoSource ) { pPrevTok->m_AccentSource = ACC_Comma; } } pPrevTok->m_Boundary = K_LMINUSHPERC; pPrevTok->m_Boundary_Prom = 5; //-- Diagnostic if( pPrevTok->m_BoundarySource == BND_NoSource ) { pPrevTok->m_BoundarySource = BND_Comma; } } break; case NUMBER_BOUNDARY: { pPrevTok->m_Boundary = K_LMINUSHPERC; pPrevTok->m_Boundary_Prom = 5; //-- Diagnostic if( pPrevTok->m_BoundarySource == BND_NoSource ) { pPrevTok->m_BoundarySource = BND_NumberTemplate; } } break; default: { // Use comma for all other boundaries if (pPrevTok->m_posClass == POS_CONTENT) { pPrevTok->m_Accent = K_LHSTAR; pPrevTok->m_Accent_Prom = 10; //-- Diagnostic if( pPrevTok->m_AccentSource == ACC_NoSource ) { pPrevTok->m_AccentSource = pTok->m_AccentSource; } } pPrevTok->m_Boundary = K_LMINUSHPERC; pPrevTok->m_Boundary_Prom = 5; //-- Diagnostic if( pPrevTok->m_BoundarySource == BND_NoSource ) { pPrevTok->m_BoundarySource = pTok->m_BoundarySource; } } break; } } pPrevTok = pTok; } //-------------------------------------------- // Loop through each word and increase // pitch prominence if EMPHASIZED and // decrease prominence for all others //-------------------------------------------- if( hasEmph ) { SPLISTPOS listPos; pPrevTok = NULL; listPos = m_TokList.GetHeadPosition(); while( listPos ) { pTok = m_TokList.GetNext( listPos ); //------------------------------ // Is this word emphasized? //------------------------------ if( pTok->user_Emph > 0 ) { //------------------------------ // Add my clever H*+L*™ tag //------------------------------ pTok->m_Accent = K_HSTARLSTAR; pTok->m_Accent_Prom = 10; pTok->m_Boundary = K_NOBND; // Delete any boundary tag here... if( pPrevTok ) { pPrevTok->m_Boundary = K_NOBND; // ...or before } } else { //----------------------------------- // Is non-emphasized word accented? //----------------------------------- if( (pTok->m_Accent != K_NOACC) && (pTok->m_Accent_Prom > 5) ) { //------------------------------ // Then clip its prominence at 5 //------------------------------ pTok->m_Accent_Prom = 5; } //------------------------------ // Is it a boundary? //------------------------------ /*if( (pTok->m_Boundary != K_NOBND) && (pTok->m_Boundary_Prom > 5) ) { //------------------------------ // Then clip its prominence at 5 //------------------------------ pTok->m_Boundary_Prom = 5; }*/ } pPrevTok = pTok; } } } } return S_OK; } /* ToBISymbols */ /***************************************************************************** * CFrontend::TokensToAllo * *------------------------* * Description: * Transform TOKENS into ALLOS * ********************************************************************** MC ***/ HRESULT CFrontend::TokensToAllo( CFETokenList *pTokList, CAlloList *pAllo ) { SPDBG_FUNC( "CFrontend::TokToAllo" ); CAlloCell *pLastCell; long i; long cTok; CFEToken *pCurToken, *pNextToken, *pPrevTok; SPLISTPOS listPos; pLastCell = pAllo->GetTailCell(); // Get end (silence) if( pLastCell ) { pPrevTok = NULL; listPos = pTokList->GetHeadPosition(); pCurToken = pTokList->GetNext( listPos ); cTok = pTokList->GetCount(); for( i = 0; i < cTok; i++ ) { //---------------------------- // Get NEXT word //---------------------------- if( i < (cTok -1) ) { pNextToken = pTokList->GetNext( listPos ); } else { pNextToken = NULL; } if( pAllo->WordToAllo( pPrevTok, pCurToken, pNextToken, pLastCell ) ) { m_HasSpeech = true; } //---------------------------- // Bump the pipeline //---------------------------- pPrevTok = pCurToken; pCurToken = pNextToken; } } return S_OK; } /* CFrontend::TokensToAllo */ /***************************************************************************** * CFrontend::GetItemControls * *----------------------------* * Description: * Set user control values from Sent Enum item. ********************************************************************** MC ***/ void CFrontend::GetItemControls( const SPVSTATE* pXmlState, CFEToken* pToken ) { SPDBG_FUNC( "CFrontend::GetItemControls" ); pToken->user_Volume = pXmlState->Volume; pToken->user_Rate = pXmlState->RateAdj; pToken->user_Pitch = pXmlState->PitchAdj.MiddleAdj; pToken->user_Emph = pXmlState->EmphAdj; pToken->m_DurScale = CntrlToRatio( pToken->user_Rate ); if( (pToken->m_DurScale * m_RateRatio_API * m_RateRatio_PROSODY) < DISCRETE_BKPT ) { //-- If the total rate is low enough, insert breaks between words pToken->m_TermSil = 0.050f / (pToken->m_DurScale * m_RateRatio_API * m_RateRatio_PROSODY); pToken->m_DurScale = DISCRETE_BKPT; } else { pToken->m_TermSil = 0; } } /* CFrontend::GetItemControls */ /***************************************************************************** * CFrontend::GetPOSClass * *------------------------* * Description: * Transform SAPI POS code to func/content/aux class. ********************************************************************** MC ***/ PROSODY_POS CFrontend::GetPOSClass( ENGPARTOFSPEECH sapiPOS ) { SPDBG_FUNC( "CFrontend::GetPOSClass" ); PROSODY_POS posClass; posClass = POS_UNK; switch( sapiPOS ) { case MS_Noun: case MS_Verb: case MS_Adj: case MS_Adv: case MS_Interjection: { posClass = POS_CONTENT; break; } case MS_VAux: { posClass = POS_AUX; break; } case MS_Modifier: case MS_Function: case MS_Interr: case MS_Pron: case MS_ObjPron: case MS_SubjPron: case MS_RelPron: case MS_Conj: case MS_CConj: case MS_Det: case MS_Contr: case MS_Prep: { posClass = POS_FUNC; break; } } return posClass; } /* CFrontend::GetPOSClass */ #define QUOTE_HESITATION 100 // Number of msec #define PAREN_HESITATION 100 // Number of msec #define PAREN_HESITATION_TAIL 100 // Number of msec #define EMPH_HESITATION 1 // Number of msec /***************************************************************************** * CFrontend::StateQuoteProsody * *------------------------------* * Description: * ********************************************************************** MC ***/ bool CFrontend::StateQuoteProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil ) { SPDBG_FUNC( "CFrontend::StateQuoteProsody" ); bool result = false; if( !m_fInParenProsody ) { if( m_fInQuoteProsody ) { //------------------------------ // Stop quote prosody //------------------------------ m_fInQuoteProsody = false; m_CurPitchOffs = 0.0f; m_CurPitchRange = 1.0f; if( fInsertSil ) { (void)InsertSilenceAtTail( pWordTok, pSentItem, QUOTE_HESITATION ); pWordTok->m_SilenceSource = SIL_QuoteEnd; } } else { //------------------------------ // Begin quote prosody //------------------------------ m_fInQuoteProsody = true; m_CurPitchOffs = 0.1f; m_CurPitchRange = 1.25f; if( fInsertSil ) { (void)InsertSilenceAtTail( pWordTok, pSentItem, QUOTE_HESITATION ); pWordTok->m_SilenceSource = SIL_QuoteStart; } } result = true; } return result; } /* CFrontend::StateQuoteProsody */ /***************************************************************************** * CFrontend::StartParenProsody * *------------------------------* * Description: * ********************************************************************** MC ***/ bool CFrontend::StartParenProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil ) { SPDBG_FUNC( "CFrontend::StartParenProsody" ); bool result = false; if( (!m_fInParenProsody) && (!m_fInQuoteProsody) ) { m_CurPitchOffs = -0.2f; m_CurPitchRange = 0.75f; m_fInParenProsody = true; m_RateRatio_PROSODY = 1.25f; if( fInsertSil ) { (void)InsertSilenceAtTail( pWordTok, pSentItem, PAREN_HESITATION ); pWordTok->m_SilenceSource = SIL_ParenStart; } result = true; } return result; } /* CFrontend::StartParenProsody */ /***************************************************************************** * CFrontend::EndParenProsody * *----------------------------* * Description: * ********************************************************************** MC ***/ bool CFrontend::EndParenProsody( CFEToken *pWordTok, TTSSentItem *pSentItem, bool fInsertSil ) { SPDBG_FUNC( "CFrontend::EndParenProsody" ); bool result = false; if( m_fInParenProsody ) { m_fInParenProsody = false; m_CurPitchOffs = 0.0f; m_CurPitchRange = 1.0f; m_RateRatio_PROSODY = 1.0f; if( fInsertSil ) { (void)InsertSilenceAtTail( pWordTok, pSentItem, PAREN_HESITATION_TAIL ); pWordTok->m_SilenceSource = SIL_ParenStart; } result = true; } return result; } /* CFrontend::EndParenProsody */ /***************************************************************************** * CFrontend::InsertSilenceAtTail * *--------------------------------* * Description: * ********************************************************************** MC ***/ SPLISTPOS CFrontend::InsertSilenceAtTail( CFEToken *pWordTok, TTSSentItem *pSentItem, long msec ) { SPDBG_FUNC( "CFrontend::InsertSilenceAtTail" ); if( msec > 0 ) { pWordTok->user_Break = msec; } pWordTok->phon_Len = 1; pWordTok->phon_Str[0] = _SIL_; pWordTok->srcPosition = pSentItem->ulItemSrcOffset; pWordTok->srcLen = pSentItem->ulItemSrcLen; pWordTok->tokStr[0] = 0; // There's no orth for Break pWordTok->tokLen = 0; pWordTok->m_PitchBaseOffs = m_CurPitchOffs; pWordTok->m_PitchRangeScale = m_CurPitchRange; pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY; //---------------------------------- // Advance to next token //---------------------------------- return m_TokList.AddTail( pWordTok ); } /* CFrontend::InsertSilenceAtTail */ /***************************************************************************** * CFrontend::InsertSilenceAfterPos * *-----------------------------------* * Description: * Insert silence token AFTER 'position' * ********************************************************************** MC ***/ SPLISTPOS CFrontend::InsertSilenceAfterPos( CFEToken *pWordTok, SPLISTPOS position ) { SPDBG_FUNC( "CFrontend::InsertSilenceAfterPos" ); pWordTok->phon_Len = 1; pWordTok->phon_Str[0] = _SIL_; pWordTok->srcPosition = 0; pWordTok->srcLen = 0; pWordTok->tokStr[0] = '+'; // punctuation pWordTok->tokStr[1] = 0; // delimiter pWordTok->tokLen = 1; pWordTok->m_PitchBaseOffs = m_CurPitchOffs; pWordTok->m_PitchRangeScale = m_CurPitchRange; pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY; pWordTok->m_DurScale = 0; //---------------------------------- // Advance to next token //---------------------------------- return m_TokList.InsertAfter( position, pWordTok ); } /* CFrontend::InsertSilenceAfterPos */ /***************************************************************************** * CFrontend::InsertSilenceBeforePos * *------------------------------------* * Description: * Insert silence token BEFORE 'position' * ********************************************************************** MC ***/ SPLISTPOS CFrontend::InsertSilenceBeforePos( CFEToken *pWordTok, SPLISTPOS position ) { SPDBG_FUNC( "CFrontend::InsertSilenceBeforePos" ); pWordTok->phon_Len = 1; pWordTok->phon_Str[0] = _SIL_; pWordTok->srcPosition = 0; pWordTok->srcLen = 0; pWordTok->tokStr[0] = '+'; // punctuation pWordTok->tokStr[1] = 0; // delimiter pWordTok->tokLen = 1; pWordTok->m_PitchBaseOffs = m_CurPitchOffs; pWordTok->m_PitchRangeScale = m_CurPitchRange; pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY; pWordTok->m_DurScale = 0; //---------------------------------- // Advance to next token //---------------------------------- return m_TokList.InsertBefore( position, pWordTok ); } /* CFrontend::InsertSilenceBeforePos */ #define K_ACCENT_PROM ((rand() % 4) + 4) #define K_DEACCENT_PROM 5 #define K_ACCENT K_HSTAR #define K_DEACCENT K_NOACC /***************************************************************************** * CFrontend::ProsodyTemplates * *-----------------------------* * Description: * Call prosody template function for supported item types. * ********************************************************************** MC ***/ void CFrontend::ProsodyTemplates( SPLISTPOS clusterPos, TTSSentItem *pSentItem ) { SPDBG_FUNC( "CFrontend::ProsodyTemplates" ); long cWordCount; CFEToken *pClusterTok; switch( pSentItem->pItemInfo->Type ) { //--------------------------------------- // Numbers //--------------------------------------- case eNUM_ROMAN_NUMERAL: case eNUM_ROMAN_NUMERAL_ORDINAL: { if ( ( (TTSRomanNumeralItemInfo*) pSentItem->pItemInfo )->pNumberInfo->Type != eDATE_YEAR ) { if ( ((TTSNumberItemInfo*)((TTSRomanNumeralItemInfo*)pSentItem->pItemInfo)->pNumberInfo)->pIntegerPart ) { DoIntegerTemplate( &clusterPos, (TTSNumberItemInfo*)((TTSRomanNumeralItemInfo*)pSentItem->pItemInfo)->pNumberInfo, pSentItem->ulNumWords ); } if ( ((TTSNumberItemInfo*)((TTSRomanNumeralItemInfo*)pSentItem->pItemInfo)->pNumberInfo)->pDecimalPart ) { DoNumByNumTemplate( &clusterPos, ((TTSNumberItemInfo*)((TTSRomanNumeralItemInfo*)pSentItem->pItemInfo)->pNumberInfo)->pDecimalPart->ulNumDigits ); } } } break; case eNUM_CARDINAL: case eNUM_DECIMAL: case eNUM_ORDINAL: case eNUM_MIXEDFRACTION: { if ( ( (TTSNumberItemInfo*) pSentItem->pItemInfo )->pIntegerPart ) { cWordCount = DoIntegerTemplate( &clusterPos, (TTSNumberItemInfo*) pSentItem->pItemInfo, pSentItem->ulNumWords ); } if( ( (TTSNumberItemInfo*) pSentItem->pItemInfo )->pDecimalPart ) { //----------------------------------------- // Skip "point" string... //----------------------------------------- (void) m_TokList.GetNext( clusterPos ); //----------------------------------------- // ...and do single digit prosody //----------------------------------------- DoNumByNumTemplate( &clusterPos, ( (TTSNumberItemInfo*) pSentItem->pItemInfo )->pDecimalPart->ulNumDigits ); } if ( ( (TTSNumberItemInfo*) pSentItem->pItemInfo )->pFractionalPart ) { //----------------------------------------- // Skip "and" string... //----------------------------------------- pClusterTok = m_TokList.GetNext( clusterPos ); if( pClusterTok->m_Accent == K_NOACC ) { //-------------------------------------- // Force POS for "and" to noun // so phrasing rules don't kick in! //-------------------------------------- pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; pClusterTok->POScode = MS_Noun; pClusterTok->m_posClass = POS_CONTENT; } //----------------------------------------- // ...and do fraction prosody //----------------------------------------- cWordCount = DoFractionTemplate( &clusterPos, (TTSNumberItemInfo*) pSentItem->pItemInfo, pSentItem->ulNumWords ); } } break; //--------------------------------------- // Fraction //--------------------------------------- case eNUM_FRACTION: { cWordCount = DoFractionTemplate( &clusterPos, (TTSNumberItemInfo*) pSentItem->pItemInfo, pSentItem->ulNumWords ); } break; //--------------------------------------- // Money //--------------------------------------- case eNUM_CURRENCY: { DoCurrencyTemplate( clusterPos, pSentItem ); } break; //--------------------------------------- // Phone Numbers //--------------------------------------- case eNUM_PHONENUMBER: case eNEWNUM_PHONENUMBER: { DoPhoneNumberTemplate( clusterPos, pSentItem ); } break; //--------------------------------------- // Time-of-Day //--------------------------------------- case eTIMEOFDAY: { DoTODTemplate( clusterPos, pSentItem ); } break; case eELLIPSIS: { CFEToken *pWordTok; pWordTok = new CFEToken; if( pWordTok ) { clusterPos = InsertSilenceAtTail( pWordTok, pSentItem, 0 ); //clusterPos = m_TokList.GetTailPosition( ); //clusterPos = InsertSilenceAfterPos( pWordTok, clusterPos ); pWordTok->m_SilenceSource = SIL_Ellipsis; pWordTok->m_TuneBoundaryType = ELLIPSIS_BOUNDARY; pWordTok->m_BoundarySource = BND_Ellipsis; } } break; } } /* CFrontend::ProsodyTemplates */ /***************************************************************************** * CFrontend::DoTODTemplate * *--------------------------* * Description: * Prosody template for time-of-day. * * TODO: Temp kludge - needs more info in TTSTimeOfDayItemInfo ********************************************************************** MC ***/ void CFrontend::DoTODTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem ) { SPDBG_FUNC( "CFrontend::DoTODTemplate" ); TTSTimeOfDayItemInfo *pTOD; CFEToken *pWordTok; CFEToken *pClusterTok; SPLISTPOS curPos, nextPos, prevPos; curPos = nextPos = clusterPos; pTOD = (TTSTimeOfDayItemInfo*)&pSentItem->pItemInfo->Type; // Can't do 24 hr because there's no way to tell // if it's 1 or 2 digits (18: vs 23:) if( !pTOD->fTwentyFourHour ) { //------------------------------------- // Get HOUR token //------------------------------------- pClusterTok = m_TokList.GetNext( nextPos ); //------------------------------------- // Accent hour //------------------------------------- pClusterTok->m_Accent = K_ACCENT; pClusterTok->m_Accent_Prom = K_ACCENT_PROM; pClusterTok->m_AccentSource = ACC_TimeOFDay_HR; //--------------------------------- // Insert SILENCE after hour //--------------------------------- pWordTok = new CFEToken; if( pWordTok ) { nextPos = InsertSilenceAfterPos( pWordTok, clusterPos ); pWordTok->m_SilenceSource = SIL_TimeOfDay_HR; pWordTok->m_TuneBoundaryType = NUMBER_BOUNDARY; pWordTok->m_BoundarySource = BND_TimeOFDay_HR; pWordTok = NULL; //---------------------------- // Skip last digit //---------------------------- if( clusterPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } } if( pTOD->fMinutes ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); //------------------------------------- // Accent 1st digit for minutes //------------------------------------- pClusterTok->m_Accent = K_ACCENT; pClusterTok->m_Accent_Prom = K_ACCENT_PROM; pClusterTok->m_AccentSource = ACC_TimeOFDay_1stMin; } if( pTOD->fTimeAbbreviation ) { curPos = prevPos = m_TokList.GetTailPosition( ); pClusterTok = m_TokList.GetPrev( prevPos ); pWordTok = new CFEToken; if( pWordTok ) { prevPos = InsertSilenceBeforePos( pWordTok, prevPos ); pWordTok->m_SilenceSource = SIL_TimeOfDay_AB; pWordTok->m_TuneBoundaryType = TOD_BOUNDARY; pWordTok->m_BoundarySource = BND_TimeOFDay_AB; pWordTok = NULL; //pClusterTok = m_TokList.GetNext( clusterPos ); //pClusterTok = m_TokList.GetNext( clusterPos ); } //------------------------------------- // Accent "M" //------------------------------------- pClusterTok = m_TokList.GetNext( curPos ); pClusterTok->m_Accent = K_ACCENT; pClusterTok->m_Accent_Prom = K_ACCENT_PROM; pClusterTok->m_AccentSource = ACC_TimeOFDay_M; } } } /* CFrontend::DoTODTemplate */ CFEToken *CFrontend::InsertPhoneSilenceAtSpace( SPLISTPOS *pClusterPos, BOUNDARY_SOURCE bndSrc, SILENCE_SOURCE silSrc ) { CFEToken *pWordTok; SPLISTPOS curPos, nextPos; curPos = nextPos = *pClusterPos; //--------------------------------- // Insert SILENCE after area code //--------------------------------- pWordTok = new CFEToken; if( pWordTok ) { nextPos = InsertSilenceBeforePos( pWordTok, curPos ); pWordTok->m_SilenceSource = silSrc; pWordTok->m_TuneBoundaryType = PHONE_BOUNDARY; pWordTok->m_BoundarySource = bndSrc; pWordTok->m_AccentSource = ACC_PhoneBnd_AREA; // @@@@ ??? pWordTok = NULL; //---------------------------- // Skip last digit //---------------------------- if( nextPos != NULL ) { curPos = nextPos; pWordTok = m_TokList.GetNext( nextPos ); } } //pWordTok = m_TokList.GetNext( clusterPos ); //----------------------------------------- // Filter and embedded silences //----------------------------------------- while( (pWordTok->phon_Str[0] == _SIL_) && (nextPos != NULL) ) { curPos = nextPos; pWordTok = m_TokList.GetNext( nextPos ); } *pClusterPos = curPos; return pWordTok; } void CFrontend::InsertPhoneSilenceAtEnd( BOUNDARY_SOURCE bndSrc, SILENCE_SOURCE silSrc ) { CFEToken *pWordTok; SPLISTPOS curPos, nextPos; curPos = m_TokList.GetTailPosition( ); //--------------------------------- // Insert SILENCE after area code //--------------------------------- pWordTok = new CFEToken; if( pWordTok ) { nextPos = InsertSilenceAfterPos( pWordTok, curPos ); pWordTok->m_SilenceSource = silSrc; pWordTok->m_TuneBoundaryType = PHONE_BOUNDARY; pWordTok->m_BoundarySource = bndSrc; pWordTok->m_AccentSource = ACC_PhoneBnd_AREA; // @@@@ ??? } } /***************************************************************************** * CFrontend::DoPhoneNumberTemplate * *----------------------------------* * Description: * Prosody template for phone numbers. * ********************************************************************** MC ***/ void CFrontend::DoPhoneNumberTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem ) { SPDBG_FUNC( "CFrontend::DoPhoneNumberTemplate" ); TTSPhoneNumberItemInfo *pFone; CFEToken *pClusterTok; long cWordCount; SPLISTPOS curPos, nextPos; curPos = nextPos = clusterPos; pFone = (TTSPhoneNumberItemInfo*)&pSentItem->pItemInfo->Type; //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // // COUNTRY CODE // //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- if( pFone->pCountryCode ) { //------------------------------------- // Skip "country" and... //------------------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); //------------------------------------- // ...skip "code" //------------------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); cWordCount = DoIntegerTemplate( &nextPos, pFone->pCountryCode, pSentItem->ulNumWords ); pClusterTok = InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_COUNTRY, SIL_Phone_COUNTRY ); } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // // "One" // //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- if( pFone->fOne ) { //------------------------------------- // Skip "One" //------------------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); //------------------------------------- // and add silence //------------------------------------- pClusterTok = InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_ONE, SIL_Phone_ONE ); } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // // AREA CODE // //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- if( pFone->pAreaCode ) { if( (pFone->fIs800) && nextPos ) { //-------------------------- // Skip digit //-------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); //-------------------------- // Skip "hundred" //-------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); if( nextPos ) { pClusterTok = InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_AREA, SIL_Phone_AREA ); } } else { //------------------------------------- // Skip "area" and... //------------------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); //------------------------------------- // ...skip "code" //------------------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); DoNumByNumTemplate( &nextPos, pFone->pAreaCode->ulNumDigits ); if( nextPos ) { pClusterTok = InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_AREA, SIL_Phone_AREA ); } } } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // // Digits // //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- unsigned long i; for( i = 0; i < pFone->ulNumGroups; i++ ) { DoNumByNumTemplate( &nextPos, pFone->ppGroups[i]->ulNumDigits ); if( nextPos ) { pClusterTok = InsertPhoneSilenceAtSpace( &nextPos, BND_Phone_DIGITS, SIL_Phone_DIGITS ); } } InsertPhoneSilenceAtEnd( BND_Phone_DIGITS, SIL_Phone_DIGITS ); } /* CFrontend::DoPhoneNumberTemplate */ /***************************************************************************** * CFrontend::DoCurrencyTemplate * *-------------------------------* * Description: * Prosody template for currency. * ********************************************************************** MC ***/ void CFrontend::DoCurrencyTemplate( SPLISTPOS clusterPos, TTSSentItem *pSentItem ) { SPDBG_FUNC( "CFrontend::DoCurrencyTemplate" ); TTSCurrencyItemInfo *pMoney; CFEToken *pWordTok; CFEToken *pClusterTok = NULL; long cWordCount; SPLISTPOS curPos, nextPos; pMoney = (TTSCurrencyItemInfo*)&pSentItem->pItemInfo->Type; curPos = nextPos = clusterPos; if( pMoney->pPrimaryNumberPart->Type != eNUM_CARDINAL ) { return; } cWordCount = DoIntegerTemplate( &nextPos, pMoney->pPrimaryNumberPart, pSentItem->ulNumWords ); curPos = nextPos; if( cWordCount > 1 ) { if( pMoney->fQuantifier ) { if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } cWordCount--; } } if( cWordCount > 1 ) { //--------------------------------- // Insert SILENCE after "dollars" //--------------------------------- pWordTok = new CFEToken; if( pWordTok ) { nextPos = InsertSilenceAfterPos( pWordTok, curPos ); pWordTok->m_SilenceSource = SIL_Currency_DOLLAR; pWordTok->m_TuneBoundaryType = NUMBER_BOUNDARY; pWordTok->m_BoundarySource = BND_Currency_DOLLAR; pWordTok = NULL; //---------------------------- // Skip "dollar(s)" //---------------------------- if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } } if( pMoney->pSecondaryNumberPart != NULL ) { //---------------------------- // Skip SILENCE //---------------------------- if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } cWordCount--; //---------------------------- // Skip AND //---------------------------- if( nextPos != NULL ) { curPos = nextPos; if( pClusterTok->m_Accent == K_NOACC ) { //-------------------------------------- // Force POS for "and" to noun // so phrasing rules don't kick in! //-------------------------------------- pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; pClusterTok->POScode = MS_Noun; pClusterTok->m_posClass = POS_CONTENT; } pClusterTok = m_TokList.GetNext( nextPos ); } cWordCount--; cWordCount = DoIntegerTemplate( &curPos, pMoney->pSecondaryNumberPart, cWordCount ); } } } /* CFrontend::DoCurrencyTemplate */ /***************************************************************************** * CFrontend::DoNumByNumTemplate * *---------------------------------* * Description: * Prosody template for RIGHT hand side of the decimal point. * ********************************************************************** MC ***/ void CFrontend::DoNumByNumTemplate( SPLISTPOS *pClusterPos, long cWordCount ) { SPDBG_FUNC( "CFrontend::DoNumByNumTemplate" ); CFEToken *pClusterTok; SPLISTPOS curPos, nextPos; curPos = nextPos = *pClusterPos; while( cWordCount > 1 ) { pClusterTok = NULL; //------------------------------------------------------------- // Right side of decimal point - add H* to every other word //------------------------------------------------------------- if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } cWordCount--; if( pClusterTok ) { pClusterTok->m_Accent = K_ACCENT; pClusterTok->m_Accent_Prom = K_ACCENT_PROM; pClusterTok->m_AccentSource = ACC_NumByNum; } if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } cWordCount--; } if( cWordCount > 0 ) { if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } cWordCount--; } *pClusterPos = nextPos; } /* CFrontend::DoNumByNumTemplate */ /***************************************************************************** * CFrontend::DoFractionTemplate * *------------------------------* * Description: * Prosody template for RIGHT side of the decimal point. * ********************************************************************** MC ***/ long CFrontend::DoFractionTemplate( SPLISTPOS *pClusterPos, TTSNumberItemInfo *pNInfo, long cWordCount ) { SPDBG_FUNC( "CFrontend::DoFractionTemplate" ); CFEToken *pClusterTok; TTSFractionItemInfo *pFInfo; CFEToken *pWordTok; pFInfo = pNInfo->pFractionalPart; //--- Do Numerator... if ( pFInfo->pNumerator->pIntegerPart ) { cWordCount = DoIntegerTemplate( pClusterPos, pFInfo->pNumerator, cWordCount ); } if( pFInfo->pNumerator->pDecimalPart ) { //----------------------------------------- // Skip "point" string... //----------------------------------------- (void) m_TokList.GetNext( *pClusterPos ); //----------------------------------------- // ...and do single digit prosody //----------------------------------------- DoNumByNumTemplate( pClusterPos, pFInfo->pNumerator->pDecimalPart->ulNumDigits ); } //--- Special case - a non-standard fraction (e.g. 1/4) if( !pFInfo->fIsStandard ) { if( !*pClusterPos ) { *pClusterPos = m_TokList.GetTailPosition( ); } else { pClusterTok = m_TokList.GetPrev( *pClusterPos ); } } pWordTok = new CFEToken; if( pWordTok ) { *pClusterPos = InsertSilenceBeforePos( pWordTok, *pClusterPos ); pWordTok->m_SilenceSource = SIL_Fractions_NUM; pWordTok->m_TuneBoundaryType = NUMBER_BOUNDARY; pWordTok->m_BoundarySource = BND_Frac_Num; pWordTok = NULL; //---------------------------- // Skip numerator //---------------------------- if( *pClusterPos != NULL ) { pClusterTok = m_TokList.GetNext( *pClusterPos ); } } //--- Do Denominator... if ( pFInfo->pDenominator->pIntegerPart ) { //----------------------------------------- // Skip "over" string... //----------------------------------------- pClusterTok = m_TokList.GetNext( *pClusterPos ); if( pClusterTok->m_Accent == K_NOACC ) { //-------------------------------------- // Force POS for "and" to noun // so phrasing rules don't kick in! //-------------------------------------- pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; pClusterTok->POScode = MS_Noun; pClusterTok->m_posClass = POS_CONTENT; } cWordCount = DoIntegerTemplate( pClusterPos, pFInfo->pDenominator, cWordCount ); } if( pFInfo->pDenominator->pDecimalPart ) { //----------------------------------------- // Skip "point" string... //----------------------------------------- (void) m_TokList.GetNext( *pClusterPos ); //----------------------------------------- // ...and do single digit prosody //----------------------------------------- DoNumByNumTemplate( pClusterPos, pFInfo->pDenominator->pDecimalPart->ulNumDigits ); } return cWordCount; } /* CFrontend::DoFractionTemplate */ /***************************************************************************** * CFrontend::DoIntegerTemplate * *------------------------------* * Description: * Prosody template for LEFT hand side of the decimal point. * ********************************************************************** MC ***/ long CFrontend::DoIntegerTemplate( SPLISTPOS *pClusterPos, TTSNumberItemInfo *pNInfo, long cWordCount ) { SPDBG_FUNC( "CFrontend::DoIntegerTemplate" ); long i; CFEToken *pClusterTok; CFEToken *pWordTok = NULL; SPLISTPOS curPos, nextPos; //------------------------------------------ // Special currency hack...sorry //------------------------------------------ if( pNInfo->pIntegerPart->fDigitByDigit ) { DoNumByNumTemplate( pClusterPos, pNInfo->pIntegerPart->ulNumDigits ); return cWordCount - pNInfo->pIntegerPart->ulNumDigits; } nextPos = curPos = *pClusterPos; pClusterTok = m_TokList.GetNext( nextPos ); pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; if( pNInfo->fNegative ) { //--------------------------------- // Skip "NEGATIVE" //--------------------------------- if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; } cWordCount--; } for( i = (pNInfo->pIntegerPart->lNumGroups -1); i >= 0; i-- ) { //------------------------------------ // Accent 1st digit in group //------------------------------------ pClusterTok->m_Accent = K_ACCENT; pClusterTok->m_Accent_Prom = K_ACCENT_PROM; pClusterTok->m_AccentSource = ACC_IntegerGroup; if( pNInfo->pIntegerPart->Groups[i].fHundreds ) { //--------------------------------- // Skip "X HUNDRED" //--------------------------------- if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); if( pClusterTok->m_Accent == K_NOACC ) { pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; } } cWordCount--; if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); if( pClusterTok->m_Accent == K_NOACC ) { pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; } } cWordCount--; } if( pNInfo->pIntegerPart->Groups[i].fTens ) { //--------------------------------- // Skip "X-TY" //--------------------------------- if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); if( pClusterTok->m_Accent == K_NOACC ) { pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; } } cWordCount--; } if( pNInfo->pIntegerPart->Groups[i].fOnes ) { //--------------------------------- // Skip "X" //--------------------------------- if( nextPos != NULL ) { curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); if( pClusterTok->m_Accent == K_NOACC ) { pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; } } cWordCount--; } if( pNInfo->pIntegerPart->Groups[i].fQuantifier ) { //--------------------------------- // Insert SILENCE after quant //--------------------------------- if( pWordTok == NULL ) { pWordTok = new CFEToken; } if( pWordTok ) { nextPos = InsertSilenceAfterPos( pWordTok, curPos ); pWordTok->m_SilenceSource = SIL_Integer_Quant; pWordTok->m_TuneBoundaryType = NUMBER_BOUNDARY; pWordTok->m_BoundarySource = BND_IntegerQuant; pWordTok = NULL; if( pClusterTok->m_Accent == K_NOACC ) { pClusterTok->m_Accent = K_DEACCENT; pClusterTok->m_Accent_Prom = K_DEACCENT_PROM; } if( nextPos != NULL ) { //------------------------------ // Skip inserted silence //------------------------------ curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } if( nextPos != NULL ) { //----------------------------------- // Skip quantifier string //----------------------------------- curPos = nextPos; pClusterTok = m_TokList.GetNext( nextPos ); } cWordCount--; } } } *pClusterPos = curPos; return cWordCount; } /* CFrontend::DoIntegerTemplate */ /***************************************************************************** * CFrontend::GetSentenceTokens * *------------------------------* * Description: * Collect Senence Enum tokens. * Copy tokens into 'm_TokList' and token count into 'm_cNumOfWords' * S_FALSE return means no more input sentences.+++ * ********************************************************************** MC ***/ HRESULT CFrontend::GetSentenceTokens( DIRECTION eDirection ) { SPDBG_FUNC( "CFrontend::GetSentenceTokens" ); HRESULT eHR = S_OK; bool fLastItem = false; IEnumSENTITEM *pItemizer; TTSSentItem sentItem; long tokenIndex; CFEToken *pWordTok; bool lastWasTerm = false; bool lastWasSil = true; TUNE_TYPE defaultTune = PHRASE_BOUNDARY; long cNumOfItems, cCurItem, cCurWord; SPLISTPOS clusterPos, tempPos; m_cNumOfWords = 0; pWordTok = NULL; clusterPos = NULL; if ( eDirection == eNEXT ) { eHR = m_pEnumSent->Next( &pItemizer ); } else { eHR = m_pEnumSent->Previous( &pItemizer ); } if( eHR == S_OK ) { //-------------------------------------------- // There's still another sentence to speak //-------------------------------------------- tokenIndex = 0; CItemList& ItemList = ((CSentItemEnum*)pItemizer)->_GetList(); cNumOfItems = (ItemList.GetCount()) -1; cCurItem = 0; //------------------------------------ // Collect all sentence tokens //------------------------------------ while( (eHR = pItemizer->Next( &sentItem )) == S_OK ) { clusterPos = NULL; cCurWord = sentItem.ulNumWords; for ( ULONG i = 0; i < sentItem.ulNumWords; i++ ) { //------------------------------ // Always have a working token //------------------------------ if( pWordTok == NULL ) { pWordTok = new CFEToken; } if( pWordTok ) { if( sentItem.pItemInfo->Type & eWORDLIST_IS_VALID ) { //------------------------------------------ // Get tag values (vol, rate, pitch, etc.) //------------------------------------------ GetItemControls( sentItem.Words[i].pXmlState, pWordTok ); //------------------------------------------ // //------------------------------------------ //------------------------------------- // Switch on token type //------------------------------------- switch ( sentItem.Words[i].pXmlState->eAction ) { case SPVA_Speak: case SPVA_SpellOut: { //---------------------------------- // Speak this token //---------------------------------- pWordTok->tokLen = sentItem.Words[i].ulWordLen; if( pWordTok->tokLen > (TOKEN_LEN_MAX -1) ) { //----------------------------------- // Clip to max string length //----------------------------------- pWordTok->tokLen = TOKEN_LEN_MAX -1; } //-------------------------- // Copy token string // Append C-string delimiter //-------------------------- memcpy( &pWordTok->tokStr[0], &sentItem.Words[i].pWordText[0], pWordTok->tokLen * sizeof(WCHAR) ); pWordTok->tokStr[pWordTok->tokLen] = 0; //string delimiter pWordTok->phon_Len = IPA_to_Allo( sentItem.Words[i].pWordPron, pWordTok->phon_Str ); pWordTok->POScode = sentItem.Words[i].eWordPartOfSpeech; pWordTok->m_posClass = GetPOSClass( pWordTok->POScode ); pWordTok->srcPosition = sentItem.ulItemSrcOffset; pWordTok->srcLen = sentItem.ulItemSrcLen; pWordTok->m_PitchBaseOffs = m_CurPitchOffs; pWordTok->m_PitchRangeScale = m_CurPitchRange; pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY; //---------------------------------- // Advance to next token //---------------------------------- tempPos = m_TokList.AddTail( pWordTok ); if( clusterPos == NULL ) { //-------------------------------------- // Remember where currentitem started //-------------------------------------- clusterPos = tempPos; } pWordTok = NULL; // Get a new ptr next time tokenIndex++; lastWasTerm = false; lastWasSil = false; break; } case SPVA_Silence: { (void)InsertSilenceAtTail( pWordTok, &sentItem, sentItem.Words[i].pXmlState->SilenceMSecs ); pWordTok->m_SilenceSource = SIL_XML; pWordTok = NULL; // Get a new ptr next time tokenIndex++; lastWasTerm = false; break; } case SPVA_Pronounce: { pWordTok->tokStr[0] = 0; // There's no orth for Pron types pWordTok->tokLen = 0; pWordTok->phon_Len = IPA_to_Allo( sentItem.Words[i].pXmlState->pPhoneIds, pWordTok->phon_Str ); pWordTok->POScode = sentItem.Words[i].eWordPartOfSpeech; pWordTok->m_posClass = GetPOSClass( pWordTok->POScode ); pWordTok->srcPosition = sentItem.ulItemSrcOffset; pWordTok->srcLen = sentItem.ulItemSrcLen; pWordTok->m_PitchBaseOffs = m_CurPitchOffs; pWordTok->m_PitchRangeScale = m_CurPitchRange; pWordTok->m_ProsodyDurScale = m_RateRatio_PROSODY; //---------------------------------- // Advance to next token //---------------------------------- tempPos = m_TokList.AddTail( pWordTok ); if( clusterPos == NULL ) { //-------------------------------------- // Remember where currentitem started //-------------------------------------- clusterPos = tempPos; } pWordTok = NULL; // Get a new ptr next time tokenIndex++; lastWasTerm = false; lastWasSil = false; break; } case SPVA_Bookmark: { BOOKMARK_ITEM *pMarker; //------------------------------------------------- // Create bookmark list if it's not already there //------------------------------------------------- if( pWordTok->pBMObj == NULL ) { pWordTok->pBMObj = new CBookmarkList; } if( pWordTok->pBMObj ) { //-------------------------------------------------------- // Allocate memory for bookmark string // (add 1 to length for string delimiter) //-------------------------------------------------------- pWordTok->tokLen = sentItem.Words[i].ulWordLen; pMarker = new BOOKMARK_ITEM; if (pMarker) { //---------------------------------------- // We'll need the text ptr and length // when this bookmark event gets posted //---------------------------------------- pMarker->pBMItem = (LPARAM)sentItem.pItemSrcText; //--- Punch NULL character into end of bookmark string for Event... WCHAR* pTemp = (WCHAR*) sentItem.pItemSrcText + sentItem.ulItemSrcLen; *pTemp = 0; //----------------------------------- // Add this bookmark to list //----------------------------------- pWordTok->pBMObj->m_BMList.AddTail( pMarker ); } } break; } default: { SPDBG_DMSG1( "Unknown SPVSTATE eAction: %d\n", sentItem.Words[i].pXmlState->eAction ); break; } } } else { //----------------------------- // Maybe token is punctuation //----------------------------- if ( fIsPunctuation(sentItem) ) { TUNE_TYPE bType = NULL_BOUNDARY; switch ( sentItem.pItemInfo->Type ) { case eCOMMA: case eSEMICOLON: case eCOLON: case eHYPHEN: if( !lastWasSil ) { bType = PHRASE_BOUNDARY; } break; case ePERIOD: if( fLastItem ) { bType = DECLAR_BOUNDARY; } else { defaultTune = DECLAR_BOUNDARY; } break; case eQUESTION: if( fLastItem ) { bType = YN_QUEST_BOUNDARY; } else { defaultTune = YN_QUEST_BOUNDARY; } break; case eEXCLAMATION: if( fLastItem ) { bType = EXCLAM_BOUNDARY; } else { defaultTune = EXCLAM_BOUNDARY; } break; } if( (bType != NULL_BOUNDARY) && (tokenIndex > 0) ) { pWordTok->m_TuneBoundaryType = bType; pWordTok->phon_Len = 1; pWordTok->phon_Str[0] = _SIL_; pWordTok->srcPosition = sentItem.ulItemSrcOffset; pWordTok->srcLen = sentItem.ulItemSrcLen; pWordTok->tokStr[0] = sentItem.pItemSrcText[0]; // punctuation pWordTok->tokStr[1] = 0; // delimiter pWordTok->tokLen = 1; pWordTok->m_SilenceSource = SIL_Term; pWordTok->m_TermSil = 0; //---------------------------------- // Advance to next token //---------------------------------- tempPos = m_TokList.AddTail( pWordTok ); if( clusterPos == NULL ) { //-------------------------------------- // Remember where currentitem started //-------------------------------------- clusterPos = tempPos; } pWordTok = NULL; // Get a new ptr next time tokenIndex++; lastWasTerm = true; lastWasSil = true; } } else { switch ( sentItem.pItemInfo->Type ) { //case eSINGLE_QUOTE: case eDOUBLE_QUOTE: if( StateQuoteProsody( pWordTok, &sentItem, (!fLastItem) & (!lastWasSil) ) ) { if( (!fLastItem) & (!lastWasSil) ) { pWordTok = NULL; // Get a new ptr next time tokenIndex++; } lastWasTerm = false; lastWasSil = true; } break; case eOPEN_PARENTHESIS: case eOPEN_BRACKET: case eOPEN_BRACE: if( StartParenProsody( pWordTok, &sentItem, !fLastItem ) ) { if( !fLastItem ) { pWordTok = NULL; // Get a new ptr next time tokenIndex++; } lastWasTerm = false; lastWasSil = true; } break; case eCLOSE_PARENTHESIS: case eCLOSE_BRACKET: case eCLOSE_BRACE: if( EndParenProsody( pWordTok, &sentItem, !fLastItem ) ) { if( !fLastItem ) { pWordTok = NULL; // Get a new ptr next time tokenIndex++; } lastWasTerm = false; lastWasSil = true; } break; } } } } else { eHR = E_OUTOFMEMORY; break; } if( --cCurWord == 0 ) { cCurItem++; } if( cCurItem == cNumOfItems ) { fLastItem = true; } } //------------------------------------- // Tag special word clusters //------------------------------------- ProsodyTemplates( clusterPos, &sentItem ); } pItemizer->Release(); //------------------------------------------------------ // Make sure sentence ends on termination //------------------------------------------------------ if( !lastWasTerm ) { //------------------------ // Add a comma //------------------------ if( pWordTok == NULL ) { pWordTok = new CFEToken; } if( pWordTok ) { pWordTok->m_TuneBoundaryType = defaultTune; pWordTok->m_BoundarySource = BND_ForcedTerm; pWordTok->m_SilenceSource = SIL_Term; pWordTok->phon_Len = 1; pWordTok->phon_Str[0] = _SIL_; pWordTok->srcPosition = sentItem.ulItemSrcOffset; pWordTok->srcLen = sentItem.ulItemSrcLen; pWordTok->tokStr[0] = '.'; // punctuation pWordTok->tokStr[1] = 0; // delimiter pWordTok->tokLen = 1; // pWordTok->m_BoundarySource = bndSource; //---------------------------------- // Advance to next token //---------------------------------- tempPos = m_TokList.AddTail( pWordTok ); if( clusterPos == NULL ) { //-------------------------------------- // Remember where current item started //-------------------------------------- clusterPos = tempPos; } pWordTok = NULL; // Get a new ptr next time tokenIndex++; } else { //---------------------------------- // Bail-out or we'll crash //---------------------------------- eHR = E_OUTOFMEMORY; } } m_cNumOfWords = tokenIndex; if( eHR == S_FALSE ) { //---------------------------------- // Return only errors //---------------------------------- eHR = S_OK; } } else { eHR = eHR; // !!!! } //------------------------------- // Cleanup memory allocation //------------------------------- if( pWordTok != NULL ) { delete pWordTok; } //--------------------------------------------------- // Get sentence position and length for SAPI events //--------------------------------------------------- CalcSentenceLength(); return eHR; } /* CFrontend::GetSentenceTokens */ /***************************************************************************** * CFrontend::CalcSentenceLength * *-------------------------------* * Description: * Loop thru token list and sum the source char count. * ********************************************************************** MC ***/ void CFrontend::CalcSentenceLength() { long firstIndex, lastIndex, lastLen; bool firstState; SPLISTPOS listPos; CFEToken *pWordTok, *pFirstTok = NULL; //--------------------------------------------- // Find the 1st and last words in sentence //--------------------------------------------- firstIndex = lastIndex = lastLen = 0; firstState = true; listPos = m_TokList.GetHeadPosition(); while( listPos ) { pWordTok = m_TokList.GetNext( listPos ); //------------------------------------------- // Look at at displayable words only //------------------------------------------- if( pWordTok->srcLen > 0 ) { if( firstState ) { firstState = false; firstIndex = pWordTok->srcPosition; pFirstTok = pWordTok; } else { lastIndex = pWordTok->srcPosition; lastLen = pWordTok->srcLen; } } } //-------------------------------------------------- // Calculate sentence length for head list item //-------------------------------------------------- if( pFirstTok ) { pFirstTok->sentencePosition = firstIndex; // Sentence starts here... pFirstTok->sentenceLen = (lastIndex - firstIndex) + lastLen; // ...and this is the length } } /***************************************************************************** * CFrontend::DisposeUnits * *-------------------------* * Description: * Delete memory allocated to 'm_pUnits'. * Clean-up memory for Bookmarks * ********************************************************************** MC ***/ #ifdef USE_VOICEDATAOBJ void CFrontend::DisposeUnits( ) { SPDBG_FUNC( "CFrontend::DisposeUnits" ); ULONG unitIndex; if( m_pUnits ) { //----------------------------------------- // Clean-up Bookmark memory allocation //----------------------------------------- for( unitIndex = m_CurUnitIndex; unitIndex < m_unitCount; unitIndex++) { if( m_pUnits[unitIndex].pBMObj != NULL ) { //--------------------------------------- // Dispose bookmark list //--------------------------------------- delete m_pUnits[unitIndex].pBMObj; m_pUnits[unitIndex].pBMObj = NULL; } } delete m_pUnits; m_pUnits = NULL; } } /* CFrontend::DisposeUnits */ #endif /***************************************************************************** * CFrontend::ParseNextSentence * *------------------------------* * Description: * Fill 'm_pUnits' array with next sentence. * If there's no more input text, * return with 'm_SpeechState' set to SPEECH_DONE +++ * ********************************************************************** MC ***/ HRESULT CFrontend::ParseSentence( DIRECTION eDirection ) { SPDBG_FUNC( "CFrontend::ParseNextSentence" ); HRESULT hr = S_OK; //----------------------------------------------------- // If there's a previous unit array, free its memory //----------------------------------------------------- #ifdef USE_VOICEDATAOBJ DisposeUnits(); #endif m_CurUnitIndex = 0; m_unitCount = 0; DeleteTokenList(); #ifdef USE_VOICEDATAOBJ m_pUnits = NULL; #endif //----------------------------------------------------- // If there's a previous allo array, free its memory //----------------------------------------------------- if( m_pAllos ) { delete m_pAllos; m_pAllos = NULL; } //----------------------------------------------------- // Fill token array with next sentence // Skip empty sentences. // NOTE: includes non-speaking items //----------------------------------------------------- do { hr = GetSentenceTokens( eDirection ); } while( (hr == S_OK) && (m_cNumOfWords == 0) ); if( hr == S_OK ) { //-------------------------------------------- // Prepare word emphasis //-------------------------------------------- DoWordAccent(); //-------------------------------------------- // Word level prosodic lables //-------------------------------------------- DoPhrasing(); ToBISymbols(); //-------------------------------------------- // Convert tokens to allo list //-------------------------------------------- m_pAllos = new CAlloList; if (m_pAllos == NULL) { //----------------------- // Out of memory //----------------------- hr = E_FAIL; } if( SUCCEEDED(hr) ) { //-------------------------------- // Convert word to allo strteam //------------------------------- TokensToAllo( &m_TokList, m_pAllos ); //---------------------------- // Tag sentence syllables //---------------------------- m_SyllObj.TagSyllables( m_pAllos ); //-------------------------------------------- // Dispose token array, no longer needed //-------------------------------------------- DeleteTokenList(); //-------------------------------------------- // Create the unit array // NOTE: //-------------------------------------------- #ifdef USE_VOICEDATAOBJ hr = UnitLookahead (); if( hr == S_OK ) { //-------------------------------------------- // Compute allo durations //-------------------------------------------- UnitToAlloDur( m_pAllos, m_pUnits ); m_DurObj.AlloDuration( m_pAllos, m_RateRatio_API ); //-------------------------------------------- // Modulate allo pitch //-------------------------------------------- m_PitchObj.AlloPitch( m_pAllos, m_BasePitch, m_PitchRange ); } #else m_DurObj.AlloDuration( m_pAllos, m_RateRatio_API ); m_PitchObj.AlloPitch( m_pAllos, m_BasePitch, m_PitchRange ); #endif } #ifdef USE_VOICEDATAOBJ if( hr == S_OK ) { AlloToUnitPitch( m_pAllos, m_pUnits ); } #endif } if( FAILED(hr) ) { //------------------------------------------ // Either the input text is dry or we failed. // Try to fail gracefully // 1 - Clean up memory // 2 - End the speech //------------------------------------------ if( m_pAllos ) { delete m_pAllos; m_pAllos = 0; } DeleteTokenList(); #ifdef USE_VOICEDATAOBJ DisposeUnits(); #endif m_SpeechState = SPEECH_DONE; } else if( hr == S_FALSE ) { //--------------------------------- // No more input text //--------------------------------- hr = S_OK; m_SpeechState = SPEECH_DONE; } return hr; } /* CFrontend::ParseNextSentence */ /***************************************************************************** * CFrontend::UnitLookahead * *--------------------------* * Description: * ********************************************************************** MC ***/ #ifdef USE_VOICEDATAOBJ HRESULT CFrontend::UnitLookahead () { SPDBG_FUNC( "CFrontend::UnitLookahead" ); HRESULT hr = S_OK; UNIT_CVT *pPhon2Unit = NULL; ULONG i; m_unitCount = m_pAllos->GetCount(); m_pUnits = new UNITINFO[m_unitCount]; if( m_pUnits ) { pPhon2Unit = new UNIT_CVT[m_unitCount]; if( pPhon2Unit ) { //-------------------------------------------- // Convert allo list to unit array //-------------------------------------------- memset( m_pUnits, 0, m_unitCount * sizeof(UNITINFO) ); hr = AlloToUnit( m_pAllos, m_pUnits ); if( SUCCEEDED(hr) ) { //-------------------------------------------- // Initialize UNIT_CVT //-------------------------------------------- for( i = 0; i < m_unitCount; i++ ) { pPhon2Unit[i].PhonID = m_pUnits[i].PhonID; pPhon2Unit[i].flags = m_pUnits[i].flags; } //-------------------------------------------- // Compute triphone IDs //-------------------------------------------- hr = m_pVoiceDataObj->GetUnitIDs( pPhon2Unit, m_unitCount ); if( SUCCEEDED(hr) ) { //-------------------------------------------- // Copy UNIT_CVT to UNITINFO //-------------------------------------------- for( i = 0; i < m_unitCount; i++ ) { m_pUnits[i].UnitID = pPhon2Unit[i].UnitID; m_pUnits[i].SenoneID = pPhon2Unit[i].SenoneID; m_pUnits[i].duration = pPhon2Unit[i].Dur; m_pUnits[i].amp = pPhon2Unit[i].Amp; m_pUnits[i].ampRatio = pPhon2Unit[i].AmpRatio; strcpy( m_pUnits[i].szUnitName, pPhon2Unit[i].szUnitName ); } } else { //----------------------- // Can't get unit ID's //----------------------- delete m_pUnits; m_pUnits = NULL; } } else { //----------------------- // Can't convert allos //----------------------- delete m_pUnits; m_pUnits = NULL; } } else { //----------------------- // Out of memory //----------------------- delete m_pUnits; m_pUnits = NULL; hr = E_FAIL; } } else { //----------------------- // Out of memory //----------------------- hr = E_FAIL; } //------------------------------ // Cleanup before exit //------------------------------ if( pPhon2Unit ) { delete pPhon2Unit; } return hr; } /* CFrontend::UnitLookahead */ #endif /***************************************************************************** * CFrontend::UnitToAlloDur * *--------------------------* * Description: * ********************************************************************** MC ***/ void CFrontend::UnitToAlloDur( CAlloList *pAllos, UNITINFO *pu ) { SPDBG_FUNC( "CFrontend::UnitToAlloDur" ); CAlloCell *pCurCell; pCurCell = pAllos->GetHeadCell(); while( pCurCell ) { pCurCell->m_UnitDur = pu->duration; pu++; pCurCell = pAllos->GetNextCell(); } } /* CFrontend::UnitToAlloDur */ /***************************************************************************** * CFrontend::AlloToUnitPitch * *----------------------------* * Description: * ********************************************************************** MC ***/ #ifdef USE_VOICEDATAOBJ void CFrontend::AlloToUnitPitch( CAlloList *pAllos, UNITINFO *pu ) { SPDBG_FUNC( "CFrontend::AlloToUnitPitch" ); ULONG k; CAlloCell *pCurCell; pCurCell = pAllos->GetHeadCell(); while( pCurCell ) { pu->duration = pCurCell->m_ftDuration; for( k = 0; k < pu->nKnots; k++ ) { pu->pTime[k] = pCurCell->m_ftTime[k] * m_SampleRate; pu->pF0[k] = pCurCell->m_ftPitch[k]; pu->pAmp[k] = pu->ampRatio; } pu++; pCurCell = pAllos->GetNextCell(); } } /* CFrontend::AlloToUnitPitch */ #endif /***************************************************************************** * CAlloList::DeleteTokenList * *----------------------------* * Description: * Remove every item in link list. * ********************************************************************** MC ***/ void CFrontend::DeleteTokenList() { SPDBG_FUNC( "CFrontend::DeleteTokenList" ); CFEToken *pTok; while( !m_TokList.IsEmpty() ) { pTok = (CFEToken*)m_TokList.RemoveHead(); delete pTok; } } /* CFrontend::DeleteTokenList */ /***************************************************************************** * AdjustQuestTune * *-----------------* * Description: * Adjust termination for either YN or WH sentence tune. * ********************************************************************** MC ***/ static void AdjustQuestTune( CFEToken *pTok, bool fIsYesNo ) { SPDBG_FUNC( "AdjustQuestTune" ); if ( pTok->m_TuneBoundaryType > NULL_BOUNDARY ) { if( (pTok->m_TuneBoundaryType == YN_QUEST_BOUNDARY) || (pTok->m_TuneBoundaryType == WH_QUEST_BOUNDARY) ) { //------------------------------------ // Is this a yes/no question phrase //------------------------------------ if( fIsYesNo ) { //------------------------------------------ // Put out a final yes/no question marker //------------------------------------------ pTok->m_TuneBoundaryType = YN_QUEST_BOUNDARY; pTok->m_BoundarySource = BND_YNQuest; } else { //------------------------------------------------------------------------ // Use declarative phrase marker (for WH questions) //------------------------------------------------------------------------ pTok->m_TuneBoundaryType = WH_QUEST_BOUNDARY; pTok->m_BoundarySource = BND_WHQuest; } } } } /* AdjustQuestTune */ typedef enum { p_Interj, P_Adv, P_Verb, P_Adj, P_Noun, PRIORITY_SIZE, } CONTENT_PRIORITY; #define NO_POSITION -1 /***************************************************************************** * CFrontend::ExclamEmph * *-----------------------* * Description: * Find a likely word to emph if sentence has exclamation * ********************************************************************** MC ***/ void CFrontend::ExclamEmph() { SPDBG_FUNC( "CFrontend::ExclamEmph" ); CFEToken *pCur_Tok; SPLISTPOS listPos, targetPos, curPos, contentPos[PRIORITY_SIZE]; long cContent, cWords; long i; for(i = 0; i < PRIORITY_SIZE; i++ ) { contentPos[i] = (SPLISTPOS)NO_POSITION; } listPos = m_TokList.GetTailPosition(); pCur_Tok = m_TokList.GetNext( listPos ); //--------------------------------------------------- // First, check last token fors an exclamation //--------------------------------------------------- if( pCur_Tok->m_TuneBoundaryType == EXCLAM_BOUNDARY ) { //----------------------------------------------------- // Then, see if there's only one content word // in the sentence //----------------------------------------------------- cContent = cWords = 0; listPos = m_TokList.GetHeadPosition(); while( listPos ) { curPos = listPos; pCur_Tok = m_TokList.GetNext( listPos ); if( pCur_Tok->m_posClass == POS_CONTENT ) { cContent++; cWords++; if( cContent == 1) { targetPos = curPos; } //-------------------------------------------------------- // Fill the famous Azara Content Prominence Hierarchy (ACPH) //-------------------------------------------------------- if( (pCur_Tok->POScode == MS_Noun) && (contentPos[P_Noun] == (SPLISTPOS)NO_POSITION) ) { contentPos[P_Noun] = curPos; } else if( (pCur_Tok->POScode == MS_Verb) && (contentPos[P_Verb] == (SPLISTPOS)NO_POSITION) ) { contentPos[P_Verb] = curPos; } else if( (pCur_Tok->POScode == MS_Adj) && (contentPos[P_Adj] == (SPLISTPOS)NO_POSITION) ) { contentPos[P_Adj] = curPos; } else if( (pCur_Tok->POScode == MS_Adv) && (contentPos[P_Adv] == (SPLISTPOS)NO_POSITION) ) { contentPos[P_Adv] = curPos; } else if( (pCur_Tok->POScode == MS_Interjection) && (contentPos[p_Interj] == (SPLISTPOS)NO_POSITION) ) { contentPos[p_Interj] = curPos; } } else if( pCur_Tok->m_posClass == POS_FUNC ) { cWords++; if( cWords == 1) { targetPos = curPos; } } } //-------------------------------------------- // If there's only one word or content word // then EMPHASIZE it //-------------------------------------------- if( (cContent == 1) || (cWords == 1) ) { pCur_Tok = m_TokList.GetNext( targetPos ); pCur_Tok->user_Emph = 1; } else if( cContent > 1 ) { for(i = 0; i < PRIORITY_SIZE; i++ ) { if( contentPos[i] != (SPLISTPOS)NO_POSITION ) { targetPos = contentPos[i]; break; } } pCur_Tok = m_TokList.GetNext( targetPos ); pCur_Tok->user_Emph = 1; } } } //ExclamEmph /***************************************************************************** * CFrontend::DoWordAccent * *-------------------------* * Description: * Prepare word for emphasis * ********************************************************************** MC ***/ void CFrontend::DoWordAccent() { SPDBG_FUNC( "CFrontend::DoWordAccent" ); long cNumOfWords; long iCurWord; CFEToken *pCur_Tok, *pNext_Tok, *pPrev_Tok, *pTempTok; SPLISTPOS listPos; TUNE_TYPE cur_Bnd, prev_Bnd; //----------------------------- // Initilize locals //----------------------------- cNumOfWords = m_TokList.GetCount(); if( cNumOfWords > 0 ) { ExclamEmph(); prev_Bnd = PHRASE_BOUNDARY; // Assume start of sentence //------------------------------------- // Fill the token pipeline //------------------------------------- listPos = m_TokList.GetHeadPosition(); //-- Previous pPrev_Tok = NULL; //-- Current pCur_Tok = m_TokList.GetNext( listPos ); //-- Next if( listPos ) { pNext_Tok = m_TokList.GetNext( listPos ); } else { pNext_Tok = NULL; } //----------------------------------- // Step through entire word array // (skip last) //----------------------------------- for( iCurWord = 0; iCurWord < (cNumOfWords -1); iCurWord++ ) { cur_Bnd = pCur_Tok->m_TuneBoundaryType; if( pCur_Tok->user_Emph > 0 ) { //----------------------------------- // Current word is emphasized //----------------------------------- if( prev_Bnd == NULL_BOUNDARY ) { pTempTok = new CFEToken; if( pTempTok ) { pTempTok->user_Break = EMPH_HESITATION; pTempTok->m_TuneBoundaryType = NULL_BOUNDARY; pTempTok->phon_Len = 1; pTempTok->phon_Str[0] = _SIL_; pTempTok->srcPosition = pCur_Tok->srcPosition; pTempTok->srcLen = pCur_Tok->srcLen; pTempTok->tokStr[0] = 0; // There's no orth for Break pTempTok->tokLen = 0; pTempTok->m_TermSil = 0; pTempTok->m_SilenceSource = SIL_Emph; pTempTok->m_DurScale = 0; if( pPrev_Tok ) { //pTempTok->m_DurScale = pPrev_Tok->m_DurScale; pTempTok->m_ProsodyDurScale = pPrev_Tok->m_ProsodyDurScale; pTempTok->user_Volume = pPrev_Tok->user_Volume; } else { //pTempTok->m_DurScale = 1.0f; pTempTok->m_ProsodyDurScale = 1.0f; } m_TokList.InsertBefore( m_TokList.FindIndex( iCurWord ), pTempTok ); pCur_Tok = pTempTok; m_cNumOfWords++; cNumOfWords++; iCurWord++; } } } //------------------------------ // Shift the token pipeline //------------------------------ prev_Bnd = cur_Bnd; pPrev_Tok = pCur_Tok; pCur_Tok = pNext_Tok; if( listPos ) { pNext_Tok = m_TokList.GetNext( listPos ); } else { pNext_Tok = NULL; } } } } /* CFrontend::DoWordAccent */ /***************************************************************************** * CFrontend::DoPhrasing * *-----------------------* * Description: * Insert sub-phrase boundaries into word token array * ********************************************************************** MC ***/ void CFrontend::DoPhrasing() { SPDBG_FUNC( "CFrontend::DoPhrasing" ); long iCurWord; CFEToken *pCur_Tok, *pNext_Tok, *pNext2_Tok, *pNext3_Tok, *pTempTok, *pPrev_Tok; ENGPARTOFSPEECH cur_POS, next_POS, next2_POS, next3_POS, prev_POS; bool fNext_IsPunct, fNext2_IsPunct, fNext3_IsPunct; bool fIsYesNo, fMaybeWH, fHasDet, fInitial_Adv, fIsShortSent, fIsAlphaWH; TUNE_TYPE cur_Bnd, prev_Punct; long punctDistance; long cNumOfWords; SPLISTPOS listPos; BOUNDARY_SOURCE bndNum; ACCENT_SOURCE accNum; //----------------------------- // Initialize locals //----------------------------- cNumOfWords = m_TokList.GetCount(); if( cNumOfWords > 0 ) { cur_Bnd = NULL_BOUNDARY; prev_POS = MS_Unknown; prev_Punct = PHRASE_BOUNDARY; // Assume start of sentence punctDistance = 0; // To quiet the compiler... fIsYesNo = fMaybeWH = fHasDet = fIsAlphaWH = false; // To quiet the compiler... fMaybeWH = false; fInitial_Adv = false; if (cNumOfWords <= 9) { fIsShortSent = true; } else { fIsShortSent = false; } //------------------------------------- // Fill the token pipeline //------------------------------------- listPos = m_TokList.GetHeadPosition(); //-- Previous pPrev_Tok = NULL; //-- Current pCur_Tok = m_TokList.GetNext( listPos ); //-- Next if( listPos ) { pNext_Tok = m_TokList.GetNext( listPos ); } else { pNext_Tok = NULL; } //-- Next 2 if( listPos ) { pNext2_Tok = m_TokList.GetNext( listPos ); } else { pNext2_Tok = NULL; } //-- Next 3 if( listPos ) { pNext3_Tok = m_TokList.GetNext( listPos ); } else { pNext3_Tok = NULL; } //----------------------------------- // Step through entire word array // (skip last) //----------------------------------- for( iCurWord = 0; iCurWord < (cNumOfWords -1); iCurWord++ ) { bndNum = BND_NoSource; accNum = ACC_NoSource; if( (prev_Punct > NULL_BOUNDARY) && (prev_Punct < SUB_BOUNDARY_1) ) { punctDistance = 1; fIsYesNo = true; fMaybeWH = false; fHasDet = false; fIsAlphaWH = false; } else { punctDistance++; } //------------------------------------ // Process new word //------------------------------------ cur_POS = pCur_Tok->POScode; cur_Bnd = NULL_BOUNDARY; //------------------------------------ // Don't depend on POS to detect // "WH" question //------------------------------------ if( ((pCur_Tok->tokStr[0] == 'W') || (pCur_Tok->tokStr[0] == 'w')) && ((pCur_Tok->tokStr[1] == 'H') || (pCur_Tok->tokStr[1] == 'h')) ) { fIsAlphaWH = true; } else { fIsAlphaWH = false; } //------------------------------------ // Look ahead to NEXT word //------------------------------------ next_POS = pNext_Tok->POScode; if( pNext_Tok->m_TuneBoundaryType != NULL_BOUNDARY ) { fNext_IsPunct = true; } else { fNext_IsPunct = false; } //------------------------------------ // Look ahead 2 positions //------------------------------------ if( pNext2_Tok ) { next2_POS = pNext2_Tok->POScode; if( pNext2_Tok->m_TuneBoundaryType != NULL_BOUNDARY ) { fNext2_IsPunct = true; } else { fNext2_IsPunct = false; } } else { next2_POS = MS_Unknown; fNext2_IsPunct = false; } //------------------------------------ // Look ahead 3 positions //------------------------------------ if( pNext3_Tok ) { next3_POS = pNext3_Tok->POScode; if( pNext3_Tok->m_TuneBoundaryType != NULL_BOUNDARY ) { fNext3_IsPunct = true; } else { fNext3_IsPunct = false; } } else { next3_POS = MS_Unknown; fNext3_IsPunct = false; } //------------------------------------------------------------------------ // Is phrase a yes/no question? //------------------------------------------------------------------------ if( punctDistance == 1 ) { if( (cur_POS == MS_Interr) || (fIsAlphaWH) ) { //--------------------------------- // It's a "WH" question //--------------------------------- fIsYesNo = false; } else if( (cur_POS == MS_Prep) || (cur_POS == MS_Conj) || (cur_POS == MS_CConj) ) { fMaybeWH = true; } } else if( (punctDistance == 2) && (fMaybeWH) && ((cur_POS == MS_Interr) || (cur_POS == MS_RelPron) || (fIsAlphaWH)) ) { fIsYesNo = false; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_1: Insert boundary after sentence-initial adverb // // Reluctantly __the cat sat on the mat. //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- if( fInitial_Adv ) { cur_Bnd = SUB_BOUNDARY_1; fInitial_Adv = false; bndNum = BND_PhraseRule1; accNum = ACC_PhraseRule1; } else { if( (punctDistance == 1) && (cur_POS == MS_Adv) && (next_POS == MS_Det) ) // include //LEX_SUBJPRON // he //LEX_DPRON // this //LEX_IPRON // everybody //NOT LEX_PPRON // myself { fInitial_Adv = true; } else { fInitial_Adv = false; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_2:Insert boundary before coordinating conjunctions // The cat sat on the mat __and cleaned his fur. // //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- if( (cur_POS == MS_CConj) && (fHasDet == false) && (punctDistance > 3) && (next2_POS != MS_Conj) ) { cur_Bnd = SUB_BOUNDARY_2; bndNum = BND_PhraseRule2; accNum = ACC_PhraseRule2; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_2:Insert boundary before adverb // The cat sat on the mat __reluctantly. //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( (cur_POS == MS_Adv) && (punctDistance > 4) && (next_POS != MS_Adj) ) { cur_Bnd = SUB_BOUNDARY_2; bndNum = BND_PhraseRule3; accNum = ACC_PhraseRule3; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_2:Insert boundary after object pronoun // The cat sat with me__ on the mat. // //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( (prev_POS == MS_ObjPron) && (punctDistance > 2)) { cur_Bnd = SUB_BOUNDARY_2; bndNum = BND_PhraseRule4; accNum = ACC_PhraseRule4; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_2:Insert boundary before subject pronoun or contraction // The cat sat on the mat _I see. // The cat sat on the mat _I'm sure. // //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( ((cur_POS == MS_SubjPron) || (cur_POS == MS_Contr) ) && (punctDistance > 3) && (prev_POS != MS_RelPron) && (prev_POS != MS_Conj)) { cur_Bnd = SUB_BOUNDARY_2; bndNum = BND_PhraseRule5; accNum = ACC_PhraseRule5; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_2:Insert boundary before interr // The cat sat on the mat _how odd. //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( (cur_POS == MS_Interr) && (punctDistance > 4) ) { cur_Bnd = SUB_BOUNDARY_2; bndNum = BND_PhraseRule6; accNum = ACC_PhraseRule6; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_3:Insert boundary after subject noun phrase followed by aux verb //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_3:Insert boundary before vaux after noun phrase // The gray cat __should sit on the mat. //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( (punctDistance > 2) && ( ((prev_POS == MS_Noun) || (prev_POS == MS_Verb)) && (prev_POS != MS_VAux) ) && (cur_POS == MS_VAux) ) { cur_Bnd = SUB_BOUNDARY_3; bndNum = BND_PhraseRule7; accNum = ACC_PhraseRule7; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_3:Insert boundary after MS_Interr // The gray cat __should sit on the mat. // SEE ABOVE??? //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- /*else if( (prev_POS == MS_Noun) && ((next_POS != MS_RelPron) && (next_POS != MS_VAux) && (next_POS != MS_RVAux) && (next2_POS != MS_VAux) && (next2_POS != MS_RVAux)) && (punctDistance > 4) && ((cur_POS == MS_VAux) || (cur_POS == MS_RVAux))) { cur_Bnd = SUB_BOUNDARY_3; bndNum = BND_PhraseRule8; accNum = ACC_PhraseRule8; }*/ //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_3:Insert boundary after MS_Interr // The cat sat on the mat _how odd. //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( (prev_POS == MS_Noun) && (next_POS != MS_RelPron) && (next_POS != MS_Conj) && (next_POS != MS_CConj) && (punctDistance > 3) && (cur_POS == MS_Verb)) { cur_Bnd = SUB_BOUNDARY_3; bndNum = BND_PhraseRule9; accNum = ACC_PhraseRule9; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_3:Insert boundary after MS_Interr // The cat sat on the mat _how odd. //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- /*else if( (prev_POS == MS_Noun) && (cur_POS != MS_RelPron) && (cur_POS != MS_RVAux) && (cur_POS != MS_CConj) && (cur_POS != MS_Conj) && (punctDistance > 2) && ((punctDistance > 2) || (fIsShortSent)) && (cur_POS == MS_Verb)) { cur_Bnd = SUB_BOUNDARY_3; bndNum = BND_PhraseRule10; accNum = ACC_PhraseRule10; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_4:Insert boundary before conjunction //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( ((cur_POS == MS_Conj) && (punctDistance > 3) && (fNext_IsPunct == false) && (prev_POS != MS_Conj) && (prev_POS != MS_CConj) && (fNext2_IsPunct == false)) || ( (prev_POS == MS_VPart) && (cur_POS != MS_Prep) && (cur_POS != MS_Det) && (punctDistance > 2) && ((cur_POS == MS_Noun) || (cur_POS == MS_Noun) || (cur_POS == MS_Adj))) || ( (cur_POS == MS_Interr) && (punctDistance > 2) && (cur_POS == MS_SubjPron)) ) { cur_Bnd = SUB_BOUNDARY_4; bndNum = BND_PhraseRule11; accNum = ACC_PhraseRule11; } //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_5:Insert boundary before relative pronoun //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( ( (cur_POS == MS_RelPron) && (punctDistance >= 3) && (prev_POS != MS_Prep) && (next3_POS != MS_VAux) && (next3_POS != MS_RVAux) && ( (prev_POS == MS_Noun) || (prev_POS == MS_Verb) ) ) || ( (cur_POS == MS_Quant) && (punctDistance > 5) && (prev_POS != MS_Adj) && (prev_POS != MS_Det) && (prev_POS != MS_VAux) && (prev_POS != MS_RVAux) && (prev_POS != MS_Det) && (next2_POS != MS_CConj) && (fNext_IsPunct == false))) { cur_Bnd = SUB_BOUNDARY_5; bndNum = BND_PhraseRule12; accNum = ACC_PhraseRule12; }*/ //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- // SUB_BOUNDARY_6:Silverman87-style, content/function tone group boundaries. // Does trivial sentence-final function word look-ahead check. //+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+- else if( ( (prev_POS == MS_Noun) || (prev_POS == MS_Verb) || (prev_POS == MS_Adj) || (prev_POS == MS_Adv)) && ((cur_POS != MS_Noun) && (cur_POS != MS_Verb) && (cur_POS != MS_Adj) && (cur_POS != MS_Adv)) && (fNext_IsPunct == false)) { cur_Bnd = SUB_BOUNDARY_6; bndNum = BND_PhraseRule13; accNum = ACC_PhraseRule13; } } //------------------------------------------------------------------------ // If phrasing was found, save it //------------------------------------------------------------------------ if( (cur_Bnd != NULL_BOUNDARY) && (iCurWord > 0) && //!(fNext_IsPunct) && !(prev_Punct) && (pCur_Tok->m_TuneBoundaryType == NULL_BOUNDARY) ) { //pCur_Tok->m_TuneBoundaryType = cur_Bnd; pTempTok = new CFEToken; if( pTempTok ) { pTempTok->m_TuneBoundaryType = cur_Bnd; pTempTok->phon_Len = 1; pTempTok->phon_Str[0] = _SIL_; pTempTok->srcPosition = pCur_Tok->srcPosition; pTempTok->srcLen = pCur_Tok->srcLen; pTempTok->tokStr[0] = '+'; // punctuation pTempTok->tokStr[1] = 0; // delimiter pTempTok->tokLen = 1; pTempTok->m_TermSil = 0; pTempTok->m_DurScale = 0; if( pPrev_Tok ) { pPrev_Tok->m_AccentSource = accNum; pPrev_Tok->m_BoundarySource = bndNum; pPrev_Tok->m_Accent = K_LHSTAR; } pTempTok->m_SilenceSource = SIL_SubBound; if( pPrev_Tok ) { //pTempTok->m_DurScale = pPrev_Tok->m_DurScale; pTempTok->m_ProsodyDurScale = pPrev_Tok->m_ProsodyDurScale; pTempTok->user_Volume = pPrev_Tok->user_Volume; } else { //pTempTok->m_DurScale = 1.0f; pTempTok->m_ProsodyDurScale = 1.0f; } m_TokList.InsertBefore( m_TokList.FindIndex( iCurWord ), pTempTok ); pCur_Tok = pTempTok; m_cNumOfWords++; cNumOfWords++; iCurWord++; } } //------------------------------- // Process sentence punctuation //------------------------------- AdjustQuestTune( pCur_Tok, fIsYesNo ); //------------------------------- // Prepare for next word //------------------------------- prev_Punct = pCur_Tok->m_TuneBoundaryType; prev_POS = cur_POS; pPrev_Tok = pCur_Tok; //------------------------------ // Shift the token pipeline //------------------------------ pCur_Tok = pNext_Tok; pNext_Tok = pNext2_Tok; pNext2_Tok = pNext3_Tok; if( listPos ) { pNext3_Tok = m_TokList.GetNext( listPos ); } else { pNext3_Tok = NULL; } //------------------------------------------------------------------------ // Keep track of when determiners encountered to help in deciding // when to allow a strong 'and' boundary (SUB_BOUNDARY_2) //------------------------------------------------------------------------ if( punctDistance > 2) { fHasDet = false; } if( cur_POS == MS_Det ) { fHasDet = true; } } //------------------------------------- // Process final sentence punctuation //------------------------------------- pCur_Tok = (CFEToken*)m_TokList.GetTail(); AdjustQuestTune( pCur_Tok, fIsYesNo ); } } /* CFrontend::DoPhrasing */ /***************************************************************************** * CFrontend::RecalcProsody * *--------------------------* * Description: * In response to a real-time rate change, recalculate duration and pitch * ********************************************************************** MC ***/ #ifdef USE_VOICEDATAOBJ void CFrontend::RecalcProsody() { SPDBG_FUNC( "CFrontend::RecalcProsody" ); UNITINFO* pu; CAlloCell* pCurCell; ULONG k; //-------------------------------------------- // Compute new allo durations //-------------------------------------------- /*pCurCell = m_pAllos->GetHeadCell(); while( pCurCell ) { //pCurCell->m_DurScale = 1.0; pCurCell = m_pAllos->GetNextCell(); }*/ m_DurObj.AlloDuration( m_pAllos, m_RateRatio_API ); //-------------------------------------------- // Modulate allo pitch //-------------------------------------------- m_PitchObj.AlloPitch( m_pAllos, m_BasePitch, m_PitchRange ); pu = m_pUnits; pCurCell = m_pAllos->GetHeadCell(); while( pCurCell ) { pu->duration = pCurCell->m_ftDuration; for( k = 0; k < pu->nKnots; k++ ) { pu->pTime[k] = pCurCell->m_ftTime[k] * m_SampleRate; pu->pF0[k] = pCurCell->m_ftPitch[k]; pu->pAmp[k] = pu->ampRatio; } pu++; pCurCell = m_pAllos->GetNextCell(); } } /* CFrontend::RecalcProsody */ #endif /***************************************************************************** * CFrontend::NextData * *---------------------* * Description: * This gets called from the backend when UNIT stream is dry. * Parse TOKENS to ALLOS to UNITS * ********************************************************************** MC ***/ HRESULT CFrontend::NextData( void **pData, SPEECH_STATE *pSpeechState ) { SPDBG_FUNC( "CFrontend::NextData" ); bool haveNewRate = false; HRESULT hr = S_OK; //----------------------------------- // First, check and see if SAPI has an action //----------------------------------- // Check for rate change long baseRateRatio; if( m_pOutputSite->GetActions() & SPVES_RATE ) { hr = m_pOutputSite->GetRate( &baseRateRatio ); if ( SUCCEEDED( hr ) ) { if( baseRateRatio > SPMAX_VOLUME ) { //--- Clip rate to engine maximum baseRateRatio = MAX_USER_RATE; } else if ( baseRateRatio < MIN_USER_RATE ) { //--- Clip rate to engine minimum baseRateRatio = MIN_USER_RATE; } m_RateRatio_API = CntrlToRatio( baseRateRatio ); haveNewRate = true; } } //--------------------------------------------- // Async stop? //--------------------------------------------- if( SUCCEEDED( hr ) && ( m_pOutputSite->GetActions() & SPVES_ABORT ) ) { m_SpeechState = SPEECH_DONE; } //--------------------------------------------- // Async skip? //--------------------------------------------- if( SUCCEEDED( hr ) && ( m_pOutputSite->GetActions() & SPVES_SKIP ) ) { SPVSKIPTYPE SkipType; long SkipCount = 0; hr = m_pOutputSite->GetSkipInfo( &SkipType, &SkipCount ); if ( SUCCEEDED( hr ) && SkipType == SPVST_SENTENCE ) { IEnumSENTITEM *pGarbage; //--- Skip Forwards if ( SkipCount > 0 ) { long OriginalSkipCount = SkipCount; while ( SkipCount > 1 && ( hr = m_pEnumSent->Next( &pGarbage ) ) == S_OK ) { SkipCount--; pGarbage->Release(); } if ( hr == S_OK ) { hr = ParseSentence( eNEXT ); if ( SUCCEEDED( hr ) ) { SkipCount--; } } else if ( hr == S_FALSE ) { m_SpeechState = SPEECH_DONE; } SkipCount = OriginalSkipCount - SkipCount; } //--- Skip Backwards else if ( SkipCount < 0 ) { long OriginalSkipCount = SkipCount; while ( SkipCount < -1 && ( hr = m_pEnumSent->Previous( &pGarbage ) ) == S_OK ) { SkipCount++; pGarbage->Release(); } if ( hr == S_OK ) { hr = ParseSentence( ePREVIOUS ); // This case is different from the forward skip, needs to test that // Parse sentence found something to parse! if ( SUCCEEDED( hr ) && m_SpeechState != SPEECH_DONE) { SkipCount++; } } else if ( hr == S_FALSE ) { m_SpeechState = SPEECH_DONE; } SkipCount = OriginalSkipCount - SkipCount; } //--- Skip to beginning of this sentence else { m_CurUnitIndex = 0; } hr = m_pOutputSite->CompleteSkip( SkipCount ); } } //--------------------------------------------- // Make sure we're still speaking //--------------------------------------------- if( SUCCEEDED( hr ) && m_SpeechState != SPEECH_DONE ) { /***** if( m_CurUnitIndex >= m_unitCount) { //----------------------------------- // Get next sentence from Normalizer //----------------------------------- hr = ParseSentence( eNEXT ); //m_SpeechState = SPEECH_DONE; } else if( haveNewRate ) { //----------------------------------- // Recalculate prosody to new rate //----------------------------------- RecalcProsody(); } if( SUCCEEDED(hr) ) { if( m_SpeechState != SPEECH_DONE ) { //----------------------------------- // Get next phon //----------------------------------- m_pUnits[m_CurUnitIndex].hasSpeech = m_HasSpeech; *pData =( void*)&m_pUnits[m_CurUnitIndex]; m_CurUnitIndex++; } } *****/ hr = ParseSentence( eNEXT ); if ( SUCCEEDED( hr ) && m_SpeechState == SPEECH_CONTINUE ) { SentenceData *pSentData = new SentenceData; pSentData->pPhones = new Phone[ m_pAllos->GetCount() ]; ZeroMemory( pSentData->pPhones, m_pAllos->GetCount() * sizeof( Phone ) ); pSentData->ulNumPhones = m_pAllos->GetCount(); m_PitchObj.GetContour( &pSentData->pf0, &pSentData->ulNumf0 ); float RunTime = 0.0; float InitialSil = 0.0; bool fInitialSil = true; char ph[512]; typedef const char *(*MapPhoneSetFunc) (ALLO_CODE); MapPhoneSetFunc MapPhoneSet; if (m_fNewPhoneSet) { MapPhoneSet = NewMapPhoneSet; } else { MapPhoneSet = OldMapPhoneSet; } for ( int i = 0; i < m_pAllos->GetCount(); i++ ) { CAlloCell *pCurCell = m_pAllos->GetCell( i ); strcpy ( ph, MapPhoneSet( pCurCell->m_allo ) ); //--- adding stress info for vowels // if ( ( pCurCell->m_ctrlFlags & PRIMARY_STRESS ) && IsVowel ( ph ) ) // { // strcat( ph, "s"); // } strcpy( pSentData->pPhones[i].phone, ph ); //--- Skip initial SIL if ( fInitialSil && stricmp( pSentData->pPhones[i].phone, "sil" ) == 0 ) { InitialSil += pCurCell->m_ftDuration; pSentData->pPhones[i].f0 = 0; pSentData->pPhones[i].end = InitialSil; continue; } //--- Skip final SIL else if ( i == m_pAllos->GetCount() - 1 && stricmp( pSentData->pPhones[i].phone, "sil" ) == 0 ) { pSentData->pPhones[i].end = RunTime + InitialSil; break; } else { fInitialSil = false; pSentData->pPhones[i].f0 = GetPhoneF0( pSentData->pf0, RunTime, pCurCell->m_ftDuration ); RunTime += pCurCell->m_ftDuration; pSentData->pPhones[i].end = RunTime + InitialSil; } } *pData = (void*) pSentData; } } //------------------------------------------- // Let client know if text input is dry //------------------------------------------- *pSpeechState = m_SpeechState; return hr; } /* CFrontend::NextData */ /***************************************************************************** * IsVowel * *----------* * *********************************************************************** WD ***/ bool IsVowel ( char* ph ) { if ( ph ) { if ( ph[0] == 'a' || ph[0] == 'e' || ph[0] == 'i' || ph[0] == 'o' || ph[0] == 'u' ) { return true; } } return false; }